lucene得到聚类的数量

singnojava 2014-04-22

1. 先定义一个 BaseCollector

public abstract class BaseCollector extends TopDocsCollector<BaseScoreDoc> {

	BaseScoreDoc pqTop;
	int docBase = 0;
	Scorer scorer;
	private Comparable cpb = Comparable.DFAULT_COMPARABLE;

	/**
	 * Creates a collector backed by a {@code HitQueue} built from {@code numHits}
	 * and the given comparator.
	 *
	 * @param numHits value handed to the {@code HitQueue} constructor
	 * @param cpb     comparator used by {@link #collect(int)}; when {@code null} the
	 *                field keeps its default ({@code Comparable.DFAULT_COMPARABLE}).
	 *                The queue itself always receives the argument exactly as passed.
	 */
	protected BaseCollector(int numHits,Comparable cpb) {
		super(new HitQueue(numHits, true, cpb));
		// Remember the current top-of-queue entry; collect() overwrites it in place.
		this.pqTop = pq.top();
		// A null argument leaves the field initializer's default comparator untouched.
		this.cpb = (cpb == null) ? this.cpb : cpb;
	}

	/**
	 * Creates a collector backed by a {@code HitQueue} built from {@code numHits}.
	 * The comparator used by {@link #collect(int)} stays at the field default,
	 * {@code Comparable.DFAULT_COMPARABLE}.
	 *
	 * @param numHits value handed to the {@code HitQueue} constructor
	 */
	protected BaseCollector(int numHits) {
		super(new HitQueue(numHits, true));
		pqTop = pq.top();
	}
	
	/**
	 * Key code - do not change it casually.
	 *
	 * Handles one matching document: reads its score from the current scorer, wraps it
	 * in a {@code BaseScoreDoc}, lets {@link #process(BaseScoreDoc)} inspect/adjust it,
	 * and then either drops it or writes it over the current top-of-queue entry.
	 *
	 * @param doc document id as delivered by the searcher
	 * @throws IOException if the scorer fails to produce a score
	 */
	public void collect(int doc) throws IOException {
		// This collector cannot handle these scores:
		float score = scorer.score() ;
		assert score != Float.NEGATIVE_INFINITY;
		assert !Float.isNaN(score);
		BaseScoreDoc csb = new BaseScoreDoc(doc,score);
		csb.doc = doc;
		csb.score = score;
		// NOTE(review): csb.doc is the raw `doc` argument (docBase is NOT added here),
		// whereas the queued entry below stores doc + docBase. If process() indexes a
		// reader-wide array by csb.doc this presumably misbehaves on multi-segment
		// indexes - confirm against the Lucene version in use.
		process(csb);
		// A negative f after process() means "skip": the document is neither counted
		// in totalHits nor queued.
		if(csb.f < 0){
			return ;
		}
		
		totalHits++;
		// Counted as a hit, but not competitive against the current top-of-queue entry.
		if(cpb.lessThan(csb, pqTop)){
			return;
		}

		// Reuse the top-of-queue object instead of inserting a new one, then let the
		// queue re-establish its order and hand back the new top.
		pqTop.f = csb.f;
		pqTop.sortValue = csb.sortValue;
		pqTop.doc = doc + docBase;
		pqTop.score = score;
		pqTop = pq.updateTop(); 
	}

	/**
	 * Hook called by {@link #collect(int)} for every matching document, before the
	 * document is counted or queued.
	 *
	 * After this method returns, collect() skips the document when {@code csb.f} is
	 * negative; otherwise it may copy {@code csb.f} and {@code csb.sortValue} into the
	 * queue entry.
	 *
	 * @param csb the candidate hit; its {@code doc} and {@code score} are set by collect()
	 */
	public abstract void process(BaseScoreDoc csb);

	/**
	 * Builds the {@link TopDocs} returned to the caller from the already-selected hits.
	 *
	 * @param results hits for the requested slice, or {@code null} when there are none
	 * @param start   offset of the slice within the collected hits
	 * @return {@code EMPTY_TOPDOCS} for a {@code null} slice; otherwise a TopDocs carrying
	 *         {@code totalHits}, the slice, and a max score taken from the first result
	 *         (when {@code start == 0}) or from the last entry left in the queue
	 */
	@Override
	protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
		if (results == null) {
			return EMPTY_TOPDOCS;
		}

		final float topScore;
		if (start != 0) {
			// Discard every queued entry except the final one, whose score is reported.
			int remaining = pq.size();
			while (remaining > 1) {
				pq.pop();
				remaining--;
			}
			topScore = pq.pop().score;
		} else {
			topScore = results[0].score;
		}

		return new TopDocs(totalHits, results, topScore);
	}

	/**
	 * Records the doc-id offset of the reader that is about to be searched;
	 * {@link #collect(int)} adds it to the ids it stores in the queue.
	 *
	 * @param reader the next reader (unused here)
	 * @param base   offset stored in {@code docBase}
	 */
	@Override
	public void setNextReader(IndexReader reader, int base) {
		docBase = base;
	}

	/**
	 * Stores the scorer that {@link #collect(int)} queries for the current document's score.
	 *
	 * @param scorer scorer for the documents about to be collected
	 */
	@Override
	public void setScorer(Scorer scorer) throws IOException {
		this.scorer = scorer;
	}

	/**
	 * Always returns {@code false}: this collector declares that it does not accept
	 * documents out of order.
	 */
	@Override
	public boolean acceptsDocsOutOfOrder() {
		return false;
	}

2. 编写自己的 Collector,统计两个分类:一个是单位名称分类,一个是地区分类

public class AnimalManagementCollector extends BaseCollector {
    private Map<String, Integer> unitMap = new HashMap<String, Integer>();//单位名称
    private Map<String, Integer> zoneMap = new HashMap<String, Integer>();//地区

    /**
     * Creates the collector, passing {@code numHits} and
     * {@code Comparable.DESC_COMPARABLE} to the base collector.
     *
     * @param numHits value forwarded to {@code BaseCollector}
     */
    public AnimalManagementCollector(int numHits) {
        super(numHits, Comparable.DESC_COMPARABLE);
    }

    /**
     * Counts the unit name and the zone of the given hit in {@code unitMap} and
     * {@code zoneMap}.
     *
     * The values are looked up by {@code csb.doc} in the caches of
     * {@code AnimalManagementFields} - the cache class that is built for the animal
     * management licence index (the previous code read {@code InstrumentFields},
     * i.e. the caches of a different index). Empty values are not counted.
     *
     * @param csb candidate hit; only {@code csb.doc} is read
     */
    @Override
    public void process(BaseScoreDoc csb) {
        int doc = csb.doc;
        increment(unitMap, cachedValue(AnimalManagementFields.UNIT_CACHE, doc));
        increment(zoneMap, cachedValue(AnimalManagementFields.ZONE_CACHE, doc));
    }

    /**
     * Returns {@code cache[doc]}, or {@code null} when the cache has not been loaded
     * or does not cover {@code doc}, so that a missing cache degrades to
     * "no facet counts" instead of failing the whole search.
     */
    private static String cachedValue(String[] cache, int doc) {
        if (cache == null || doc < 0 || doc >= cache.length) {
            return null;
        }
        return cache[doc];
    }

    /** Adds one to the counter stored under {@code key}; empty keys are ignored. */
    private static void increment(Map<String, Integer> counts, String key) {
        if (StringUtil.isEmpty(key)) {
            return;
        }
        Integer current = counts.get(key);
        counts.put(key, current == null ? 1 : current + 1);
    }

    /** Returns the live map of unit name to number of hits counted by {@code process}. */
    public Map<String, Integer> getUnitMap() {
        return unitMap;
    }

    /** Replaces the map that {@code process} uses to count hits per unit name. */
    public void setUnitMap(Map<String, Integer> unitMap) {
        this.unitMap = unitMap;
    }

    /** Returns the live map of zone to number of hits counted by {@code process}. */
    public Map<String, Integer> getZoneMap() {
        return zoneMap;
    }

    /** Replaces the map that {@code process} uses to count hits per zone. */
    public void setZoneMap(Map<String, Integer> zoneMap) {
        this.zoneMap = zoneMap;
    }

3.定制field

/**
 * Static, array-based cache of the "unit1" and "zone" field values of an index,
 * addressed by the document id that {@code FieldExtractor} reports for each value.
 */
public class AnimalManagementFields {
    /** Cached "unit1" (unit name) values, indexed by document id. */
    public static String[] UNIT_CACHE;
    /** Cached "zone" (region) values, indexed by document id. */
    public static String[] ZONE_CACHE;

    /**
     * Loads both caches from the given reader.
     *
     * @param ir reader of the index whose fields are cached
     */
    public synchronized void init(IndexReader ir) {
        readCache(ir);
    }

    /**
     * Rebuilds {@link #UNIT_CACHE} and {@link #ZONE_CACHE} from the given reader.
     * Both arrays are fully populated before either static field is replaced.
     *
     * Synchronized on the class: the caches are static, so the instance-level lock
     * taken by {@link #init(IndexReader)} alone does not serialize concurrent loads
     * started from different instances.
     *
     * @param ir reader of the index whose fields are cached
     */
    public static synchronized void readCache(IndexReader ir) {
        int size = ir.maxDoc() + 1;
        String[] units = loadField(ir, "unit1", size);
        String[] zones = loadField(ir, "zone", size);

        UNIT_CACHE = units;
        ZONE_CACHE = zones;
    }

    /**
     * Collects the values of one field into a new array of the given size.
     * Document ids outside the array are skipped explicitly (previously they were
     * skipped by catching and discarding the resulting exception).
     */
    private static String[] loadField(IndexReader ir, String fieldName, int size) {
        final String[] values = new String[size];
        FieldExtractor.extract(ir, fieldName, new FieldExtractor.FieldWalker() {
            @Override
            public void stroll(int doc, String value) {
                if (doc >= 0 && doc < values.length) {
                    values[doc] = value;
                }
            }
        });
        return values;
    }
}

4.在web.xml设置初始化

<!-- Registers com.dayainfo.action.InitServlet under the name "Init".
     load-on-startup=1 asks the container to load and initialise the servlet
     when the web application starts instead of on its first request. -->
<servlet>
        <servlet-name>Init</servlet-name>
        <servlet-class>com.dayainfo.action.InitServlet</servlet-class>
        <load-on-startup>1</load-on-startup>
    </servlet>

5.在InitServlet中初始化

public class InitServlet extends HttpServlet {

    private static final long serialVersionUID = 1L;
    private Logger logger = Logger.getLogger(InitServlet.class);

    /**
     * Loads the facet caches of the animal management licence index when the servlet
     * is initialised, and logs how long the load took.
     *
     * A failure to open the index is logged and otherwise ignored, so the web
     * application still starts (as before); only the caches stay unloaded.
     *
     * @param config servlet configuration supplied by the container
     * @throws ServletException if the superclass initialisation fails
     */
    @Override
    public void init(ServletConfig config) throws ServletException {
        // GenericServlet stores the config in init(ServletConfig); an override that
        // skips this call leaves getServletConfig()/getServletContext() unusable.
        super.init(config);

        try {
            long beginTime1 = System.currentTimeMillis();
            AnimalManagementFields animalManagementFields = new AnimalManagementFields();
            animalManagementFields.init(SQLCreatReader.getReader(SystemConstant.ANIMAL_MANAGEMENT_LICENCE_INDEX_KEY));
            long endTime1 = System.currentTimeMillis();
            logger.info("初始化_动物管理许可证_聚类信息耗时:" + StringUtil.millsecondChange(endTime1 - beginTime1, 1) + "秒");

        } catch (IOException e) {
            // Report through the class logger (with stack trace) instead of stderr.
            logger.error("初始化_动物管理许可证_聚类信息失败", e);
        }
    }

6.在搜索中使用

public class AnimalManagementSearchService {
    private AnimalManagementReturnParam animalManagementReturnParam = new AnimalManagementReturnParam();
    private int totalData;
    private ScoreDoc[] scoreDocs;

    public void handleInstrumentSearch(AnimalManagementReceiveParam animalManagementReceiveParam) throws IOException {
        long beginTime = System.currentTimeMillis();
        int numHits = animalManagementReceiveParam.getPageSize() * (animalManagementReceiveParam.getCurrentPage());
        AnimalManagementCollector animalManagementCollector = new AnimalManagementCollector(numHits);
        List<AnimalManagementLicenceBean> instrumentBeanListWithPage = luceneSearch(animalManagementCollector, animalManagementReceiveParam);
        animalManagementReturnParam.setAnimalManagementLicenceList(instrumentBeanListWithPage);

        animalManagementReturnParam.setUnitMap(animalManagementCollector.getUnitMap()); //单位名称
        animalManagementReturnParam.setZoneMap(animalManagementCollector.getZoneMap()); //地区

        long endtime = System.currentTimeMillis();
        animalManagementReturnParam.setTotalTime(StringUtil.millsecondChange(endtime - beginTime, 1));
    }

    //在索引中搜索数据
    public List<AnimalManagementLicenceBean> luceneSearch(AnimalManagementCollector animalManagementCollector, AnimalManagementReceiveParam animalManagementReceiveParam) throws IOException {

        QueryTerm term = new FuzzyQueryTerm();
        DXSearcher dxSearcher = new DXSearcher(SQLCreatReader.getReader(SystemConstant.ANIMAL_MANAGEMENT_LICENCE_INDEX_KEY));

        if ("1".equals(animalManagementReceiveParam.getFlag())) {       //分类检索
            term.addTerm("flag", "1", false);
        } else {
            if ((!StringUtil.isEmpty(animalManagementReceiveParam.getUnit()))) {
                term.addTerm("unit1", animalManagementReceiveParam.getUnit(), false);
            }
            if ((!StringUtil.isEmpty(animalManagementReceiveParam.getZone()))) {
                term.addTerm("zone", animalManagementReceiveParam.getZone(), false);
            }

            if (!StringUtil.isEmpty(animalManagementReceiveParam.getField())) {
                if ("1".equals(animalManagementReceiveParam.getField())) {          //全部字段
                    QueryTerm term1 = new FuzzyQueryTerm();
                    term1.addTerm("lic_number", animalManagementReceiveParam.getSw(), 2);
                    term1.addTerm("unit", animalManagementReceiveParam.getSw(), 2);
                    term.addTerm(term1, 1);
                } else if ("2".equals(animalManagementReceiveParam.getField())) {    //许可证编号
                    term.addTerm("lic_number", animalManagementReceiveParam.getSw());
                } else if ("3".equals(animalManagementReceiveParam.getField())) {    //单位名称
                    term.addTerm("unit", animalManagementReceiveParam.getSw());
                }
            }
        }
        dxSearcher.search(term, animalManagementCollector);
        if (term.getQuery() != null) {
            System.out.println("搜索字段:" + term.getQuery().toString());
        }
        int begin = animalManagementReceiveParam.getPageSize() * (animalManagementReceiveParam.getCurrentPage() - 1);
        int end = animalManagementReceiveParam.getPageSize();
        scoreDocs = animalManagementCollector.topDocs(begin, end).scoreDocs;
        totalData = animalManagementCollector.getTotalHits();
        animalManagementReturnParam.setTotalData(totalData);

        List<AnimalManagementLicenceBean> instrumentBeanList = new ArrayList<AnimalManagementLicenceBean>();
        FieldHighlighter fieldHighlighter = new FieldHighlighter(animalManagementReceiveParam.getSw());
        for (int i = 0; i < scoreDocs.length; i++) {
            ScoreDoc scoreDoc = scoreDocs[i];
            int docID = scoreDoc.doc;
            Document doc = dxSearcher.doc(docID);
            AnimalManagementLicenceBean animalManagementLicenceBean = new AnimalManagementLicenceBean();
            if (!StringUtil.isEmpty(doc.get("dxid"))) {
                animalManagementLicenceBean.setDxid(doc.get("dxid"));
            }
            if (!StringUtil.isEmpty(doc.get("title"))) {
                animalManagementLicenceBean.setTitle(fieldHighlighter.getTextFragment(doc.get("title"), false));
            }
            if (!StringUtil.isEmpty(doc.get("type"))) {
                animalManagementLicenceBean.setType(fieldHighlighter.getTextFragment(doc.get("type"), false));
            }
            if (!StringUtil.isEmpty(doc.get("lic_number"))) {
                animalManagementLicenceBean.setLic_number(fieldHighlighter.getTextFragment(doc.get("lic_number"), false));
            }
            if (!StringUtil.isEmpty(doc.get("unit"))) {
                animalManagementLicenceBean.setUnit(fieldHighlighter.getTextFragment(doc.get("unit"), false));
            }
            if (!StringUtil.isEmpty(doc.get("unit1"))) {
                animalManagementLicenceBean.setUnit1(fieldHighlighter.getTextFragment(doc.get("unit1"), false));
            }
            if (!StringUtil.isEmpty(doc.get("enable_range"))) {
                animalManagementLicenceBean.setEnable_range(fieldHighlighter.getTextFragment(doc.get("enable_range"), false));
            }
            if (!StringUtil.isEmpty(doc.get("zone"))) {
                animalManagementLicenceBean.setZone(fieldHighlighter.getTextFragment(doc.get("zone"), false));
            }
            if (!StringUtil.isEmpty(doc.get("url"))) {
                animalManagementLicenceBean.setUrl(fieldHighlighter.getTextFragment(doc.get("url"), false));
            }
            instrumentBeanList.add(animalManagementLicenceBean);
        }
        return instrumentBeanList;
    }

    public AnimalManagementReturnParam getAnimalManagementReturnParam() {
        return animalManagementReturnParam;
    }

    public void setAnimalManagementReturnParam(AnimalManagementReturnParam animalManagementReturnParam) {
        this.animalManagementReturnParam = animalManagementReturnParam;
    }
}

相关推荐

EffortsRun / 0评论 2014-04-22