搜索引擎之全文搜索算法功能实现（基于Lucene）-白红宇

搜索引擎之全文搜索算法功能实现（基于Lucene）

阅读量：4560 次

发布时间：2019-06-08

本文共 6228 字，大约阅读时间需要 20 分钟。

此前我已经公开过非全文搜索的代码，需要的朋友可以前去阅读我的博客。本文主要讨论如何实现全文搜索。我花了很长时间设计了一个新作品（原文此处的链接已缺失），其中的“观点”内容对全文搜索的要求很高，所以我又花了不少时间研究全文搜索，你可以先体验一下（原文此处的体验链接已缺失）。废话不多说，直接上代码：

/**
 * Builds the Lucene query for an article search, runs it on {@code searcher}
 * and returns the requested page of hits.
 *
 * <p>What the visible code does:
 * <ul>
 *   <li>Reads the field list, keyword, current page, page size, sort field,
 *       start/end date and classify value from {@code condition}; the keyword
 *       is passed through {@code escapeExprSpecialWord} before use.</li>
 *   <li>{@code q1} collects the filter clauses (all {@code MUST}): a
 *       {@code TermQuery} on {@code "typeId"} when classify is
 *       {@code "guanzhi"} (id "2"), {@code "opinion"} (id "3") or
 *       {@code "write"} (id "1"), and an inclusive {@code TermRangeQuery} on
 *       {@code "writingTime"} when both dates are non-null.</li>
 *   <li>{@code q2} collects the keyword clauses (all {@code SHOULD}): a
 *       {@code TermQuery} and a {@code PrefixQuery}, both on
 *       {@code filedsList[1]}.</li>
 *   <li>Results are sorted by score unless a sort field is given, in which
 *       case a STRING {@code SortField} on that field is used.</li>
 *   <li>The collector is sized to {@code start + pageSize} hits, where
 *       {@code start = (currentPage - 1) * pageSize}.</li>
 * </ul>
 *
 * <p>NOTE(review): this listing is damaged. The generic type arguments after
 * {@code Map}/{@code HashMap} are missing, and the text between
 * {@code for(int i=0;i} and {@code 0){} is lost, so the loop body and the
 * condition guarding {@code booleanQuery.add(q1, ...)} are not visible here.
 * The method is also collapsed onto a few physical lines, so each {@code //}
 * comment swallows the code after it. Restore from the original source before
 * compiling.
 *
 * <p>NOTE(review): the local {@code isASC} is assigned from
 * {@code condition.isDESC()} and passed as the third {@code SortField}
 * argument; the name looks inverted relative to its source. Confirm the
 * intended sort direction.
 *
 * @param condition search parameters (project type; only the getters used
 *                  below are visible here)
 * @param searcher  the index searcher the combined query is executed on
 * @return a map with {@code "amount"} (total hit count from the collector),
 *         {@code "tds"} (the {@code TopDocs} for the requested page) and
 *         {@code "query"} (the combined {@code BooleanQuery})
 * @throws ParseException declared by the signature
 * @throws IOException    declared by the signature
 */
public Map
     
        articleSearchAlgorithms(SearchCondition condition,IndexSearcher searcher) throws ParseException, IOException{                     Map
      
        map =new HashMap
       
        ();             String[] filedsList=condition.getFiledsList();             String keyWord=condition.getKeyWord();             int currentPage=condition.getCurrentPage();             int pageSize=condition.getPageSize();             String sortField=condition.getSortField();             boolean isASC=condition.isDESC();             String sDate=condition.getsDate();            String eDate=condition.geteDate();            String classify=condition.getClassify();                                     //escape special/terminator characters            keyWord=escapeExprSpecialWord(keyWord);                        BooleanQuery q1 = new BooleanQuery();            BooleanQuery q2 = new BooleanQuery();             BooleanQuery booleanQuery = new BooleanQuery(); //boolean query                          if(classify!=null&&(classify.equals("guanzhi")||classify.equals("opinion")||classify.equals("write"))){                 String typeId="1";//defaults to the speech type                 if(classify.equals("guanzhi")){                     typeId="2";                 }                 if(classify.equals("opinion")){                     typeId="3";                 }                 Query termQuery = new TermQuery(new Term("typeId",typeId));                  q1.add(termQuery,BooleanClause.Occur.MUST);             }             if(sDate!=null&&eDate!=null){
   //whether a range query is applied is decided by these two parameters                Query rangeQuery = new TermRangeQuery("writingTime", new BytesRef(sDate), new BytesRef(eDate),true, true);                q1.add(rangeQuery,BooleanClause.Occur.MUST);             }            Sort sort = new Sort(); // sorting            sort.setSort(SortField.FIELD_SCORE);            if(sortField!=null){                sort.setSort(new SortField(sortField, SortField.Type.STRING, isASC));            }                        int start = (currentPage - 1) * pageSize;            int hm = start + pageSize;                        TopFieldCollector res = TopFieldCollector.create(sort,hm,false, false, false, false);            //exact-match query            Term t0=new Term(filedsList[1],keyWord);            TermQuery termQuery = new TermQuery(t0);//two kinds of high-precision match queries            q2.add(termQuery,BooleanClause.Occur.SHOULD);                        //prefix match            Term t1=new Term(filedsList[1],keyWord);            PrefixQuery prefixQuery=new PrefixQuery(t1);            q2.add(prefixQuery,BooleanClause.Occur.SHOULD);                        //phrase / similarity matching, suited to tokenized content            for(int i=0;i
        
         0){                booleanQuery.add(q1,BooleanClause.Occur.MUST);            }            if(q2!=null && q2.toString().length()>0){                 booleanQuery.add(q2,BooleanClause.Occur.MUST);            }                        searcher.search(booleanQuery, res);            long amount = res.getTotalHits();             TopDocs tds = res.topDocs(start, pageSize);            map.put("amount",amount);            map.put("tds",tds);            map.put("query",booleanQuery);            return map;    }

注意：上面代码中的搜索条件（SearchCondition）是针对我自己项目的具体需求设计的，您可以按照自己的搜索条件进行改动，这里很难适配所有读者的场景。

/**
 * Runs an article search for {@code condition} and returns the hit count
 * together with the matching articles, optionally with highlighted title and
 * content.
 *
 * <p>What the visible code does:
 * <ul>
 *   <li>Reads the reader, URL, highlight flag and keyword from
 *       {@code condition}, obtains a searcher via {@code getSearcher(reader, URL)}
 *       and delegates the query to {@code articleSearchAlgorithms}.</li>
 *   <li>If that call returns {@code null}, the result holds
 *       {@code "amount" = 0L} and {@code "source" = null}.</li>
 *   <li>Otherwise, for each {@code ScoreDoc} it loads the stored document and
 *       reads its {@code "id"}, {@code "title"} and {@code "content"} fields;
 *       the content is first passed through {@code HtmlEnDecode.htmlEncode}.</li>
 *   <li>When highlighting is enabled, title and content each go through a
 *       {@code Highlighter} built from {@code simpleHTMLFormatter} and a
 *       {@code QueryScorer} on the executed query (fragment size 40 for the
 *       title, {@code Constant.HIGHLIGHT_CONTENT_LENGTH} for the content).
 *       If {@code getBestFragment} returns {@code null}, the code falls back
 *       to a plain {@code String.replace} on the keyword; for content it then
 *       also applies {@code subContent}, {@code HtmlEnDecode.htmldecode} and
 *       {@code SubStringHTML.sub}.</li>
 *   <li>The article is then loaded with
 *       {@code writeDao.getArticle(Long.parseLong(id))}; when it is non-null,
 *       its title, content and time gap are set and it is added to the
 *       result list.</li>
 * </ul>
 *
 * <p>Any exception thrown inside the {@code try} block is caught and printed
 * with {@code printStackTrace()}; the method then still returns the map with
 * whatever was collected so far under {@code "source"}.
 *
 * <p>NOTE(review): this listing is damaged. The generic type arguments after
 * {@code Map}/{@code HashMap}/{@code List}/{@code ArrayList} are missing, and
 * the markup inside the two fallback {@code replace(keyWord, "...")} string
 * literals is lost, leaving each literal broken across a line break. The
 * method is also collapsed onto a few physical lines, so each {@code //}
 * comment swallows the code after it. Restore from the original source before
 * compiling.
 *
 * <p>NOTE(review): if the search itself throws, {@code "amount"} is never put
 * into the map; callers presumably need to handle a missing key. Confirm.
 *
 * @param condition search parameters (project type; only the getters used
 *                  below are visible here)
 * @return a map with {@code "amount"} and {@code "source"} as described above
 * @throws Exception declared by the signature; {@code getSearcher} is called
 *                   outside the {@code try} block
 */
public Map
     
       searchArticle(SearchCondition condition) throws Exception{                    Map
      
        map =new HashMap
       
        ();        List
         
          list=new ArrayList
         
          ();                 DirectoryReader reader=condition.getReader();         String URL=condition.getURL();         boolean isHighligth=condition.isHighlight();         String keyWord=condition.getKeyWord();         IndexSearcher searcher=getSearcher(reader,URL);                try{            Map
          
            output=articleSearchAlgorithms(condition,searcher); if(output==null){ map.put("amount",0L); map.put("source",null); return map; } map.put("amount", output.get("amount")); TopDocs tds = (TopDocs) output.get("tds"); ScoreDoc[] sd = tds.scoreDocs; Query query =(Query) output.get("query"); for (int i = 0; i < sd.length; i++) { Document doc = searcher.doc(sd[i].doc); String id = doc.get("id"); /**********************start*************************fields that need processing are kept together********************/ String temp=doc.get("title"); String title =temp; //not highlighted by default if(isHighligth){ //highlight the article title Highlighter highlighterTitle = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighterTitle.setTextFragmenter(new SimpleFragmenter(40)); // fragment length in characters TokenStream ts = analyzer.tokenStream("title", new StringReader(temp)); title= highlighterTitle.getBestFragment(ts,temp); if(title==null){ title=temp.replace(keyWord,"
           "+keyWord+"");//works around a highlighter plugin bug } } String temp1=HtmlEnDecode.htmlEncode(doc.get("content")); String content=temp1;//escaped with our own helper method if(isHighligth){ //highlight the content Highlighter highlighterContent = new Highlighter(simpleHTMLFormatter, new QueryScorer(query)); highlighterContent.setTextFragmenter(new SimpleFragmenter(Constant.HIGHLIGHT_CONTENT_LENGTH)); // fragment length in characters //temp1=StringEscapeUtils.escapeHtml(temp1);//escaping Chinese characters breaks highlighting TokenStream ts1 = analyzer.tokenStream("content", new StringReader(temp1)); content = highlighterContent.getBestFragment(ts1,temp1); if(content==null){ content=temp1.replace(keyWord,"
           "+keyWord+"");//works around a highlighter plugin bug //handle this case ourselves; otherwise the highlighter truncates automatically content=subContent(content);//truncate content=HtmlEnDecode.htmldecode(content);//HTML-decode content=SubStringHTML.sub(content,Constant.HIGHLIGHT_CONTENT_LENGTH); } } /*---------------------------------------frequently changing data is kept together----------------------------*/ Write write=writeDao.getArticle(Long.parseLong(id)); if(write!=null){ write.setTitle(title); write.setContent(content); Date writingTime=write.getWritingTime(); String timeGap=DateUtil.dateGap(writingTime);//timeGap write.setTimeGap(timeGap); list.add(write); } } }catch(Exception e){ e.printStackTrace(); } map.put("source",list); return map; }

注意上面，这是具体的搜索代码，不同的应用场景有不同的需求，请您按照自己的需求封装对象，查询数据库等，代码毫无保留，绝对可用。

如果有什么疑问，可以加 QQ 群：284205104。如果群已满，麻烦去找一下最新的群再加入即可（原文此处指向的链接已缺失）。谢谢您的阅读。

转载于:https://www.cnblogs.com/huangxie/p/8059132.html

你可能感兴趣的文章