package com.bin.lucene.highlighter;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
import org.junit.Test;
import com.bin.lucene.utils.File2DocUtils;
/**
 * Exercises the Lucene highlighter: runs a query against an existing on-disk
 * index, takes one page of hits and replaces each document's "content" field
 * with a short snippet in which the matched terms are wrapped in HTML markup.
 *
 * @author Administrator
 */
public class HighlighterTest {

    /** Maximum number of hits the searcher is asked to collect. */
    private static final int MAX_HITS = 10000;

    /** Size of a highlighted fragment, also used for the plain fallback snippet. */
    private static final int SNIPPET_LENGTH = 50;

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
    String filePath = "D:\\workspace\\myeclipse\\LuceneDemo\\luceneDatassource\\CHANGES.txt";
    String indexPath = "D:\\workspace\\myeclipse\\LuceneDemo\\luceneIndex";

    /**
     * Searches the "name" and "content" fields for a fixed term, highlights the
     * first page of results and prints every document of that page.
     *
     * @throws Exception if parsing the query, reading the index or highlighting fails
     */
    @Test
    public void test1() throws Exception {
        // Build the query first.
        String queryStr = "Lucene";
        QueryParser parser = new MultiFieldQueryParser(Version.LUCENE_35, new String[]{"name", "content"}, analyzer);
        Query query = parser.parse(queryStr);

        // Configure the highlighter: matched terms are wrapped in a red <font> tag.
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer fragmentScorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, fragmentScorer);
        Fragmenter fragmenter = new SimpleFragmenter(SNIPPET_LENGTH);
        highlighter.setTextFragmenter(fragmenter);

        // Fetch the data of the current page.
        int pageSize = 20;
        int pageNo = 1;
        List<Document> recordList = searchPage(query, highlighter, pageNo, pageSize);

        // Display the results.
        for (Document doc : recordList) {
            File2DocUtils.printDocumentInfo(doc);
        }
    }

    /**
     * Runs the query against the index at {@link #indexPath} and returns the
     * requested page of documents with their "content" field highlighted.
     * The directory, reader and searcher are always closed before returning.
     *
     * @param query       the parsed query to execute
     * @param highlighter the configured highlighter used to build snippets
     * @param pageNo      1-based page number
     * @param pageSize    number of documents per page
     * @return the documents of the requested page, possibly empty
     * @throws Exception if reading the index or highlighting fails
     */
    private List<Document> searchPage(Query query, Highlighter highlighter, int pageNo, int pageSize)
            throws Exception {
        List<Document> recordList = new ArrayList<Document>();
        Filter filter = null;

        FSDirectory directory = FSDirectory.open(new File(indexPath));
        try {
            IndexReader indexReader = IndexReader.open(directory);
            try {
                // Closing a searcher built from a reader does not close that reader,
                // so both are released explicitly.
                IndexSearcher searcher = new IndexSearcher(indexReader);
                try {
                    TopDocs topDocs = searcher.search(query, filter, MAX_HITS);
                    System.out.println("总共有【" + topDocs.totalHits + "】条匹配结果");

                    int firstResult = (pageNo - 1) * pageSize;
                    // scoreDocs holds at most MAX_HITS entries, whereas totalHits may be
                    // larger; bound the page by what was actually collected.
                    int end = Math.min(pageNo * pageSize, topDocs.scoreDocs.length);
                    System.out.println(firstResult + ":" + end);

                    for (int i = firstResult; i < end; i++) {
                        ScoreDoc scoreDoc = topDocs.scoreDocs[i];
                        int docSn = scoreDoc.doc; // internal document number
                        Document doc = searcher.doc(docSn); // load the document by its number
                        highlightContent(highlighter, doc);
                        recordList.add(doc);
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                indexReader.close();
            }
        } finally {
            directory.close();
        }
        return recordList;
    }

    /**
     * Replaces the "content" field of the document with its best highlighted
     * fragment. When the highlighter finds no match in the content, the leading
     * {@link #SNIPPET_LENGTH} characters are used instead. Documents without a
     * "content" value are left untouched.
     *
     * @param highlighter the configured highlighter
     * @param doc         the document to modify in place
     * @throws Exception if analysing or highlighting the content fails
     */
    private void highlightContent(Highlighter highlighter, Document doc) throws Exception {
        String docContent = doc.get("content");
        if (docContent == null) {
            // Nothing to highlight; avoids a NullPointerException further down.
            return;
        }
        String hc = highlighter.getBestFragment(analyzer, "content", docContent);
        if (hc == null) {
            hc = docContent.substring(0, Math.min(SNIPPET_LENGTH, docContent.length()));
        }
        doc.getField("content").setValue(hc);
    }
}
分享到:
相关推荐
NULL 博文链接:https://sunlongan666.iteye.com/blog/580380
lucene-highlighter-3.5.0.jar lucene高亮包
lucene3.0-highlighter.jar lucene3.0的高亮jar包,从lucene3.0源码中导出来的
Lucene高亮显示的源码,这个源码太难找了,这里共享给大家,希望有所帮助!
用于编写Lucene高亮摘要的jar包,版本2.0.0,好用。谁用谁知道~~~~
lucene3.6.2及highlighter jar包 可以把关键字在搜索结果中高亮显示
lucene-highlighter-3.6.0-sources
Lucene.Net.dll Highlighter.Net.dll
Lucene搜索的内容进行高亮显示。
希望大家可以关注我的博客http://blog.csdn.net/wuyinggui10000/article/category/3173543
lucene需要用到的基本jar包:核心包、高亮、中文分词,希望有所帮助。je-analysis1.5.3.jar lucene-highlighter-3.0.0.jar lucene-core-3.0.0.jar lucene-memory-3.0.0.jar
该一整套lucene开发jar包包含以下内容: lucene-core-3.0.1.jar(核心包) contrib\analyzers\common\...contrib\highlighter\lucene-highlighter-3.0.1.jar(高亮) contrib\memory\lucene-memory-3.0.1.jar(高亮)
NULL 博文链接:https://llying.iteye.com/blog/570208
Lucene.Net_2_9_1 含高亮等包完整版,之前在csdn下的没有高亮包,这个是完整版的。高亮Highlighter.Net编译后可以用。
高亮显示 .net 分词系统 全文检索 与lucene.net结合使用
实例是一个java实例,可直接导入到MyEclipse中...其中是lucene3.0整合了庖丁解牛分词法,添加了高亮显示。因为lucene3.0无法整合paoding-analysis.jar 所以我已经把paoding-analysis中的源码整合进来了避免无法整合问题
此包中包含 lucene-core-3.0.3.jar,lucene-highlighter-3.0.1.jar,lucene-memory-3.0.1.jar
20 lucene-highlighter-3.0.0.jar 搜索引擎高亮处理 21 mail.jar 邮件核心包 22 mysql-connector-java-5.1.10-bin.jar MYSQL驱动包 23 slf4j-api-1.5.10.jar 日志包,EhCache依赖 24 slf4j-jdk14-1.5.10.jar ...
版本比较低,可能是要和相应的lucene.net 版本一块使用!1.3.2.1的
该demo演示了Lucene的常用功能(智能分词、关键字高亮等) 在测试自定义添加数据后点击建立索引按钮才能生效(此功能由51aspx添加),原版只检索目录下的html文件,51Aspx改为了检索所有文件 1) Indexer文件夹:...