`
dato0123
  • 浏览: 911132 次
文章分类
社区版块
存档分类
最新评论

Lucene学习笔记(二)

 
阅读更多
importjava.io.IOException;

importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.SimpleAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;

importjunit.framework.TestCase;


publicclassBaseIndexTestCaseextendsTestCase
{
protectedString[]keywords={"1","2"};
protectedString[]unindexed={"Netherlands","Italy"};
protectedString[]unstored={"Amsterdamhaslotsofbridges","Venicehaslotsofcanals"};
protectedString[]text={"Amsterdam","Venice"};
protectedDirectorydir;

protectedvoidsetUp()throwsIOException{
StringindexDir
=
System.getProperty(
"java.io.tmpdir","tmp")+
System.getProperty(
"file.separator")+"index-dir";
dir
=FSDirectory.getDirectory(indexDir,true);
addDocuments(dir);
}


protectedvoidaddDocuments(Directorydir)
throwsIOException
{
IndexWriterwriter
=newIndexWriter(dir,getAnalyzer(),true);
writer.setUseCompoundFile(isCompound());
for(inti=0;i<keywords.length;i++)
{
Documentdoc
=newDocument();
doc.add(
newField("id",keywords[i],Field.Store.YES,Field.Index.UN_TOKENIZED));
doc.add(
newField("country",unindexed[i],Field.Store.YES,Field.Index.NO));
doc.add(
newField("contents",unstored[i],Field.Store.NO,Field.Index.TOKENIZED));
doc.add(
newField("city",text[i],Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);
}

writer.optimize();
writer.close();
}


protectedAnalyzergetAnalyzer()
{
returnnewSimpleAnalyzer();
}

protectedbooleanisCompound()
{
returntrue;
}


publicvoidtestIndexWriter()throwsIOException
{
IndexWriterwriter
=newIndexWriter(dir,this.getAnalyzer(),false);
assertEquals(keywords.length,writer.docCount());
writer.close();
}


publicvoidtestIndexReader()throwsIOException
{
IndexReaderreader
=IndexReader.open(dir);
assertEquals(keywords.length,reader.maxDoc());
assertEquals(keywords.length,reader.numDocs());
reader.close();
}

}

importjava.io.IOException;

importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.WhitespaceAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.search.Hits;
importorg.apache.lucene.search.IndexSearcher;
importorg.apache.lucene.search.Query;
importorg.apache.lucene.search.TermQuery;


publicclassDocumentDeleteTestextendsBaseIndexTestCase
{
publicvoidtestDeleteBeforeIndexMerge()throwsIOException
{
assertEquals(
1,getHitCount("city","Amsterdam"));

IndexReaderreader
=IndexReader.open(dir);
assertEquals(
2,reader.maxDoc());
assertEquals(
2,reader.numDocs());


reader.deleteDocument(
1);

assertTrue(reader.isDeleted(
1));
assertTrue(reader.hasDeletions());
assertEquals(
2,reader.maxDoc());
assertEquals(
1,reader.numDocs());

reader.close();

reader
=IndexReader.open(dir);

assertEquals(
2,reader.maxDoc());
assertEquals(
1,reader.numDocs());

reader.close();
}


publicvoidtestDeleteAfterIndexMerge()throwsIOException
{
IndexReaderreader
=IndexReader.open(dir);
assertEquals(
2,reader.maxDoc());
assertEquals(
2,reader.numDocs());
reader.deleteDocument(
1);
reader.close();

IndexWriterwriter
=newIndexWriter(dir,getAnalyzer(),false);
writer.optimize();
writer.close();

reader
=IndexReader.open(dir);

assertFalse(reader.isDeleted(
1));
assertFalse(reader.hasDeletions());
assertEquals(
1,reader.maxDoc());
assertEquals(
1,reader.numDocs());

reader.close();
}



privateintgetHitCount(StringfieldName,StringsearchString)
throwsIOException
{
IndexSearchersearcher
=newIndexSearcher(dir);
Termt
=newTerm(fieldName,searchString);
Queryquery
=newTermQuery(t);
Hitshits
=searcher.search(query);
inthitCount=hits.length();
searcher.close();
returnhitCount;
}



protectedAnalyzergetAnalyzer(){
returnnewWhitespaceAnalyzer();
}




}


importjava.io.IOException;

importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.WhitespaceAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexReader;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.index.Term;
importorg.apache.lucene.search.Hits;
importorg.apache.lucene.search.IndexSearcher;
importorg.apache.lucene.search.Query;
importorg.apache.lucene.search.TermQuery;


publicclassDocumentUpdateTestextendsBaseIndexTestCase
{

publicvoidtestUpdate()throwsIOException
{
assertEquals(
1,getHitCount("city","Amsterdam"));
IndexReaderreader
=IndexReader.open(dir);
reader.deleteDocuments(
newTerm("city","Amsterdam"));
reader.close();

IndexWriterwriter
=newIndexWriter(dir,getAnalyzer(),
false);
Documentdoc
=newDocument();
doc.add(
newField("id","1",Field.Store.YES,Field.Index.UN_TOKENIZED));
doc.add(
newField("country","Russia",Field.Store.YES,Field.Index.NO));
doc.add(
newField("contents","St.Petersburghaslotsofbridges",Field.Store.NO,Field.Index.TOKENIZED));
doc.add(
newField("city","St.Petersburg",Field.Store.YES,Field.Index.TOKENIZED));

writer.addDocument(doc);
writer.optimize();
writer.close();

assertEquals(
0,getHitCount("city","Amsterdam"));
assertEquals(
1,getHitCount("city","Petersburg"));
}


protectedAnalyzergetAnalyzer(){
returnnewWhitespaceAnalyzer();
}


privateintgetHitCount(StringfieldName,StringsearchString)
throwsIOException
{
IndexSearchersearcher
=newIndexSearcher(dir);
Termt
=newTerm(fieldName,searchString);
Queryquery
=newTermQuery(t);
Hitshits
=searcher.search(query);
inthitCount=hits.length();
searcher.close();
returnhitCount;
}


}


importorg.apache.lucene.analysis.Analyzer;
importorg.apache.lucene.analysis.SimpleAnalyzer;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;


publicclassIndexTuningDemo
{
publicstaticvoidmain(String[]args)throwsException{
intdocsInIndex=Integer.parseInt(args[0]);

//createanindexcalled'index-dir'inatempdirectory
Directorydir=FSDirectory.getDirectory(
System.getProperty(
"java.io.tmpdir","tmp")+
System.getProperty(
"file.separator")+"index-dir",true);
Analyzeranalyzer
=newSimpleAnalyzer();
IndexWriterwriter
=newIndexWriter(dir,analyzer,true);

//setvariablesthataffectspeedofindexing
writer.setMergeFactor(Integer.parseInt(args[1]));
writer.setMaxMergeDocs(Integer.parseInt(args[
2]));
writer.setInfoStream(System.
out);
writer.setMaxBufferedDocs(Integer.parseInt(args[
3]));

System.
out.println("Mergefactor:"+writer.getMergeFactor());
System.
out.println("Maxmergedocs:"+writer.getMaxMergeDocs());
System.
out.println("Minmergedocs:"+writer.getMaxBufferedDocs());

longstart=System.currentTimeMillis();
for(inti=0;i<docsInIndex;i++){
Documentdoc
=newDocument();
doc.add(
newField("fieldname","Bibamus",Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);
}

writer.close();
longstop=System.currentTimeMillis();
System.
out.println("Time:"+(stop-start)+"ms");
}


}



<!--<br><br>Code highlighting produced by Actipro CodeHighlighter (freeware)<br>http://www.CodeHighlighter.com/<br><br>-->importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;
importorg.apache.lucene.store.RAMDirectory;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.analysis.SimpleAnalyzer;

importjunit.framework.TestCase;
importjava.io.IOException;
importjava.util.Collection;
importjava.util.ArrayList;
importjava.util.Iterator;

publicclassFSversusRAMDirectoryTestextendsTestCase
{
privateDirectoryfsDir;
privateDirectoryramDir;
privateCollectiondocs=loadDocuments(3000,5);//加载数据

protectedvoidsetUp()throwsException
{
StringfsIndexDir
=System.getProperty("java.io.tmpdir","tmp")+System.getProperty("file.separator")+"fs-index";
ramDir
=newRAMDirectory();//内存中目录
fsDir=FSDirectory.getDirectory(fsIndexDir,true);
}

publicvoidtestTiming()throwsIOException
{
longramTiming=timeIndexWriter(ramDir);
longfsTiming=timeIndexWriter(fsDir);

assertTrue(fsTiming
>ramTiming);


System.
out.println("RAMDirectoryTime:"+(ramTiming)+"ms");
System.
out.println("FSDirectoryTime:"+(fsTiming)+"ms");
}

privatelongtimeIndexWriter(Directorydir)throwsIOException
{
longstart=System.currentTimeMillis();
addDocuments(dir);
longstop=System.currentTimeMillis();
return(stop-start);
}

privatevoidaddDocuments(Directorydir)throwsIOException
{
IndexWriterwriter
=newIndexWriter(dir,newSimpleAnalyzer(),true);

/**
//changetoadjustperformanceofindexingwithFSDirectory
writer.mergeFactor=writer.mergeFactor;
writer.maxMergeDocs=writer.maxMergeDocs;
writer.minMergeDocs=writer.minMergeDocs;
*/

for(Iteratoriter=docs.iterator();iter.hasNext();)
{
Documentdoc
=newDocument();
Stringword
=(String)iter.next();
doc.add(
newField("keyword",word,Field.Store.YES,Field.Index.UN_TOKENIZED));
doc.add(
newField("unindexed",word,Field.Store.YES,Field.Index.NO));
doc.add(
newField("unstored",word,Field.Store.NO,Field.Index.TOKENIZED));
doc.add(
newField("text",word,Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}

privateCollectionloadDocuments(intnumDocs,intwordsPerDoc)
{
Collectiondocs
=newArrayList(numDocs);
for(inti=0;i<numDocs;i++)
{
StringBufferdoc
=newStringBuffer(wordsPerDoc);
for(intj=0;j<wordsPerDoc;j++)
{
doc.append(
"Bibamus");
}
docs.add(doc.toString());
}
returndocs;
}
}


分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics