Lucene学习笔记(二)

dato0123

浏览: 911132 次

最近访客 | 更多访客>>

u012363178

spirit5800

hksy

fengyunbo

博主相关

博客

微博

相册

留言

关于我

文章分类

全部博客 (1706)

社区版块

存档分类

importjava.io.IOException;

importorg.apache.lucene.analysis.Analyzer;

importorg.apache.lucene.analysis.SimpleAnalyzer;

importorg.apache.lucene.document.Document;

importorg.apache.lucene.document.Field;

importorg.apache.lucene.index.IndexReader;

importorg.apache.lucene.index.IndexWriter;

importorg.apache.lucene.store.Directory;

importorg.apache.lucene.store.FSDirectory;

importjunit.framework.TestCase;

publicclassBaseIndexTestCaseextendsTestCase

{

protectedString[]keywords={"1","2"};

protectedString[]unindexed={"Netherlands","Italy"};

protectedString[]unstored={"Amsterdamhaslotsofbridges","Venicehaslotsofcanals"};

protectedString[]text={"Amsterdam","Venice"};

protectedDirectorydir;

protectedvoidsetUp()throwsIOException{

StringindexDir=

System.getProperty("java.io.tmpdir","tmp")+

System.getProperty("file.separator")+"index-dir";

dir=FSDirectory.getDirectory(indexDir,true);

addDocuments(dir);

}

protectedvoidaddDocuments(Directorydir)

throwsIOException{

IndexWriterwriter=newIndexWriter(dir,getAnalyzer(),true);

writer.setUseCompoundFile(isCompound());

for(inti=0;i<keywords.length;i++)

{

Documentdoc=newDocument();

doc.add(newField("id",keywords[i],Field.Store.YES,Field.Index.UN_TOKENIZED));

doc.add(newField("country",unindexed[i],Field.Store.YES,Field.Index.NO));

doc.add(newField("contents",unstored[i],Field.Store.NO,Field.Index.TOKENIZED));

doc.add(newField("city",text[i],Field.Store.YES,Field.Index.TOKENIZED));

writer.addDocument(doc);

}

writer.optimize();

writer.close();

}

protectedAnalyzergetAnalyzer()

{

returnnewSimpleAnalyzer();

}

protectedbooleanisCompound()

{

returntrue;

}

publicvoidtestIndexWriter()throwsIOException

{

IndexWriterwriter=newIndexWriter(dir,this.getAnalyzer(),false);

assertEquals(keywords.length,writer.docCount());

writer.close();

}

publicvoidtestIndexReader()throwsIOException

{

IndexReaderreader=IndexReader.open(dir);

assertEquals(keywords.length,reader.maxDoc());

assertEquals(keywords.length,reader.numDocs());

reader.close();

}

importjava.io.IOException;

importorg.apache.lucene.analysis.Analyzer;

importorg.apache.lucene.analysis.WhitespaceAnalyzer;

importorg.apache.lucene.document.Document;

importorg.apache.lucene.document.Field;

importorg.apache.lucene.index.IndexReader;

importorg.apache.lucene.index.IndexWriter;

importorg.apache.lucene.index.Term;

importorg.apache.lucene.search.Hits;

importorg.apache.lucene.search.IndexSearcher;

importorg.apache.lucene.search.Query;

importorg.apache.lucene.search.TermQuery;

publicclassDocumentDeleteTestextendsBaseIndexTestCase

{

publicvoidtestDeleteBeforeIndexMerge()throwsIOException

{

assertEquals(1,getHitCount("city","Amsterdam"));

IndexReaderreader=IndexReader.open(dir);

assertEquals(2,reader.maxDoc());

assertEquals(2,reader.numDocs());

reader.deleteDocument(1);

assertTrue(reader.isDeleted(1));

assertTrue(reader.hasDeletions());

assertEquals(2,reader.maxDoc());

assertEquals(1,reader.numDocs());

reader.close();

reader=IndexReader.open(dir);

assertEquals(2,reader.maxDoc());

assertEquals(1,reader.numDocs());

reader.close();

}

publicvoidtestDeleteAfterIndexMerge()throwsIOException

{

IndexReaderreader=IndexReader.open(dir);

assertEquals(2,reader.maxDoc());

assertEquals(2,reader.numDocs());

reader.deleteDocument(1);

reader.close();

IndexWriterwriter=newIndexWriter(dir,getAnalyzer(),false);

writer.optimize();

writer.close();

reader=IndexReader.open(dir);

assertFalse(reader.isDeleted(1));

assertFalse(reader.hasDeletions());

assertEquals(1,reader.maxDoc());

assertEquals(1,reader.numDocs());

reader.close();

}

privateintgetHitCount(StringfieldName,StringsearchString)

throwsIOException{

IndexSearchersearcher=newIndexSearcher(dir);

Termt=newTerm(fieldName,searchString);

Queryquery=newTermQuery(t);

Hitshits=searcher.search(query);

inthitCount=hits.length();

searcher.close();

returnhitCount;

}

protectedAnalyzergetAnalyzer(){

returnnewWhitespaceAnalyzer();

}

importjava.io.IOException;

importorg.apache.lucene.analysis.Analyzer;

importorg.apache.lucene.analysis.WhitespaceAnalyzer;

importorg.apache.lucene.document.Document;

importorg.apache.lucene.document.Field;

importorg.apache.lucene.index.IndexReader;

importorg.apache.lucene.index.IndexWriter;

importorg.apache.lucene.index.Term;

importorg.apache.lucene.search.Hits;

importorg.apache.lucene.search.IndexSearcher;

importorg.apache.lucene.search.Query;

importorg.apache.lucene.search.TermQuery;

publicclassDocumentUpdateTestextendsBaseIndexTestCase

{

publicvoidtestUpdate()throwsIOException

{

assertEquals(1,getHitCount("city","Amsterdam"));

IndexReaderreader=IndexReader.open(dir);

reader.deleteDocuments(newTerm("city","Amsterdam"));

reader.close();

IndexWriterwriter=newIndexWriter(dir,getAnalyzer(),

false);

Documentdoc=newDocument();

doc.add(newField("id","1",Field.Store.YES,Field.Index.UN_TOKENIZED));

doc.add(newField("country","Russia",Field.Store.YES,Field.Index.NO));

doc.add(newField("contents","St.Petersburghaslotsofbridges",Field.Store.NO,Field.Index.TOKENIZED));

doc.add(newField("city","St.Petersburg",Field.Store.YES,Field.Index.TOKENIZED));

writer.addDocument(doc);

writer.optimize();

writer.close();

assertEquals(0,getHitCount("city","Amsterdam"));

assertEquals(1,getHitCount("city","Petersburg"));

}

protectedAnalyzergetAnalyzer(){

returnnewWhitespaceAnalyzer();

}

privateintgetHitCount(StringfieldName,StringsearchString)

throwsIOException{

IndexSearchersearcher=newIndexSearcher(dir);

Termt=newTerm(fieldName,searchString);

Queryquery=newTermQuery(t);

Hitshits=searcher.search(query);

inthitCount=hits.length();

searcher.close();

returnhitCount;

}

importorg.apache.lucene.analysis.Analyzer;

importorg.apache.lucene.analysis.SimpleAnalyzer;

importorg.apache.lucene.document.Document;

importorg.apache.lucene.document.Field;

importorg.apache.lucene.index.IndexWriter;

importorg.apache.lucene.store.Directory;

importorg.apache.lucene.store.FSDirectory;

publicclassIndexTuningDemo

{

publicstaticvoidmain(String[]args)throwsException{

intdocsInIndex=Integer.parseInt(args[0]);

//createanindexcalled'index-dir'inatempdirectory

Directorydir=FSDirectory.getDirectory(

System.getProperty("java.io.tmpdir","tmp")+

System.getProperty("file.separator")+"index-dir",true);

Analyzeranalyzer=newSimpleAnalyzer();

IndexWriterwriter=newIndexWriter(dir,analyzer,true);

//setvariablesthataffectspeedofindexing

writer.setMergeFactor(Integer.parseInt(args[1]));

writer.setMaxMergeDocs(Integer.parseInt(args[2]));

writer.setInfoStream(System.out);

writer.setMaxBufferedDocs(Integer.parseInt(args[3]));

System.out.println("Mergefactor:"+writer.getMergeFactor());

System.out.println("Maxmergedocs:"+writer.getMaxMergeDocs());

System.out.println("Minmergedocs:"+writer.getMaxBufferedDocs());

longstart=System.currentTimeMillis();

for(inti=0;i<docsInIndex;i++){

Documentdoc=newDocument();

doc.add(newField("fieldname","Bibamus",Field.Store.YES,Field.Index.TOKENIZED));

writer.addDocument(doc);

}

writer.close();

longstop=System.currentTimeMillis();

System.out.println("Time:"+(stop-start)+"ms");

}

importorg.apache.lucene.store.Directory;
importorg.apache.lucene.store.FSDirectory;
importorg.apache.lucene.store.RAMDirectory;
importorg.apache.lucene.document.Document;
importorg.apache.lucene.document.Field;
importorg.apache.lucene.index.IndexWriter;
importorg.apache.lucene.analysis.SimpleAnalyzer;

importjunit.framework.TestCase;
importjava.io.IOException;
importjava.util.Collection;
importjava.util.ArrayList;
importjava.util.Iterator;

publicclassFSversusRAMDirectoryTestextendsTestCase
{
privateDirectoryfsDir;
privateDirectoryramDir;
privateCollectiondocs=loadDocuments(3000,5);//加载数据

protectedvoidsetUp()throwsException
{
StringfsIndexDir=System.getProperty("java.io.tmpdir","tmp")+System.getProperty("file.separator")+"fs-index";
ramDir=newRAMDirectory();//内存中目录
fsDir=FSDirectory.getDirectory(fsIndexDir,true);
}

publicvoidtestTiming()throwsIOException
{
longramTiming=timeIndexWriter(ramDir);
longfsTiming=timeIndexWriter(fsDir);

assertTrue(fsTiming>ramTiming);

System.out.println("RAMDirectoryTime:"+(ramTiming)+"ms");
System.out.println("FSDirectoryTime:"+(fsTiming)+"ms");
}

privatelongtimeIndexWriter(Directorydir)throwsIOException
{
longstart=System.currentTimeMillis();
addDocuments(dir);
longstop=System.currentTimeMillis();
return(stop-start);
}

privatevoidaddDocuments(Directorydir)throwsIOException
{
IndexWriterwriter=newIndexWriter(dir,newSimpleAnalyzer(),true);

/**
//changetoadjustperformanceofindexingwithFSDirectory
writer.mergeFactor=writer.mergeFactor;
writer.maxMergeDocs=writer.maxMergeDocs;
writer.minMergeDocs=writer.minMergeDocs;
*/

for(Iteratoriter=docs.iterator();iter.hasNext();)
{
Documentdoc=newDocument();
Stringword=(String)iter.next();
doc.add(newField("keyword",word,Field.Store.YES,Field.Index.UN_TOKENIZED));
doc.add(newField("unindexed",word,Field.Store.YES,Field.Index.NO));
doc.add(newField("unstored",word,Field.Store.NO,Field.Index.TOKENIZED));
doc.add(newField("text",word,Field.Store.YES,Field.Index.TOKENIZED));
writer.addDocument(doc);
}
writer.optimize();
writer.close();
}

privateCollectionloadDocuments(intnumDocs,intwordsPerDoc)
{
Collectiondocs=newArrayList(numDocs);
for(inti=0;i<numDocs;i++)
{
StringBufferdoc=newStringBuffer(wordsPerDoc);
for(intj=0;j<wordsPerDoc;j++)
{
doc.append("Bibamus");
}
docs.add(doc.toString());
}
returndocs;
}
}

分享到：