Index: 3rdParty_sources/lucene/org/apache/lucene/LucenePackage.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/LucenePackage.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/LucenePackage.java 17 Aug 2012 14:55:14 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/LucenePackage.java 16 Dec 2014 11:32:21 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. Index: 3rdParty_sources/lucene/org/apache/lucene/analysis/Analyzer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/analysis/Analyzer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/analysis/Analyzer.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/analysis/Analyzer.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.analysis; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,65 +17,458 @@ * limitations under the License. */ -import java.io.Reader; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.util.CloseableThreadLocal; +import org.apache.lucene.util.Version; + +import java.io.Closeable; import java.io.IOException; +import java.io.Reader; +import java.util.HashMap; +import java.util.Map; -/** An Analyzer builds TokenStreams, which analyze text. It thus represents a - * policy for extracting index terms from text. - *

- * Typical implementations first build a Tokenizer, which breaks the stream of - * characters from the Reader into raw Tokens. One or more TokenFilters may - * then be applied to the output of the Tokenizer. +/** + * An Analyzer builds TokenStreams, which analyze text. It thus represents a + * policy for extracting index terms from text. + *

+ * In order to define what analysis is done, subclasses must define their + * {@link TokenStreamComponents TokenStreamComponents} in {@link #createComponents(String, Reader)}. + * The components are then reused in each call to {@link #tokenStream(String, Reader)}. + *

+ * Simple example: + *

+ * Analyzer analyzer = new Analyzer() {
+ *  {@literal @Override}
+ *   protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
+ *     Tokenizer source = new FooTokenizer(reader);
+ *     TokenStream filter = new FooFilter(source);
+ *     filter = new BarFilter(filter);
+ *     return new TokenStreamComponents(source, filter);
+ *   }
+ * };
+ * 
+ * For more examples, see the {@link org.apache.lucene.analysis Analysis package documentation}. + *

+ * For some concrete implementations bundled with Lucene, look in the analysis modules: + *

*/ -public abstract class Analyzer { - /** Creates a TokenStream which tokenizes all the text in the provided - * Reader. Must be able to handle null field name for backward compatibility. +public abstract class Analyzer implements Closeable { + + private final ReuseStrategy reuseStrategy; + private Version version = Version.LUCENE_CURRENT; + + // non final as it gets nulled if closed; pkg private for access by ReuseStrategy's final helper methods: + CloseableThreadLocal storedValue = new CloseableThreadLocal<>(); + + /** + * Create a new Analyzer, reusing the same set of components per-thread + * across calls to {@link #tokenStream(String, Reader)}. */ - public abstract TokenStream tokenStream(String fieldName, Reader reader); + public Analyzer() { + this(GLOBAL_REUSE_STRATEGY); + } - /** Creates a TokenStream that is allowed to be re-used - * from the previous time that the same thread called - * this method. Callers that do not need to use more - * than one TokenStream at the same time from this - * analyzer should use this method for better - * performance. + /** + * Expert: create a new Analyzer with a custom {@link ReuseStrategy}. + *
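A minimal sketch of this expert constructor (not part of this patch); it reuses the placeholder FooTokenizer from the javadoc example above, and PER_FIELD_REUSE_STRATEGY is the predefined strategy declared further down in this file:

    Analyzer perField = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
      @Override
      protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
        Tokenizer source = new FooTokenizer(reader); // placeholder tokenizer, as in the example above
        return new TokenStreamComponents(source);    // no filters: the tokenizer is also the sink
      }
    };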

+ * NOTE: if you just want to reuse on a per-field basis, it's easier to + * use a subclass of {@link AnalyzerWrapper} such as + * + * PerFieldAnalyzerWrapper instead. */ - public TokenStream reusableTokenStream(String fieldName, Reader reader) throws IOException { - return tokenStream(fieldName, reader); + public Analyzer(ReuseStrategy reuseStrategy) { + this.reuseStrategy = reuseStrategy; } - private ThreadLocal tokenStreams = new ThreadLocal(); + /** + * Creates a new {@link TokenStreamComponents} instance for this analyzer. + * + * @param fieldName + * the name of the field's content passed to the + * {@link TokenStreamComponents} sink as a reader + * @param reader + * the reader passed to the {@link Tokenizer} constructor + * @return the {@link TokenStreamComponents} for this analyzer. + */ + protected abstract TokenStreamComponents createComponents(String fieldName, + Reader reader); - /** Used by Analyzers that implement reusableTokenStream - * to retrieve previously saved TokenStreams for re-use - * by the same thread. */ - protected Object getPreviousTokenStream() { - return tokenStreams.get(); + /** + * Returns a TokenStream suitable for fieldName, tokenizing + * the contents of reader. + *

+ * This method uses {@link #createComponents(String, Reader)} to obtain an + * instance of {@link TokenStreamComponents}. It returns the sink of the + * components and stores the components internally. Subsequent calls to this + * method will reuse the previously stored components after resetting them + * through {@link TokenStreamComponents#setReader(Reader)}. + *
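A hedged sketch of what this reuse means for a caller (not part of this patch; "body", the input strings, and analyzer are placeholders):

    TokenStream first = analyzer.tokenStream("body", new StringReader("first document"));
    // ... consume, end() and close() `first` ...
    TokenStream second = analyzer.tokenStream("body", new StringReader("second document"));
    // `second` is the same cached Tokenizer/TokenFilter chain as `first`, re-targeted at the
    // new reader via TokenStreamComponents.setReader(Reader); no new components are built.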

+ * NOTE: After calling this method, the consumer must follow the + * workflow described in {@link TokenStream} to properly consume its contents. + * See the {@link org.apache.lucene.analysis Analysis package documentation} for + * some examples demonstrating this. + * + * NOTE: If your data is available as a {@code String}, use + * {@link #tokenStream(String, String)} which reuses a {@code StringReader}-like + * instance internally. + * + * @param fieldName the name of the field the created TokenStream is used for + * @param reader the reader the streams source reads from + * @return TokenStream for iterating the analyzed content of reader + * @throws AlreadyClosedException if the Analyzer is closed. + * @throws IOException if an i/o error occurs. + * @see #tokenStream(String, String) + */ + public final TokenStream tokenStream(final String fieldName, + final Reader reader) throws IOException { + TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName); + final Reader r = initReader(fieldName, reader); + if (components == null) { + components = createComponents(fieldName, r); + reuseStrategy.setReusableComponents(this, fieldName, components); + } else { + components.setReader(r); + } + return components.getTokenStream(); } - - /** Used by Analyzers that implement reusableTokenStream - * to save a TokenStream for later re-use by the same - * thread. */ - protected void setPreviousTokenStream(Object obj) { - tokenStreams.set(obj); + + /** + * Returns a TokenStream suitable for fieldName, tokenizing + * the contents of text. + *

+ * This method uses {@link #createComponents(String, Reader)} to obtain an + * instance of {@link TokenStreamComponents}. It returns the sink of the + * components and stores the components internally. Subsequent calls to this + * method will reuse the previously stored components after resetting them + * through {@link TokenStreamComponents#setReader(Reader)}. + *

+ * NOTE: After calling this method, the consumer must follow the + * workflow described in {@link TokenStream} to properly consume its contents. + * See the {@link org.apache.lucene.analysis Analysis package documentation} for + * some examples demonstrating this. + * + * @param fieldName the name of the field the created TokenStream is used for + * @param text the String the streams source reads from + * @return TokenStream for iterating the analyzed content of reader + * @throws AlreadyClosedException if the Analyzer is closed. + * @throws IOException if an i/o error occurs (may rarely happen for strings). + * @see #tokenStream(String, Reader) + */ + public final TokenStream tokenStream(final String fieldName, final String text) throws IOException { + TokenStreamComponents components = reuseStrategy.getReusableComponents(this, fieldName); + @SuppressWarnings("resource") final ReusableStringReader strReader = + (components == null || components.reusableStringReader == null) ? + new ReusableStringReader() : components.reusableStringReader; + strReader.setValue(text); + final Reader r = initReader(fieldName, strReader); + if (components == null) { + components = createComponents(fieldName, r); + reuseStrategy.setReusableComponents(this, fieldName, components); + } else { + components.setReader(r); + } + components.reusableStringReader = strReader; + return components.getTokenStream(); } + + /** + * Override this if you want to add a CharFilter chain. + *
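For instance, a sketch of such an override (HTMLStripCharFilter ships in the analyzers-common module and is only an illustrative choice, not part of this patch):

    @Override
    protected Reader initReader(String fieldName, Reader reader) {
      // strip HTML/XML markup before the Tokenizer sees the characters
      return new HTMLStripCharFilter(reader);
    }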

+ * The default implementation returns reader + * unchanged. + * + * @param fieldName IndexableField name being indexed + * @param reader original Reader + * @return reader, optionally decorated with CharFilter(s) + */ + protected Reader initReader(String fieldName, Reader reader) { + return reader; + } - /** - * Invoked before indexing a Fieldable instance if + * Invoked before indexing a IndexableField instance if * terms have already been added to that field. This allows custom * analyzers to place an automatic position increment gap between - * Fieldable instances using the same field name. The default value + * IndexbleField instances using the same field name. The default value * position increment gap is 0. With a 0 position increment gap and * the typical default token position increment of 1, all terms in a field, - * including across Fieldable instances, are in successive positions, allowing - * exact PhraseQuery matches, for instance, across Fieldable instance boundaries. + * including across IndexableField instances, are in successive positions, allowing + * exact PhraseQuery matches, for instance, across IndexableField instance boundaries. * - * @param fieldName Fieldable name being indexed. - * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)} + * @param fieldName IndexableField name being indexed. + * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}. + * This value must be {@code >= 0}. */ - public int getPositionIncrementGap(String fieldName) - { + public int getPositionIncrementGap(String fieldName) { return 0; } + + /** + * Just like {@link #getPositionIncrementGap}, except for + * Token offsets instead. By default this returns 1. + * This method is only called if the field + * produced at least one token for indexing. + * + * @param fieldName the field just indexed + * @return offset gap, added to the next token emitted from {@link #tokenStream(String,Reader)}. + * This value must be {@code >= 0}. + */ + public int getOffsetGap(String fieldName) { + return 1; + } + + /** + * Returns the used {@link ReuseStrategy}. + */ + public final ReuseStrategy getReuseStrategy() { + return reuseStrategy; + } + + /** + * Set the version of Lucene this analyzer should mimic the behavior for for analysis. + */ + public void setVersion(Version v) { + version = v; // TODO: make write once? + } + + /** + * Return the version of Lucene this analyzer will mimic the behavior of for analysis. + */ + public Version getVersion() { + return version; + } + + /** Frees persistent resources used by this Analyzer */ + @Override + public void close() { + if (storedValue != null) { + storedValue.close(); + storedValue = null; + } + } + + /** + * This class encapsulates the outer components of a token stream. It provides + * access to the source ({@link Tokenizer}) and the outer end (sink), an + * instance of {@link TokenFilter} which also serves as the + * {@link TokenStream} returned by + * {@link Analyzer#tokenStream(String, Reader)}. + */ + public static class TokenStreamComponents { + /** + * Original source of the tokens. + */ + protected final Tokenizer source; + /** + * Sink tokenstream, such as the outer tokenfilter decorating + * the chain. This can be the source if there are no filters. + */ + protected final TokenStream sink; + + /** Internal cache only used by {@link Analyzer#tokenStream(String, String)}. 
*/ + transient ReusableStringReader reusableStringReader; + + /** + * Creates a new {@link TokenStreamComponents} instance. + * + * @param source + * the analyzer's tokenizer + * @param result + * the analyzer's resulting token stream + */ + public TokenStreamComponents(final Tokenizer source, + final TokenStream result) { + this.source = source; + this.sink = result; + } + + /** + * Creates a new {@link TokenStreamComponents} instance. + * + * @param source + * the analyzer's tokenizer + */ + public TokenStreamComponents(final Tokenizer source) { + this.source = source; + this.sink = source; + } + + /** + * Resets the encapsulated components with the given reader. If the components + * cannot be reset, an Exception should be thrown. + * + * @param reader + * a reader to reset the source component + * @throws IOException + * if the component's reset method throws an {@link IOException} + */ + protected void setReader(final Reader reader) throws IOException { + source.setReader(reader); + } + + /** + * Returns the sink {@link TokenStream} + * + * @return the sink {@link TokenStream} + */ + public TokenStream getTokenStream() { + return sink; + } + + /** + * Returns the component's {@link Tokenizer} + * + * @return Component's {@link Tokenizer} + */ + public Tokenizer getTokenizer() { + return source; + } + } + + /** + * Strategy defining how TokenStreamComponents are reused per call to + * {@link Analyzer#tokenStream(String, java.io.Reader)}. + */ + public static abstract class ReuseStrategy { + + /** Sole constructor. (For invocation by subclass constructors, typically implicit.) */ + public ReuseStrategy() {} + + /** + * Gets the reusable TokenStreamComponents for the field with the given name. + * + * @param analyzer Analyzer from which to get the reused components. Use + * {@link #getStoredValue(Analyzer)} and {@link #setStoredValue(Analyzer, Object)} + * to access the data on the Analyzer. + * @param fieldName Name of the field whose reusable TokenStreamComponents + * are to be retrieved + * @return Reusable TokenStreamComponents for the field, or {@code null} + * if there was no previous components for the field + */ + public abstract TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName); + + /** + * Stores the given TokenStreamComponents as the reusable components for the + * field with the give name. + * + * @param fieldName Name of the field whose TokenStreamComponents are being set + * @param components TokenStreamComponents which are to be reused for the field + */ + public abstract void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components); + + /** + * Returns the currently stored value. + * + * @return Currently stored value or {@code null} if no value is stored + * @throws AlreadyClosedException if the Analyzer is closed. + */ + protected final Object getStoredValue(Analyzer analyzer) { + if (analyzer.storedValue == null) { + throw new AlreadyClosedException("this Analyzer is closed"); + } + return analyzer.storedValue.get(); + } + + /** + * Sets the stored value. + * + * @param storedValue Value to store + * @throws AlreadyClosedException if the Analyzer is closed. + */ + protected final void setStoredValue(Analyzer analyzer, Object storedValue) { + if (analyzer.storedValue == null) { + throw new AlreadyClosedException("this Analyzer is closed"); + } + analyzer.storedValue.set(storedValue); + } + + } + + /** + * A predefined {@link ReuseStrategy} that reuses the same components for + * every field. 
+ */ + public static final ReuseStrategy GLOBAL_REUSE_STRATEGY = new GlobalReuseStrategy(); + + /** + * Implementation of {@link ReuseStrategy} that reuses the same components for + * every field. + * @deprecated This implementation class will be hidden in Lucene 5.0. + * Use {@link Analyzer#GLOBAL_REUSE_STRATEGY} instead! + */ + @Deprecated + public final static class GlobalReuseStrategy extends ReuseStrategy { + + /** Sole constructor. (For invocation by subclass constructors, typically implicit.) + * @deprecated Don't create instances of this class, use {@link Analyzer#GLOBAL_REUSE_STRATEGY} */ + @Deprecated + public GlobalReuseStrategy() {} + + @Override + public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) { + return (TokenStreamComponents) getStoredValue(analyzer); + } + + @Override + public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) { + setStoredValue(analyzer, components); + } + } + + /** + * A predefined {@link ReuseStrategy} that reuses components per-field by + * maintaining a Map of TokenStreamComponent per field name. + */ + public static final ReuseStrategy PER_FIELD_REUSE_STRATEGY = new PerFieldReuseStrategy(); + + /** + * Implementation of {@link ReuseStrategy} that reuses components per-field by + * maintaining a Map of TokenStreamComponent per field name. + * @deprecated This implementation class will be hidden in Lucene 5.0. + * Use {@link Analyzer#PER_FIELD_REUSE_STRATEGY} instead! + */ + @Deprecated + public static class PerFieldReuseStrategy extends ReuseStrategy { + + /** Sole constructor. (For invocation by subclass constructors, typically implicit.) + * @deprecated Don't create instances of this class, use {@link Analyzer#PER_FIELD_REUSE_STRATEGY} */ + @Deprecated + public PerFieldReuseStrategy() {} + + @SuppressWarnings("unchecked") + @Override + public TokenStreamComponents getReusableComponents(Analyzer analyzer, String fieldName) { + Map componentsPerField = (Map) getStoredValue(analyzer); + return componentsPerField != null ? componentsPerField.get(fieldName) : null; + } + + @SuppressWarnings("unchecked") + @Override + public void setReusableComponents(Analyzer analyzer, String fieldName, TokenStreamComponents components) { + Map componentsPerField = (Map) getStoredValue(analyzer); + if (componentsPerField == null) { + componentsPerField = new HashMap<>(); + setStoredValue(analyzer, componentsPerField); + } + componentsPerField.put(fieldName, components); + } + } + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/AnalyzerWrapper.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/analysis/CachingTokenFilter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/analysis/CachingTokenFilter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/analysis/CachingTokenFilter.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/analysis/CachingTokenFilter.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.analysis; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -22,52 +22,77 @@ import java.util.LinkedList; import java.util.List; +import org.apache.lucene.util.AttributeSource; + /** - * This class can be used if the Tokens of a TokenStream + * This class can be used if the token attributes of a TokenStream * are intended to be consumed more than once. It caches - * all Tokens locally in a List. + * all token attribute states locally in a List. * - * CachingTokenFilter implements the optional method + *

CachingTokenFilter implements the optional method * {@link TokenStream#reset()}, which repositions the * stream to the first Token. - * */ -public class CachingTokenFilter extends TokenFilter { - private List cache; - private Iterator iterator; +public final class CachingTokenFilter extends TokenFilter { + private List cache = null; + private Iterator iterator = null; + private AttributeSource.State finalState; + /** + * Create a new CachingTokenFilter around input, + * caching its token attributes, which can be replayed again + * after a call to {@link #reset()}. + */ public CachingTokenFilter(TokenStream input) { super(input); } - public Token next(final Token reusableToken) throws IOException { - assert reusableToken != null; + @Override + public final boolean incrementToken() throws IOException { if (cache == null) { // fill cache lazily - cache = new LinkedList(); - fillCache(reusableToken); + cache = new LinkedList<>(); + fillCache(); iterator = cache.iterator(); } if (!iterator.hasNext()) { - // the cache is exhausted, return null - return null; + // the cache is exhausted, return false + return false; } // Since the TokenFilter can be reset, the tokens need to be preserved as immutable. - Token nextToken = (Token) iterator.next(); - return (Token) nextToken.clone(); + restoreState(iterator.next()); + return true; } - public void reset() throws IOException { + @Override + public final void end() { + if (finalState != null) { + restoreState(finalState); + } + } + + /** + * Rewinds the iterator to the beginning of the cached list. + *
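A usage sketch (not part of this patch): as the reset() note below explains, the wrapped stream is reset once before wrapping; the field name and text are placeholders, and CharTermAttribute comes from org.apache.lucene.analysis.tokenattributes.

    TokenStream inner = analyzer.tokenStream("body", new StringReader("text to replay"));
    inner.reset();                                        // reset the inner stream once, before wrapping
    CachingTokenFilter cached = new CachingTokenFilter(inner);
    CharTermAttribute term = cached.addAttribute(CharTermAttribute.class);
    while (cached.incrementToken()) { /* first pass fills the cache */ }
    cached.reset();                                       // rewinds the cache only, not the inner stream
    while (cached.incrementToken()) { /* second pass replays the cached states */ }
    cached.end();
    cached.close();                                       // forwards to the inner stream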

+ * Note that this does not call reset() on the wrapped tokenstream ever, even + * the first time. You should reset() the inner tokenstream before wrapping + * it with CachingTokenFilter. + */ + @Override + public void reset() { if(cache != null) { - iterator = cache.iterator(); + iterator = cache.iterator(); } } - private void fillCache(final Token reusableToken) throws IOException { - for (Token nextToken = input.next(reusableToken); nextToken != null; nextToken = input.next(reusableToken)) { - cache.add(nextToken.clone()); + private void fillCache() throws IOException { + while(input.incrementToken()) { + cache.add(captureState()); } + // capture final state + input.end(); + finalState = captureState(); } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/CharArraySet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/CharFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/CharTokenizer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/DelegatingAnalyzerWrapper.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/ISOLatin1AccentFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/KeywordAnalyzer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/KeywordTokenizer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/LengthFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/LetterTokenizer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/LowerCaseFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/LowerCaseTokenizer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/NumericTokenStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/PerFieldAnalyzerWrapper.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/PorterStemFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/PorterStemmer.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/ReusableStringReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/SimpleAnalyzer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/SinkTokenizer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/StopAnalyzer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/StopFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/TeeTokenFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/analysis/Token.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/analysis/Token.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/analysis/Token.java 17 Aug 2012 14:55:07 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/analysis/Token.java 16 Dec 2014 11:31:57 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.analysis; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,165 +17,63 @@ * limitations under the License. */ -import org.apache.lucene.index.Payload; -import org.apache.lucene.index.TermPositions; // for javadoc -import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.analysis.tokenattributes.FlagsAttribute; +import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.index.DocsAndPositionsEnum; // for javadoc +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeReflector; +import org.apache.lucene.util.BytesRef; -/** A Token is an occurrence of a term from the text of a field. It consists of +/** + A Token is an occurrence of a term from the text of a field. It consists of a term's text, the start and end offset of the term in the text of the field, and a type string.

The start and end offsets permit applications to re-associate a token with its source text, e.g., to display highlighted query terms in a document - browser, or to show matching text fragments in a KWIC (KeyWord In Context) + browser, or to show matching text fragments in a KWIC display, etc.

The type is a string, assigned by a lexical analyzer (a.k.a. tokenizer), naming the lexical or syntactic class that the token belongs to. For example an end of sentence marker token might be implemented with type "eos". The default token type is "word".

- A Token can optionally have metadata (a.k.a. Payload) in the form of a variable - length byte array. Use {@link TermPositions#getPayloadLength()} and - {@link TermPositions#getPayload(byte[], int)} to retrieve the payloads from the index. + A Token can optionally have metadata (a.k.a. payload) in the form of a variable + length byte array. Use {@link DocsAndPositionsEnum#getPayload()} to retrieve the + payloads from the index.

-

- WARNING: The status of the Payloads feature is experimental. - The APIs introduced here might change in the future and will not be - supported anymore in such a case. - -

- -

NOTE: As of 2.3, Token stores the term text - internally as a malleable char[] termBuffer instead of - String termText. The indexing code and core tokenizers - have been changed to re-use a single Token instance, changing - its buffer and other fields in-place as the Token is - processed. This provides substantially better indexing - performance as it saves the GC cost of new'ing a Token and - String for every term. The APIs that accept String - termText are still available but a warning about the - associated performance cost has been added (below). The - {@link #termText()} method has been deprecated.

-

Tokenizers and filters should try to re-use a Token - instance when possible for best performance, by - implementing the {@link TokenStream#next(Token)} API. - Failing that, to create a new Token you should first use - one of the constructors that starts with null text. To load - the token from a char[] use {@link #setTermBuffer(char[], int, int)}. - To load from a String use {@link #setTermBuffer(String)} or {@link #setTermBuffer(String, int, int)}. - Alternatively you can get the Token's termBuffer by calling either {@link #termBuffer()}, - if you know that your text is shorter than the capacity of the termBuffer - or {@link #resizeTermBuffer(int)}, if there is any possibility - that you may need to grow the buffer. Fill in the characters of your term into this - buffer, with {@link String#getChars(int, int, char[], int)} if loading from a string, - or with {@link System#arraycopy(Object, int, Object, int, int)}, and finally call {@link #setTermLength(int)} to - set the length of the term text. See LUCENE-969 - for details.

-

Typical reuse patterns: -

+

NOTE: As of 2.9, Token implements all {@link Attribute} interfaces + that are part of core Lucene and can be found in the {@code tokenattributes} subpackage. + Even though it is not necessary to use Token anymore, with the new TokenStream API it can + be used as convenience class that implements all {@link Attribute}s, which is especially useful + to easily switch from the old to the new TokenStream API. + A few things to note:

- - @see org.apache.lucene.index.Payload +

+ Please note: With Lucene 3.1, the {@linkplain #toString toString()} method had to be changed to match the + {@link CharSequence} interface introduced by the interface {@link org.apache.lucene.analysis.tokenattributes.CharTermAttribute}. + This method now prints only the term text and no longer includes any additional information. +

+ @deprecated This class is outdated and no longer used since Lucene 2.9. Nuke it finally! */ -public class Token implements Cloneable { +@Deprecated +public class Token extends PackedTokenAttributeImpl implements FlagsAttribute, PayloadAttribute { - public static final String DEFAULT_TYPE = "word"; - - private static int MIN_BUFFER_SIZE = 10; - - /** @deprecated We will remove this when we remove the - * deprecated APIs */ - private String termText; - - /** - * Characters for the term text. - * @deprecated This will be made private. Instead, use: - * {@link termBuffer()}, - * {@link #setTermBuffer(char[], int, int)}, - * {@link #setTermBuffer(String)}, or - * {@link #setTermBuffer(String, int, int)} - */ - char[] termBuffer; - - /** - * Length of term text in the buffer. - * @deprecated This will be made private. Instead, use: - * {@link termLength()}, or @{link setTermLength(int)}. - */ - int termLength; - - /** - * Start in source text. - * @deprecated This will be made private. Instead, use: - * {@link startOffset()}, or @{link setStartOffset(int)}. - */ - int startOffset; - - /** - * End in source text. - * @deprecated This will be made private. Instead, use: - * {@link endOffset()}, or @{link setEndOffset(int)}. - */ - int endOffset; - - /** - * The lexical type of the token. - * @deprecated This will be made private. Instead, use: - * {@link type()}, or @{link setType(String)}. - */ - String type = DEFAULT_TYPE; - private int flags; - - /** - * @deprecated This will be made private. Instead, use: - * {@link getPayload()}, or @{link setPayload(Payload)}. - */ - Payload payload; - - /** - * @deprecated This will be made private. Instead, use: - * {@link getPositionIncrement()}, or @{link setPositionIncrement(String)}. - */ - int positionIncrement = 1; + private BytesRef payload; /** Constructs a Token will null text. */ public Token() { @@ -186,8 +84,7 @@ * @param start start offset in the source text * @param end end offset in the source text */ public Token(int start, int end) { - startOffset = start; - endOffset = end; + setOffset(start, end); } /** Constructs a Token with null text and start & end @@ -196,9 +93,8 @@ * @param end end offset in the source text * @param typ the lexical type of this Token */ public Token(int start, int end, String typ) { - startOffset = start; - endOffset = end; - type = typ; + setOffset(start, end); + setType(typ); } /** @@ -209,9 +105,8 @@ * @param flags The bits to set for this token */ public Token(int start, int end, int flags) { - startOffset = start; - endOffset = end; - this.flags = flags; + setOffset(start, end); + setFlags(flags); } /** Constructs a Token with the given term text, and start @@ -220,640 +115,273 @@ * instead use the char[] termBuffer methods to set the * term text. * @param text term text - * @param start start offset - * @param end end offset - * @deprecated + * @param start start offset in the source text + * @param end end offset in the source text */ - public Token(String text, int start, int end) { - termText = text; - startOffset = start; - endOffset = end; + public Token(CharSequence text, int start, int end) { + append(text); + setOffset(start, end); } /** Constructs a Token with the given text, start and end * offsets, & type. NOTE: for better indexing * speed you should instead use the char[] termBuffer * methods to set the term text. 
* @param text term text - * @param start start offset - * @param end end offset + * @param start start offset in the source text + * @param end end offset in the source text * @param typ token type - * @deprecated */ public Token(String text, int start, int end, String typ) { - termText = text; - startOffset = start; - endOffset = end; - type = typ; + append(text); + setOffset(start, end); + setType(typ); } /** * Constructs a Token with the given text, start and end * offsets, & type. NOTE: for better indexing * speed you should instead use the char[] termBuffer * methods to set the term text. - * @param text - * @param start - * @param end + * @param text term text + * @param start start offset in the source text + * @param end end offset in the source text * @param flags token type bits - * @deprecated */ public Token(String text, int start, int end, int flags) { - termText = text; - startOffset = start; - endOffset = end; - this.flags = flags; + append(text); + setOffset(start, end); + setFlags(flags); } /** * Constructs a Token with the given term buffer (offset * & length), start and end * offsets - * @param startTermBuffer - * @param termBufferOffset - * @param termBufferLength - * @param start - * @param end + * @param startTermBuffer buffer containing term text + * @param termBufferOffset the index in the buffer of the first character + * @param termBufferLength number of valid characters in the buffer + * @param start start offset in the source text + * @param end end offset in the source text */ public Token(char[] startTermBuffer, int termBufferOffset, int termBufferLength, int start, int end) { - setTermBuffer(startTermBuffer, termBufferOffset, termBufferLength); - startOffset = start; - endOffset = end; + copyBuffer(startTermBuffer, termBufferOffset, termBufferLength); + setOffset(start, end); } - /** Set the position increment. This determines the position of this token - * relative to the previous Token in a {@link TokenStream}, used in phrase - * searching. - * - *

The default value is one. - * - *

Some common uses for this are:

- * @param positionIncrement the distance from the prior term - * @see org.apache.lucene.index.TermPositions - */ - public void setPositionIncrement(int positionIncrement) { - if (positionIncrement < 0) - throw new IllegalArgumentException - ("Increment must be zero or greater: " + positionIncrement); - this.positionIncrement = positionIncrement; - } - - /** Returns the position increment of this Token. - * @see #setPositionIncrement - */ - public int getPositionIncrement() { - return positionIncrement; - } - - /** Sets the Token's term text. NOTE: for better - * indexing speed you should instead use the char[] - * termBuffer methods to set the term text. - * @deprecated use {@link #setTermBuffer(char[], int, int)} or - * {@link #setTermBuffer(String)} or - * {@link #setTermBuffer(String, int, int)}. - */ - public void setTermText(String text) { - termText = text; - termBuffer = null; - } - - /** Returns the Token's term text. - * - * @deprecated This method now has a performance penalty - * because the text is stored internally in a char[]. If - * possible, use {@link #termBuffer()} and {@link - * #termLength()} directly instead. If you really need a - * String, use {@link #term()} - */ - public final String termText() { - if (termText == null && termBuffer != null) - termText = new String(termBuffer, 0, termLength); - return termText; - } - - /** Returns the Token's term text. - * - * This method has a performance penalty - * because the text is stored internally in a char[]. If - * possible, use {@link #termBuffer()} and {@link - * #termLength()} directly instead. If you really need a - * String, use this method, which is nothing more than - * a convenience call to new String(token.termBuffer(), 0, token.termLength()) - */ - public final String term() { - if (termText != null) - return termText; - initTermBuffer(); - return new String(termBuffer, 0, termLength); - } - - /** Copies the contents of buffer, starting at offset for - * length characters, into the termBuffer array. - * @param buffer the buffer to copy - * @param offset the index in the buffer of the first character to copy - * @param length the number of characters to copy - */ - public final void setTermBuffer(char[] buffer, int offset, int length) { - termText = null; - char[] newCharBuffer = growTermBuffer(length); - if (newCharBuffer != null) { - termBuffer = newCharBuffer; - } - System.arraycopy(buffer, offset, termBuffer, 0, length); - termLength = length; - } - - /** Copies the contents of buffer into the termBuffer array. - * @param buffer the buffer to copy - */ - public final void setTermBuffer(String buffer) { - termText = null; - int length = buffer.length(); - char[] newCharBuffer = growTermBuffer(length); - if (newCharBuffer != null) { - termBuffer = newCharBuffer; - } - buffer.getChars(0, length, termBuffer, 0); - termLength = length; - } - - /** Copies the contents of buffer, starting at offset and continuing - * for length characters, into the termBuffer array. 
- * @param buffer the buffer to copy - * @param offset the index in the buffer of the first character to copy - * @param length the number of characters to copy - */ - public final void setTermBuffer(String buffer, int offset, int length) { - assert offset <= buffer.length(); - assert offset + length <= buffer.length(); - termText = null; - char[] newCharBuffer = growTermBuffer(length); - if (newCharBuffer != null) { - termBuffer = newCharBuffer; - } - buffer.getChars(offset, offset + length, termBuffer, 0); - termLength = length; - } - - /** Returns the internal termBuffer character array which - * you can then directly alter. If the array is too - * small for your token, use {@link - * #resizeTermBuffer(int)} to increase it. After - * altering the buffer be sure to call {@link - * #setTermLength} to record the number of valid - * characters that were placed into the termBuffer. */ - public final char[] termBuffer() { - initTermBuffer(); - return termBuffer; - } - - /** Grows the termBuffer to at least size newSize, preserving the - * existing content. Note: If the next operation is to change - * the contents of the term buffer use - * {@link #setTermBuffer(char[], int, int)}, - * {@link #setTermBuffer(String)}, or - * {@link #setTermBuffer(String, int, int)} - * to optimally combine the resize with the setting of the termBuffer. - * @param newSize minimum size of the new termBuffer - * @return newly created termBuffer with length >= newSize - */ - public char[] resizeTermBuffer(int newSize) { - char[] newCharBuffer = growTermBuffer(newSize); - if (termBuffer == null) { - // If there were termText, then preserve it. - // note that if termBuffer is null then newCharBuffer cannot be null - assert newCharBuffer != null; - if (termText != null) { - termText.getChars(0, termText.length(), newCharBuffer, 0); - } - termBuffer = newCharBuffer; - } else if (newCharBuffer != null) { - // Note: if newCharBuffer != null then termBuffer needs to grow. 
- // If there were a termBuffer, then preserve it - System.arraycopy(termBuffer, 0, newCharBuffer, 0, termBuffer.length); - termBuffer = newCharBuffer; - } - termText = null; - return termBuffer; - } - - /** Allocates a buffer char[] of at least newSize - * @param newSize minimum size of the buffer - * @return newly created buffer with length >= newSize or null if the current termBuffer is big enough - */ - private char[] growTermBuffer(int newSize) { - if (termBuffer != null) { - if (termBuffer.length >= newSize) - // Already big enough - return null; - else - // Not big enough; create a new array with slight - // over allocation: - return new char[ArrayUtil.getNextSize(newSize)]; - } else { - - // determine the best size - // The buffer is always at least MIN_BUFFER_SIZE - if (newSize < MIN_BUFFER_SIZE) { - newSize = MIN_BUFFER_SIZE; - } - - // If there is already a termText, then the size has to be at least that big - if (termText != null) { - int ttLength = termText.length(); - if (newSize < ttLength) { - newSize = ttLength; - } - } - - return new char[newSize]; - } - } - - // TODO: once we remove the deprecated termText() method - // and switch entirely to char[] termBuffer we don't need - // to use this method anymore - private void initTermBuffer() { - if (termBuffer == null) { - if (termText == null) { - termBuffer = new char[MIN_BUFFER_SIZE]; - termLength = 0; - } else { - int length = termText.length(); - if (length < MIN_BUFFER_SIZE) length = MIN_BUFFER_SIZE; - termBuffer = new char[length]; - termLength = termText.length(); - termText.getChars(0, termText.length(), termBuffer, 0); - termText = null; - } - } else if (termText != null) - termText = null; - } - - /** Return number of valid characters (length of the term) - * in the termBuffer array. */ - public final int termLength() { - initTermBuffer(); - return termLength; - } - - /** Set number of valid characters (length of the term) in - * the termBuffer array. Use this to truncate the termBuffer - * or to synchronize with external manipulation of the termBuffer. - * Note: to grow the size of the array, - * use {@link #resizeTermBuffer(int)} first. - * @param length the truncated length - */ - public final void setTermLength(int length) { - initTermBuffer(); - if (length > termBuffer.length) - throw new IllegalArgumentException("length " + length + " exceeds the size of the termBuffer (" + termBuffer.length + ")"); - termLength = length; - } - - /** Returns this Token's starting offset, the position of the first character - corresponding to this token in the source text. - - Note that the difference between endOffset() and startOffset() may not be - equal to termText.length(), as the term text may have been altered by a - stemmer or some other filter. */ - public final int startOffset() { - return startOffset; - } - - /** Set the starting offset. - @see #startOffset() */ - public void setStartOffset(int offset) { - this.startOffset = offset; - } - - /** Returns this Token's ending offset, one greater than the position of the - last character corresponding to this token in the source text. The length - of the token in the source text is (endOffset - startOffset). */ - public final int endOffset() { - return endOffset; - } - - /** Set the ending offset. - @see #endOffset() */ - public void setEndOffset(int offset) { - this.endOffset = offset; - } - - /** Returns this Token's lexical type. Defaults to "word". */ - public final String type() { - return type; - } - - /** Set the lexical type. 
- @see #type() */ - public final void setType(String type) { - this.type = type; - } - /** - * EXPERIMENTAL: While we think this is here to stay, we may want to change it to be a long. - *

- * - * Get the bitset for any bits that have been set. This is completely distinct from {@link #type()}, although they do share similar purposes. - * The flags can be used to encode information about the token for use by other {@link org.apache.lucene.analysis.TokenFilter}s. - * - * - * @return The bits + * {@inheritDoc} + * @see FlagsAttribute */ + @Override public int getFlags() { return flags; } /** - * @see #getFlags() + * {@inheritDoc} + * @see FlagsAttribute */ + @Override public void setFlags(int flags) { this.flags = flags; } /** - * Returns this Token's payload. - */ - public Payload getPayload() { + * {@inheritDoc} + * @see PayloadAttribute + */ + @Override + public BytesRef getPayload() { return this.payload; } - /** - * Sets this Token's payload. + /** + * {@inheritDoc} + * @see PayloadAttribute */ - public void setPayload(Payload payload) { + @Override + public void setPayload(BytesRef payload) { this.payload = payload; } - public String toString() { - StringBuffer sb = new StringBuffer(); - sb.append('('); - initTermBuffer(); - if (termBuffer == null) - sb.append("null"); - else - sb.append(termBuffer, 0, termLength); - sb.append(',').append(startOffset).append(',').append(endOffset); - if (!type.equals("word")) - sb.append(",type=").append(type); - if (positionIncrement != 1) - sb.append(",posIncr=").append(positionIncrement); - sb.append(')'); - return sb.toString(); - } - - /** Resets the term text, payload, flags, and positionIncrement to default. - * Other fields such as startOffset, endOffset and the token type are - * not reset since they are normally overwritten by the tokenizer. */ + /** Resets the term text, payload, flags, positionIncrement, positionLength, + * startOffset, endOffset and token type to default. + */ + @Override public void clear() { - payload = null; - // Leave termBuffer to allow re-use - termLength = 0; - termText = null; - positionIncrement = 1; + super.clear(); flags = 0; - // startOffset = endOffset = 0; - // type = DEFAULT_TYPE; + payload = null; } - public Object clone() { - try { - Token t = (Token)super.clone(); - // Do a deep clone - if (termBuffer != null) { - t.termBuffer = (char[]) termBuffer.clone(); - } - if (payload != null) { - t.setPayload((Payload) payload.clone()); - } - return t; - } catch (CloneNotSupportedException e) { - throw new RuntimeException(e); // shouldn't happen + @Override + public Token clone() { + Token t = (Token)super.clone(); + // Do a deep clone + if (payload != null) { + t.payload = payload.clone(); } - } - - /** Makes a clone, but replaces the term buffer & - * start/end offset in the process. This is more - * efficient than doing a full clone (and then calling - * setTermBuffer) because it saves a wasted copy of the old - * termBuffer. 
*/ - public Token clone(char[] newTermBuffer, int newTermOffset, int newTermLength, int newStartOffset, int newEndOffset) { - final Token t = new Token(newTermBuffer, newTermOffset, newTermLength, newStartOffset, newEndOffset); - t.positionIncrement = positionIncrement; - t.flags = flags; - t.type = type; - if (payload != null) - t.payload = (Payload) payload.clone(); return t; } + @Override public boolean equals(Object obj) { if (obj == this) return true; if (obj instanceof Token) { - Token other = (Token) obj; - - initTermBuffer(); - other.initTermBuffer(); - - if (termLength == other.termLength && - startOffset == other.startOffset && - endOffset == other.endOffset && - flags == other.flags && - positionIncrement == other.positionIncrement && - subEqual(type, other.type) && - subEqual(payload, other.payload)) { - for(int i=0;iToken as implementation for the basic + * attributes and return the default impl (with "Impl" appended) for all other + * attributes. + * @since 3.0 + */ + public static final AttributeFactory TOKEN_ATTRIBUTE_FACTORY = + AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, Token.class); } Index: 3rdParty_sources/lucene/org/apache/lucene/analysis/TokenFilter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/analysis/TokenFilter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/analysis/TokenFilter.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/analysis/TokenFilter.java 16 Dec 2014 11:31:57 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.analysis; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,30 +19,54 @@ import java.io.IOException; -/** A TokenFilter is a TokenStream whose input is another token stream. +/** A TokenFilter is a TokenStream whose input is another TokenStream.
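A minimal custom filter sketch illustrating the incrementToken() contract described below (not part of this patch; CharTermAttribute is from org.apache.lucene.analysis.tokenattributes, and the class is final to satisfy TokenStream's assertion checks):

    public final class UpperCaseFilter extends TokenFilter {
      private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

      public UpperCaseFilter(TokenStream input) {
        super(input);
      }

      @Override
      public boolean incrementToken() throws IOException {
        if (!input.incrementToken()) {
          return false;                                 // end of stream: nothing to modify
        }
        char[] buffer = termAtt.buffer();
        for (int i = 0; i < termAtt.length(); i++) {
          buffer[i] = Character.toUpperCase(buffer[i]); // rewrite the shared term attribute in place
        }
        return true;
      }
    }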

- This is an abstract class. - NOTE: subclasses must override {@link #next(Token)}. It's - also OK to instead override {@link #next()} but that - method is now deprecated in favor of {@link #next(Token)}. + This is an abstract class; subclasses must override {@link #incrementToken()}. + @see TokenStream */ public abstract class TokenFilter extends TokenStream { /** The source of tokens for this filter. */ - protected TokenStream input; + protected final TokenStream input; /** Construct a token stream filtering the given input. */ protected TokenFilter(TokenStream input) { + super(input); this.input = input; } - - /** Close the input TokenStream. */ + + /** + * {@inheritDoc} + *

+ * NOTE: + * The default implementation chains the call to the input TokenStream, so + * be sure to call super.end() first when overriding this method. + */ + @Override + public void end() throws IOException { + input.end(); + } + + /** + * {@inheritDoc} + *

+ * NOTE: + * The default implementation chains the call to the input TokenStream, so + * be sure to call super.close() when overriding this method. + */ + @Override public void close() throws IOException { input.close(); } - /** Reset the filter as well as the input TokenStream. */ + /** + * {@inheritDoc} + *

+ * NOTE: + * The default implementation chains the call to the input TokenStream, so + * be sure to call super.reset() when overriding this method. + */ + @Override public void reset() throws IOException { - super.reset(); input.reset(); } } Index: 3rdParty_sources/lucene/org/apache/lucene/analysis/TokenStream.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/analysis/TokenStream.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/analysis/TokenStream.java 17 Aug 2012 14:55:07 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/analysis/TokenStream.java 16 Dec 2014 11:31:57 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.analysis; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,94 +17,192 @@ * limitations under the License. */ -import org.apache.lucene.index.Payload; - import java.io.IOException; +import java.io.Closeable; +import java.lang.reflect.Modifier; -/** A TokenStream enumerates the sequence of tokens, either from - fields of a document or from query text. -

- This is an abstract class. Concrete subclasses are: -

- NOTE: subclasses must override {@link #next(Token)}. It's - also OK to instead override {@link #next()} but that - method is now deprecated in favor of {@link #next(Token)}. - */ +import org.apache.lucene.analysis.tokenattributes.PackedTokenAttributeImpl; +import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute; +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.IndexWriter; +import org.apache.lucene.util.Attribute; +import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeImpl; +import org.apache.lucene.util.AttributeSource; -public abstract class TokenStream { +/** + * A TokenStream enumerates the sequence of tokens, either from + * {@link Field}s of a {@link Document} or from query text. + *

+ * This is an abstract class; concrete subclasses are: + *

+ * A new TokenStream API has been introduced with Lucene 2.9. This API + * has moved from being {@link Token}-based to {@link Attribute}-based. While + * {@link Token} still exists in 2.9 as a convenience class, the preferred way + * to store the information of a {@link Token} is to use {@link AttributeImpl}s. + *

+ * TokenStream now extends {@link AttributeSource}, which provides + * access to all of the token {@link Attribute}s for the TokenStream. + * Note that only one instance per {@link AttributeImpl} is created and reused + * for every token. This approach reduces object creation and allows local + * caching of references to the {@link AttributeImpl}s. See + * {@link #incrementToken()} for further details. + *

+ * The workflow of the new TokenStream API is as follows: + *

    + *
  1. Instantiation of TokenStream/{@link TokenFilter}s which add/get + * attributes to/from the {@link AttributeSource}. + *
  2. The consumer calls {@link TokenStream#reset()}. + *
  3. The consumer retrieves attributes from the stream and stores local + * references to all attributes it wants to access. + *
  4. The consumer calls {@link #incrementToken()} until it returns false + * consuming the attributes after each call. + *
  5. The consumer calls {@link #end()} so that any end-of-stream operations + * can be performed. + *
  6. The consumer calls {@link #close()} to release any resource when finished + * using the TokenStream. + *
+ * To make sure that filters and consumers know which attributes are available, + * the attributes must be added during instantiation. Filters and consumers are + * not required to check for availability of attributes in + * {@link #incrementToken()}. + *
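The six workflow steps above, as a minimal consumer sketch (not part of this patch; the analyzer, field name and text are placeholders, and the attribute classes are from org.apache.lucene.analysis.tokenattributes):

    TokenStream ts = analyzer.tokenStream("body", new StringReader("some content")); // step 1: build the stream
    CharTermAttribute term = ts.addAttribute(CharTermAttribute.class);               // step 3: keep attribute references
    OffsetAttribute offset = ts.addAttribute(OffsetAttribute.class);
    try {
      ts.reset();                                                                    // step 2
      while (ts.incrementToken()) {                                                  // step 4
        System.out.println(term + " [" + offset.startOffset() + "," + offset.endOffset() + "]");
      }
      ts.end();                                                                      // step 5
    } finally {
      ts.close();                                                                    // step 6
    }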

+ * You can find some example code for the new API in the analysis package level + * Javadoc. + *

+ * Sometimes it is desirable to capture a current state of a TokenStream, + * e.g., for buffering purposes (see {@link CachingTokenFilter}, + * TeeSinkTokenFilter). For this usecase + * {@link AttributeSource#captureState} and {@link AttributeSource#restoreState} + * can be used. + *
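A hedged sketch of that capture/restore idiom (not part of this patch; ts is any TokenStream currently positioned on a token):

    AttributeSource.State saved = ts.captureState();  // snapshot of all attribute values for the current token
    // ... advance the stream or mutate attributes ...
    ts.restoreState(saved);                            // every attribute is restored to the snapshot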

The {@code TokenStream}-API in Lucene is based on the decorator pattern. + * Therefore all non-abstract subclasses must be final or have at least a final + * implementation of {@link #incrementToken}! This is checked when Java + * assertions are enabled. + */ +public abstract class TokenStream extends AttributeSource implements Closeable { + + /** Default {@link AttributeFactory} instance that should be used for TokenStreams. */ + public static final AttributeFactory DEFAULT_TOKEN_ATTRIBUTE_FACTORY = + AttributeFactory.getStaticImplementation(AttributeFactory.DEFAULT_ATTRIBUTE_FACTORY, PackedTokenAttributeImpl.class); - /** Returns the next token in the stream, or null at EOS. - * @deprecated The returned Token is a "full private copy" (not - * re-used across calls to next()) but will be slower - * than calling {@link #next(Token)} instead.. */ - public Token next() throws IOException { - final Token reusableToken = new Token(); - Token nextToken = next(reusableToken); - - if (nextToken != null) { - Payload p = nextToken.getPayload(); - if (p != null) { - nextToken.setPayload((Payload) p.clone()); - } + /** + * A TokenStream using the default attribute factory. + */ + protected TokenStream() { + super(DEFAULT_TOKEN_ATTRIBUTE_FACTORY); + assert assertFinal(); + } + + /** + * A TokenStream that uses the same attributes as the supplied one. + */ + protected TokenStream(AttributeSource input) { + super(input); + assert assertFinal(); + } + + /** + * A TokenStream using the supplied AttributeFactory for creating new {@link Attribute} instances. + */ + protected TokenStream(AttributeFactory factory) { + super(factory); + assert assertFinal(); + } + + private boolean assertFinal() { + try { + final Class clazz = getClass(); + if (!clazz.desiredAssertionStatus()) + return true; + assert clazz.isAnonymousClass() || + (clazz.getModifiers() & (Modifier.FINAL | Modifier.PRIVATE)) != 0 || + Modifier.isFinal(clazz.getMethod("incrementToken").getModifiers()) : + "TokenStream implementation classes or at least their incrementToken() implementation must be final"; + return true; + } catch (NoSuchMethodException nsme) { + return false; } - - return nextToken; } - - /** Returns the next token in the stream, or null at EOS. - * When possible, the input Token should be used as the - * returned Token (this gives fastest tokenization - * performance), but this is not required and a new Token - * may be returned. Callers may re-use a single Token - * instance for successive calls to this method. - *

- * This implicitly defines a "contract" between - * consumers (callers of this method) and - * producers (implementations of this method - * that are the source for tokens): - *

- * Also, the producer must make no assumptions about a - * Token after it has been returned: the caller may - * arbitrarily change it. If the producer needs to hold - * onto the token for subsequent calls, it must clone() - * it before storing it. - * Note that a {@link TokenFilter} is considered a consumer. - * @param reusableToken a Token that may or may not be used to - * return; this parameter should never be null (the callee - * is not required to check for null before using it, but it is a - * good idea to assert that it is not null.) - * @return next token in the stream or null if end-of-stream was hit + + /** + * Consumers (i.e., {@link IndexWriter}) use this method to advance the stream to + * the next token. Implementing classes must implement this method and update + * the appropriate {@link AttributeImpl}s with the attributes of the next + * token. + *

+ * The producer must make no assumptions about the attributes after the method + * has been returned: the caller may arbitrarily change it. If the producer + * needs to preserve the state for subsequent calls, it can use + * {@link #captureState} to create a copy of the current attribute state. + *

+ * This method is called for every token of a document, so an efficient + * implementation is crucial for good performance. To avoid calls to + * {@link #addAttribute(Class)} and {@link #getAttribute(Class)}, + * references to all {@link AttributeImpl}s that this stream uses should be + * retrieved during instantiation. + *
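For instance (a sketch, not the LowerCaseFilter bundled with Lucene, and without handling of supplementary characters), the attribute reference is taken once as a field initializer and incrementToken() only works on that field:

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

// The attribute reference is retrieved at instantiation, never inside incrementToken().
final class LowerCaseSketchFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);

  LowerCaseSketchFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    if (!input.incrementToken()) {
      return false;
    }
    final char[] buffer = termAtt.buffer();
    for (int i = 0; i < termAtt.length(); i++) {
      buffer[i] = Character.toLowerCase(buffer[i]);  // work directly on the term buffer
    }
    return true;
  }
}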

+ * To ensure that filters and consumers know which attributes are available, + * the attributes must be added during instantiation. Filters and consumers + * are not required to check for availability of attributes in + * {@link #incrementToken()}. + * + * @return false for end of stream; true otherwise */ - public Token next(final Token reusableToken) throws IOException { - // We don't actually use inputToken, but still add this assert - assert reusableToken != null; - return next(); + public abstract boolean incrementToken() throws IOException; + + /** + * This method is called by the consumer after the last token has been + * consumed, after {@link #incrementToken()} returned false + * (using the new TokenStream API). Streams implementing the old API + * should upgrade to use this feature. + *

+ * This method can be used to perform any end-of-stream operations, such as + * setting the final offset of a stream. The final offset of a stream might + * differ from the offset of the last token, e.g., in case one or more whitespace + * characters followed the last token and a WhitespaceTokenizer was used. + *

+ * Additionally, any skipped positions (such as those removed by a stop filter) + * can be applied to the position increment, as can any adjustment of other + * attributes where the end-of-stream value may be important. + *
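A sketch of that pattern (illustrative only, not the StopFilter shipped with Lucene): positions skipped while dropping tokens are added to the next emitted token, and any trailing gap is applied in end() after calling super.end():

import java.io.IOException;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

// Drops tokens equal to "the" while keeping position increments consistent.
final class DropTheFilter extends TokenFilter {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final PositionIncrementAttribute posIncAtt = addAttribute(PositionIncrementAttribute.class);
  private int skippedPositions;

  DropTheFilter(TokenStream input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    skippedPositions = 0;
    while (input.incrementToken()) {
      if (!"the".contentEquals(termAtt)) {
        posIncAtt.setPositionIncrement(posIncAtt.getPositionIncrement() + skippedPositions);
        return true;
      }
      skippedPositions += posIncAtt.getPositionIncrement();  // remember the dropped position
    }
    return false;  // skippedPositions now holds the trailing gap, applied in end()
  }

  @Override
  public void end() throws IOException {
    super.end();  // clears the attributes and zeroes the position increment (see the implementation below)
    posIncAtt.setPositionIncrement(posIncAtt.getPositionIncrement() + skippedPositions);
  }

  @Override
  public void reset() throws IOException {
    super.reset();
    skippedPositions = 0;
  }
}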

+ * If you override this method, always call {@code super.end()}. + * + * @throws IOException If an I/O error occurs + */ + public void end() throws IOException { + clearAttributes(); // LUCENE-3849: don't consume dirty atts + PositionIncrementAttribute posIncAtt = getAttribute(PositionIncrementAttribute.class); + if (posIncAtt != null) { + posIncAtt.setPositionIncrement(0); + } } - /** Resets this stream to the beginning. This is an - * optional operation, so subclasses may or may not - * implement this method. Reset() is not needed for - * the standard indexing process. However, if the Tokens - * of a TokenStream are intended to be consumed more than - * once, it is necessary to implement reset(). Note that - * if your TokenStream caches tokens and feeds them back - * again after a reset, it is imperative that you - * clone the tokens when you store them away (on the - * first pass) as well as when you return them (on future - * passes after reset()). + /** + * This method is called by a consumer before it begins consumption using + * {@link #incrementToken()}. + *

+ * Resets this stream to a clean state. Stateful implementations must implement + * this method so that they can be reused, just as if they had been created fresh. + *

+ * If you override this method, always call {@code super.reset()}, otherwise + * some internal state will not be correctly reset (e.g., {@link Tokenizer} will + * throw {@link IllegalStateException} on further usage). */ public void reset() throws IOException {} - /** Releases resources associated with this stream. */ + /** Releases resources associated with this stream. + *

+ * If you override this method, always call {@code super.close()}, otherwise + * some internal state will not be correctly reset (e.g., {@link Tokenizer} will + * throw {@link IllegalStateException} on reuse). + */ + @Override public void close() throws IOException {} + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/TokenStreamToAutomaton.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/analysis/Tokenizer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/analysis/Tokenizer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/analysis/Tokenizer.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/analysis/Tokenizer.java 16 Dec 2014 11:31:57 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.analysis; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,43 +17,104 @@ * limitations under the License. */ +import org.apache.lucene.util.AttributeFactory; +import org.apache.lucene.util.AttributeSource; + import java.io.Reader; import java.io.IOException; /** A Tokenizer is a TokenStream whose input is a Reader.

- This is an abstract class. + This is an abstract class; subclasses must override {@link #incrementToken()}.
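A minimal subclass sketch (invented for this note; real applications should prefer the tokenizers in the analysis modules) that overrides incrementToken(), calls clearAttributes() first as required by the note below, and reports offsets through correctOffset():

import java.io.IOException;
import java.io.Reader;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;

// Splits on whitespace; no handling of supplementary characters or very long tokens.
final class SimpleWhitespaceTokenizer extends Tokenizer {
  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
  private int offset;  // number of chars consumed from the reader

  SimpleWhitespaceTokenizer(Reader input) {
    super(input);
  }

  @Override
  public boolean incrementToken() throws IOException {
    clearAttributes();                    // required before setting any attribute
    int c;
    while ((c = input.read()) != -1 && Character.isWhitespace(c)) {
      offset++;                           // skip leading whitespace
    }
    if (c == -1) {
      return false;                       // end of stream
    }
    final int start = offset;
    termAtt.setEmpty();
    do {
      termAtt.append((char) c);
      offset++;
    } while ((c = input.read()) != -1 && !Character.isWhitespace(c));
    if (c != -1) {
      offset++;                           // account for the terminating whitespace char
    }
    offsetAtt.setOffset(correctOffset(start), correctOffset(start + termAtt.length()));
    return true;
  }

  @Override
  public void end() throws IOException {
    super.end();
    final int finalOffset = correctOffset(offset);
    offsetAtt.setOffset(finalOffset, finalOffset);  // final offset of the stream
  }

  @Override
  public void reset() throws IOException {
    super.reset();                        // makes the pending reader available as 'input'
    offset = 0;
  }
}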

- NOTE: subclasses must override {@link #next(Token)}. It's - also OK to instead override {@link #next()} but that - method is now deprecated in favor of {@link #next(Token)}. -

- NOTE: subclasses overriding {@link #next(Token)} must - call {@link Token#clear()}. + NOTE: Subclasses overriding {@link #incrementToken()} must + call {@link AttributeSource#clearAttributes()} before + setting attributes. */ - -public abstract class Tokenizer extends TokenStream { +public abstract class Tokenizer extends TokenStream { /** The text source for this Tokenizer. */ - protected Reader input; + protected Reader input = ILLEGAL_STATE_READER; + + /** Pending reader: not actually assigned to input until reset() */ + private Reader inputPending = ILLEGAL_STATE_READER; - /** Construct a tokenizer with null input. */ - protected Tokenizer() {} - /** Construct a token stream processing the given input. */ protected Tokenizer(Reader input) { - this.input = input; + if (input == null) { + throw new NullPointerException("input must not be null"); + } + this.inputPending = input; } + + /** Construct a token stream processing the given input using the given AttributeFactory. */ + protected Tokenizer(AttributeFactory factory, Reader input) { + super(factory); + if (input == null) { + throw new NullPointerException("input must not be null"); + } + this.inputPending = input; + } - /** By default, closes the input Reader. */ + /** + * {@inheritDoc} + *

+ * NOTE: + * The default implementation closes the input Reader, so + * be sure to call super.close() when overriding this method. + */ + @Override public void close() throws IOException { input.close(); + // LUCENE-2387: don't hold onto Reader after close, so + // GC can reclaim + inputPending = input = ILLEGAL_STATE_READER; } + + /** Return the corrected offset. If {@link #input} is a {@link CharFilter} subclass + * this method calls {@link CharFilter#correctOffset}, else returns currentOff. + * @param currentOff offset as seen in the output + * @return corrected offset based on the input + * @see CharFilter#correctOffset + */ + protected final int correctOffset(int currentOff) { + return (input instanceof CharFilter) ? ((CharFilter) input).correctOffset(currentOff) : currentOff; + } - /** Expert: Reset the tokenizer to a new reader. Typically, an - * analyzer (in its reusableTokenStream method) will use + /** Expert: Set a new reader on the Tokenizer. Typically, an + * analyzer (in its tokenStream method) will use * this to re-use a previously created tokenizer. */ - public void reset(Reader input) throws IOException { - this.input = input; + public final void setReader(Reader input) throws IOException { + if (input == null) { + throw new NullPointerException("input must not be null"); + } else if (this.input != ILLEGAL_STATE_READER) { + throw new IllegalStateException("TokenStream contract violation: close() call missing"); + } + this.inputPending = input; + assert setReaderTestPoint(); } + + @Override + public void reset() throws IOException { + super.reset(); + input = inputPending; + inputPending = ILLEGAL_STATE_READER; + } + + // only used by assert, for testing + boolean setReaderTestPoint() { + return true; + } + + private static final Reader ILLEGAL_STATE_READER = new Reader() { + @Override + public int read(char[] cbuf, int off, int len) { + throw new IllegalStateException("TokenStream contract violation: reset()/close() call missing, " + + "reset() called multiple times, or subclass does not call super.reset(). " + + "Please see Javadocs of TokenStream class for more information about the correct consuming workflow."); + } + + @Override + public void close() {} + }; } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/WhitespaceAnalyzer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/WhitespaceTokenizer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/analysis/WordlistLoader.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/analysis/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/analysis/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/analysis/package.html 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/analysis/package.html 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -18,13 +18,12 @@ -

API and code to convert text into indexable/searchable tokens. Covers {@link org.apache.lucene.analysis.Analyzer} and related classes.

Parsing? Tokenization? Analysis!

-Lucene, indexing and search library, accepts only plain text input. +Lucene, an indexing and search library, accepts only plain text input.

Parsing

@@ -34,20 +33,29 @@

Tokenization

-Plain text passed to Lucene for indexing goes through a process generally called tokenization – namely breaking of the -input text into small indexing elements – -{@link org.apache.lucene.analysis.Token Tokens}. -The way input text is broken into tokens very -much dictates further capabilities of search upon that text. +Plain text passed to Lucene for indexing goes through a process generally called tokenization. Tokenization is the process +of breaking input text into small indexing elements – tokens. +The way input text is broken into tokens heavily influences how people will then be able to search for that text. For instance, sentence beginnings and endings can be identified to provide for more accurate phrase and proximity searches (though sentence identification is not provided by Lucene).

-In some cases simply breaking the input text into tokens is not enough – a deeper Analysis is needed, -providing for several functions, including (but not limited to): + In some cases simply breaking the input text into tokens is not enough + – a deeper Analysis may be needed. Lucene includes both + pre- and post-tokenization analysis facilities. +

+

+ Pre-tokenization analysis can include (but is not limited to) stripping + HTML markup, and transforming or removing text matching arbitrary patterns + or sets of fixed strings. +
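One hedged way to plug such pre-tokenization analysis into an Analyzer is to override initReader(); HTMLStripCharFilter lives in the analysis-common module, and MyTokenizer below is a placeholder for whatever tokenizer the application uses:

import java.io.Reader;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.charfilter.HTMLStripCharFilter;

Analyzer analyzer = new Analyzer() {
  @Override
  protected Reader initReader(String fieldName, Reader reader) {
    return new HTMLStripCharFilter(reader);      // strip markup before the tokenizer sees the text
  }

  @Override
  protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
    Tokenizer source = new MyTokenizer(reader);  // MyTokenizer is a placeholder, not a Lucene class
    return new TokenStreamComponents(source);
  }
};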

+

+ There are many post-tokenization steps that can be done, including + (but not limited to): +

- */ - public Field(String name, String value, Store store, Index index, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (value == null) - throw new NullPointerException("value cannot be null"); - if (name.length() == 0 && value.length() == 0) - throw new IllegalArgumentException("name and value cannot both be empty"); - if (index == Index.NO && store == Store.NO) - throw new IllegalArgumentException("it doesn't make sense to have a field that " - + "is neither indexed nor stored"); - if (index == Index.NO && termVector != TermVector.NO) - throw new IllegalArgumentException("cannot store term vector information " - + "for a field that is not indexed"); - - this.name = name.intern(); // field names are interned - this.fieldsData = value; - - if (store == Store.YES){ - this.isStored = true; - this.isCompressed = false; - } - else if (store == Store.COMPRESS) { - this.isStored = true; - this.isCompressed = true; - } - else if (store == Store.NO){ - this.isStored = false; - this.isCompressed = false; - } - else - throw new IllegalArgumentException("unknown store parameter " + store); - - if (index == Index.NO) { - this.isIndexed = false; - this.isTokenized = false; - } else if (index == Index.ANALYZED) { - this.isIndexed = true; - this.isTokenized = true; - } else if (index == Index.NOT_ANALYZED) { - this.isIndexed = true; - this.isTokenized = false; - } else if (index == Index.NOT_ANALYZED_NO_NORMS) { - this.isIndexed = true; - this.isTokenized = false; - this.omitNorms = true; - } else if (index == Index.ANALYZED_NO_NORMS) { - this.isIndexed = true; - this.isTokenized = true; - this.omitNorms = true; - } else { - throw new IllegalArgumentException("unknown index parameter " + index); - } - - this.isBinary = false; - - setStoreTermVector(termVector); + * + * @deprecated Use {@link StringField}, {@link TextField} instead. */ + @Deprecated + public Field(String name, String value, Store store, Index index, TermVector termVector) { + this(name, value, translateFieldType(store, index, termVector)); } /** * Create a tokenized and indexed field that is not stored. Term vectors will * not be stored. The Reader is read only when the Document is added to the index, - * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)} + * i.e. you may not close the Reader until {@link IndexWriter#addDocument} * has been called. * * @param name The name of the field * @param reader The reader with the content * @throws NullPointerException if name or reader is null + * + * @deprecated Use {@link TextField} instead. */ + @Deprecated public Field(String name, Reader reader) { this(name, reader, TermVector.NO); } /** * Create a tokenized and indexed field that is not stored, optionally with * storing term vectors. The Reader is read only when the Document is added to the index, - * i.e. you may not close the Reader until {@link IndexWriter#addDocument(Document)} + * i.e. you may not close the Reader until {@link IndexWriter#addDocument} * has been called. * * @param name The name of the field * @param reader The reader with the content * @param termVector Whether term vector should be stored * @throws NullPointerException if name or reader is null + * + * @deprecated Use {@link TextField} instead. 
*/ + @Deprecated public Field(String name, Reader reader, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (reader == null) - throw new NullPointerException("reader cannot be null"); - - this.name = name.intern(); // field names are interned - this.fieldsData = reader; - - this.isStored = false; - this.isCompressed = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); + this(name, reader, translateFieldType(Store.NO, Index.ANALYZED, termVector)); } /** * Create a tokenized and indexed field that is not stored. Term vectors will * not be stored. This is useful for pre-analyzed fields. * The TokenStream is read only when the Document is added to the index, - * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} + * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument} * has been called. * * @param name The name of the field * @param tokenStream The TokenStream with the content * @throws NullPointerException if name or tokenStream is null + * + * @deprecated Use {@link TextField} instead */ + @Deprecated public Field(String name, TokenStream tokenStream) { this(name, tokenStream, TermVector.NO); } - + /** * Create a tokenized and indexed field that is not stored, optionally with * storing term vectors. This is useful for pre-analyzed fields. * The TokenStream is read only when the Document is added to the index, - * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument(Document)} + * i.e. you may not close the TokenStream until {@link IndexWriter#addDocument} * has been called. * * @param name The name of the field * @param tokenStream The TokenStream with the content * @param termVector Whether term vector should be stored * @throws NullPointerException if name or tokenStream is null + * + * @deprecated Use {@link TextField} instead */ + @Deprecated public Field(String name, TokenStream tokenStream, TermVector termVector) { - if (name == null) - throw new NullPointerException("name cannot be null"); - if (tokenStream == null) - throw new NullPointerException("tokenStream cannot be null"); - - this.name = name.intern(); // field names are interned - this.fieldsData = tokenStream; - - this.isStored = false; - this.isCompressed = false; - - this.isIndexed = true; - this.isTokenized = true; - - this.isBinary = false; - - setStoreTermVector(termVector); + this(name, tokenStream, translateFieldType(Store.NO, Index.ANALYZED, termVector)); } - /** * Create a stored field with binary value. Optionally the value may be compressed. * * @param name The name of the field * @param value The binary value - * @param store How value should be stored (compressed or not) - * @throws IllegalArgumentException if store is Store.NO + * + * @deprecated Use {@link StoredField} instead. */ - public Field(String name, byte[] value, Store store) { - this(name, value, 0, value.length, store); + @Deprecated + public Field(String name, byte[] value) { + this(name, value, translateFieldType(Store.YES, Index.NO, TermVector.NO)); } /** @@ -438,39 +1029,11 @@ * @param value The binary value * @param offset Starting offset in value where this Field's bytes are * @param length Number of bytes to use for this Field, starting at offset - * @param store How value should be stored (compressed or not) - * @throws IllegalArgumentException if store is Store.NO + * + * @deprecated Use {@link StoredField} instead. 
*/ - public Field(String name, byte[] value, int offset, int length, Store store) { - - if (name == null) - throw new IllegalArgumentException("name cannot be null"); - if (value == null) - throw new IllegalArgumentException("value cannot be null"); - - this.name = name.intern(); - fieldsData = value; - - if (store == Store.YES) { - isStored = true; - isCompressed = false; - } - else if (store == Store.COMPRESS) { - isStored = true; - isCompressed = true; - } - else if (store == Store.NO) - throw new IllegalArgumentException("binary values can't be unstored"); - else - throw new IllegalArgumentException("unknown store parameter " + store); - - isIndexed = false; - isTokenized = false; - - isBinary = true; - binaryLength = length; - binaryOffset = offset; - - setStoreTermVector(TermVector.NO); + @Deprecated + public Field(String name, byte[] value, int offset, int length) { + this(name, value, offset, length, translateFieldType(Store.YES, Index.NO, TermVector.NO)); } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/FieldSelector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/FieldSelectorResult.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/FieldType.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/Fieldable.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/FloatDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/FloatField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/IntDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/IntField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/LoadFirstFieldSelector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/LongDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/LongField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/MapFieldSelector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/NumberTools.java'. Fisheye: No comparison available. Pass `N' to diff? 
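A hedged migration sketch for the deprecated constructors above (field names and values are invented); the old Store/Index/TermVector combinations map onto the sugar subclasses named in the @deprecated tags:

import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

// old, deprecated form
Field oldTitle = new Field("title", "some title text", Field.Store.YES, Field.Index.ANALYZED, Field.TermVector.NO);
// replacements
Field newTitle = new TextField("title", "some title text", Field.Store.YES);  // analyzed text
Field id       = new StringField("id", "42", Field.Store.YES);                // NOT_ANALYZED equivalent
Field raw      = new StoredField("raw", new byte[] {1, 2, 3});                // stored-only binary value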
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/NumericDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/PackedLongDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/SetBasedFieldSelector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/ShortDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/SortedBytesDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/SortedDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/SortedNumericDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/SortedSetDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/StoredField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/StraightBytesDocValuesField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/StringField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/document/TextField.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/document/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/document/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/document/package.html 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/document/package.html 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -22,33 +22,26 @@

The logical representation of a {@link org.apache.lucene.document.Document} for indexing and searching.

The document package provides the user level logical representation of content to be indexed and searched. The -package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.document.Fieldable}s.

-

Document and Fieldable

-

A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.document.Fieldable}s. A - {@link org.apache.lucene.document.Fieldable} is a logical representation of a user's content that needs to be indexed or stored. - {@link org.apache.lucene.document.Fieldable}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized, - stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.document.Fieldable} +package also provides utilities for working with {@link org.apache.lucene.document.Document}s and {@link org.apache.lucene.index.IndexableField}s.

+

Document and IndexableField

+

A {@link org.apache.lucene.document.Document} is a collection of {@link org.apache.lucene.index.IndexableField}s. A + {@link org.apache.lucene.index.IndexableField} is a logical representation of a user's content that needs to be indexed or stored. + {@link org.apache.lucene.index.IndexableField}s have a number of properties that tell Lucene how to treat the content (like indexed, tokenized, + stored, etc.) See the {@link org.apache.lucene.document.Field} implementation of {@link org.apache.lucene.index.IndexableField} for specifics on these properties.
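For instance (an illustration using the sugar subclasses referenced elsewhere in this patch; the field names and values are invented), a document whose fields are treated differently might be built like this:

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;

Document doc = new Document();
doc.add(new StringField("id", "doc-42", Field.Store.YES));               // indexed as a single token, stored
doc.add(new TextField("body", "full text to analyze", Field.Store.NO));  // indexed and tokenized, not stored
doc.add(new StoredField("price", 9.99f));                                // stored only, not indexed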

Note: it is common to refer to {@link org.apache.lucene.document.Document}s having {@link org.apache.lucene.document.Field}s, even though technically they have -{@link org.apache.lucene.document.Fieldable}s.

+{@link org.apache.lucene.index.IndexableField}s.

Working with Documents

First and foremost, a {@link org.apache.lucene.document.Document} is something created by the user application. It is your job to create Documents based on the content of the files you are working with in your application (Word, txt, PDF, Excel or any other format.) How this is done is completely up to you. That being said, there are many tools available in other projects that can make - the process of taking a file and converting it into a Lucene {@link org.apache.lucene.document.Document}. To see an example of this, - take a look at the Lucene demo and the associated source code - for extracting content from HTML. + the process of taking a file and converting it into a Lucene {@link org.apache.lucene.document.Document} easier.

-

The {@link org.apache.lucene.document.DateTools} and {@link org.apache.lucene.document.NumberTools} classes are utility -classes to make dates, times and longs searchable (remember, Lucene only searches text).

-

The {@link org.apache.lucene.document.FieldSelector} class provides a mechanism to tell Lucene how to load Documents from -storage. If no FieldSelector is used, all Fieldables on a Document will be loaded. As an example of the FieldSelector usage, consider - the common use case of -displaying search results on a web page and then having users click through to see the full document. In this scenario, it is often - the case that there are many small fields and one or two large fields (containing the contents of the original file). Before the FieldSelector, -the full Document had to be loaded, including the large fields, in order to display the results. Now, using the FieldSelector, one -can {@link org.apache.lucene.document.FieldSelectorResult#LAZY_LOAD} the large fields, thus only loading the large fields -when a user clicks on the actual link to view the original content.

+

The {@link org.apache.lucene.document.DateTools} is a utility class to make dates and times searchable +(remember, Lucene only searches text). {@link org.apache.lucene.document.IntField}, {@link org.apache.lucene.document.LongField}, +{@link org.apache.lucene.document.FloatField} and {@link org.apache.lucene.document.DoubleField} are special helper classes +to simplify indexing of numeric values (and also dates) for fast range queries with {@link org.apache.lucene.search.NumericRangeQuery} +(using a special sortable string representation of numeric values).
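A short usage sketch of those helpers (field names and values are placeholders): index a long with LongField, store a DateTools string, and query the indexed values with NumericRangeQuery:

import java.util.Date;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.Query;

Document doc = new Document();
doc.add(new LongField("timestamp", System.currentTimeMillis(), Field.Store.NO));  // indexed for range queries
doc.add(new StoredField("created", DateTools.dateToString(new Date(), DateTools.Resolution.MINUTE)));

// match documents whose timestamp falls in the last hour (both bounds inclusive)
long now = System.currentTimeMillis();
Query q = NumericRangeQuery.newLongRange("timestamp", now - 3600_000L, now, true, true);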

Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/AtomicReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/AtomicReaderContext.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/AutomatonTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BaseCompositeReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BinaryDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BinaryDocValuesFieldUpdates.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BinaryDocValuesWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BitsSlice.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BufferedDeletes.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BufferedUpdates.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/BufferedUpdatesStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/ByteBlockPool.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceReader.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceReader.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceReader.java 17 Aug 2012 14:54:58 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceReader.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,16 +17,18 @@ * limitations under the License. */ -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; import java.io.IOException; +import org.apache.lucene.store.DataInput; +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.ByteBlockPool; + /* IndexInput that knows how to read the byte slices written * by Posting and PostingVector. 
We read the bytes in * each slice until we hit the end of that slice at which * point we read the forwarding address of the next slice * and then jump to it.*/ -final class ByteSliceReader extends IndexInput { +final class ByteSliceReader extends DataInput { ByteBlockPool pool; int bufferUpto; byte[] buffer; @@ -47,16 +49,16 @@ this.endIndex = endIndex; level = 0; - bufferUpto = startIndex / DocumentsWriter.BYTE_BLOCK_SIZE; - bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE; + bufferUpto = startIndex / ByteBlockPool.BYTE_BLOCK_SIZE; + bufferOffset = bufferUpto * ByteBlockPool.BYTE_BLOCK_SIZE; buffer = pool.buffers[bufferUpto]; - upto = startIndex & DocumentsWriter.BYTE_BLOCK_MASK; + upto = startIndex & ByteBlockPool.BYTE_BLOCK_MASK; - final int firstSize = ByteBlockPool.levelSizeArray[0]; + final int firstSize = ByteBlockPool.LEVEL_SIZE_ARRAY[0]; if (startIndex+firstSize >= endIndex) { // There is only this one slice to read - limit = endIndex & DocumentsWriter.BYTE_BLOCK_MASK; + limit = endIndex & ByteBlockPool.BYTE_BLOCK_MASK; } else limit = upto+firstSize-4; } @@ -66,6 +68,7 @@ return upto + bufferOffset == endIndex; } + @Override public byte readByte() { assert !eof(); assert upto <= limit; @@ -74,7 +77,7 @@ return buffer[upto++]; } - public long writeTo(IndexOutput out) throws IOException { + public long writeTo(DataOutput out) throws IOException { long size = 0; while(true) { if (limit + bufferOffset == endIndex) { @@ -97,14 +100,14 @@ // Skip to our next slice final int nextIndex = ((buffer[limit]&0xff)<<24) + ((buffer[1+limit]&0xff)<<16) + ((buffer[2+limit]&0xff)<<8) + (buffer[3+limit]&0xff); - level = ByteBlockPool.nextLevelArray[level]; - final int newSize = ByteBlockPool.levelSizeArray[level]; + level = ByteBlockPool.NEXT_LEVEL_ARRAY[level]; + final int newSize = ByteBlockPool.LEVEL_SIZE_ARRAY[level]; - bufferUpto = nextIndex / DocumentsWriter.BYTE_BLOCK_SIZE; - bufferOffset = bufferUpto * DocumentsWriter.BYTE_BLOCK_SIZE; + bufferUpto = nextIndex / ByteBlockPool.BYTE_BLOCK_SIZE; + bufferOffset = bufferUpto * ByteBlockPool.BYTE_BLOCK_SIZE; buffer = pool.buffers[bufferUpto]; - upto = nextIndex & DocumentsWriter.BYTE_BLOCK_MASK; + upto = nextIndex & ByteBlockPool.BYTE_BLOCK_MASK; if (nextIndex + newSize >= endIndex) { // We are advancing to the final slice @@ -117,6 +120,7 @@ } } + @Override public void readBytes(byte[] b, int offset, int len) { while(len > 0) { final int numLeft = limit-upto; @@ -134,10 +138,4 @@ } } } - - public long getFilePointer() {throw new RuntimeException("not implemented");} - public long length() {throw new RuntimeException("not implemented");} - public void seek(long pos) {throw new RuntimeException("not implemented");} - public void close() {throw new RuntimeException("not implemented");} -} - +} \ No newline at end of file Index: 3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceWriter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceWriter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceWriter.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/ByteSliceWriter.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,9 @@ package org.apache.lucene.index; -/** +import org.apache.lucene.store.DataOutput; +import org.apache.lucene.util.ByteBlockPool; + +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -24,7 +27,7 @@ * posting list for many terms in RAM. */ -final class ByteSliceWriter { +final class ByteSliceWriter extends DataOutput { private byte[] slice; private int upto; @@ -38,16 +41,17 @@ /** * Set up the writer to write at address. - */ + */ public void init(int address) { - slice = pool.buffers[address >> DocumentsWriter.BYTE_BLOCK_SHIFT]; + slice = pool.buffers[address >> ByteBlockPool.BYTE_BLOCK_SHIFT]; assert slice != null; - upto = address & DocumentsWriter.BYTE_BLOCK_MASK; + upto = address & ByteBlockPool.BYTE_BLOCK_MASK; offset0 = address; assert upto < slice.length; } /** Write byte into byte slice stream */ + @Override public void writeByte(byte b) { assert slice != null; if (slice[upto] != 0) { @@ -60,6 +64,7 @@ assert upto != slice.length; } + @Override public void writeBytes(final byte[] b, int offset, final int len) { final int offsetEnd = offset + len; while(offset < offsetEnd) { @@ -76,14 +81,6 @@ } public int getAddress() { - return upto + (offset0 & DocumentsWriter.BYTE_BLOCK_NOT_MASK); + return upto + (offset0 & DocumentsWriterPerThread.BYTE_BLOCK_NOT_MASK); } - - public void writeVInt(int i) { - while ((i & ~0x7F) != 0) { - writeByte((byte)((i & 0x7f) | 0x80)); - i >>>= 7; - } - writeByte((byte) i); - } -} +} \ No newline at end of file Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/CharBlockPool.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/CheckIndex.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/CheckIndex.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/CheckIndex.java 17 Aug 2012 14:55:01 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/CheckIndex.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,19 +17,39 @@ * limitations under the License. 
*/ -import org.apache.lucene.store.FSDirectory; +import java.io.File; +import java.io.IOException; +import java.io.PrintStream; +import java.text.NumberFormat; +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.PostingsFormat; +import org.apache.lucene.codecs.blocktree.FieldReader; +import org.apache.lucene.codecs.blocktree.Stats; +import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.CheckIndex.Status.DocValuesStatus; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; -import org.apache.lucene.document.Document; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.CommandLineUtil; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LongBitSet; +import org.apache.lucene.util.Version; -import java.text.NumberFormat; -import java.io.PrintStream; -import java.io.IOException; -import java.util.Collection; -import java.util.Iterator; -import java.util.List; -import java.util.ArrayList; -import org.apache.lucene.document.Fieldable; // for javadoc /** * Basic tool and API to check the health of an index and @@ -39,30 +59,25 @@ *

As this tool checks every byte in the index, on a large * index it can take quite a long time to run. * - *
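A usage sketch based only on the API visible in this patch (the index path is a placeholder): open the directory, run the check with a verbose info stream, and inspect the returned Status:

import java.io.File;
import org.apache.lucene.index.CheckIndex;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

try (Directory dir = FSDirectory.open(new File("/path/to/index"))) {
  CheckIndex checker = new CheckIndex(dir);
  checker.setInfoStream(System.out, true);        // verbose per-segment output
  CheckIndex.Status status = checker.checkIndex();
  if (!status.clean) {
    System.err.println("index has problems; see the info stream output above");
  }
}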

WARNING: this tool and API is new and - * experimental and is subject to suddenly change in the - * next release. Please make a complete backup of your + * @lucene.experimental Please make a complete backup of your * index before using this to fix your index! */ public class CheckIndex { - /** Default PrintStream for all CheckIndex instances. - * @deprecated Use {@link #setInfoStream} per instance, - * instead. */ - public static PrintStream out = null; - private PrintStream infoStream; private Directory dir; /** * Returned from {@link #checkIndex()} detailing the health and status of the index. * - *

WARNING: this API is new and experimental and is - * subject to suddenly change in the next release. + * @lucene.experimental **/ public static class Status { + Status() { + } + /** True if no problems were found with the index. */ public boolean clean; @@ -81,25 +96,24 @@ /** Number of segments in the index. */ public int numSegments; - /** String description of the version of the index. */ - public String segmentFormat; - /** Empty unless you passed specific segments list to check as optional 3rd argument. * @see CheckIndex#checkIndex(List) */ - public List/**/ segmentsChecked = new ArrayList(); + public List segmentsChecked = new ArrayList<>(); /** True if the index was created with a newer version of Lucene than the CheckIndex tool. */ public boolean toolOutOfDate; /** List of {@link SegmentInfoStatus} instances, detailing status of each segment. */ - public List/* segmentInfos = new ArrayList<>(); /** Directory index is in. */ public Directory dir; - /** SegmentInfos instance containing only segments that - * had no problems (this is used with the {@link - * CheckIndex#fix} method to repair the index. */ + /** + * SegmentInfos instance containing only segments that + * had no problems (this is used with the {@link CheckIndex#fixIndex} + * method to repair the index. + */ SegmentInfos newSegments; /** How many documents will be lost to bad segments. */ @@ -113,16 +127,31 @@ * argument). */ public boolean partial; + /** The greatest segment name. */ + public int maxSegmentName; + + /** Whether the SegmentInfos.counter is greater than any of the segments' names. */ + public boolean validCounter; + + /** Holds the userData of the last commit in the index */ + public Map userData; + /** Holds the status of each segment in the index. * See {@link #segmentInfos}. * - *

WARNING: this API is new and experimental and is - * subject to suddenly change in the next release. + * @lucene.experimental */ public static class SegmentInfoStatus { + + SegmentInfoStatus() { + } + /** Name of the segment. */ public String name; + /** Codec used to read this segment. */ + public Codec codec; + /** Document count (does not take deletions into account). */ public int docCount; @@ -152,76 +181,203 @@ /** True if this segment has pending deletions. */ public boolean hasDeletions; - /** Name of the current deletions file name. */ - public String deletionsFileName; + /** Current deletions generation. */ + public long deletionsGen; /** Number of deleted documents. */ public int numDeleted; - /** True if we were able to open a SegmentReader on this + /** True if we were able to open an AtomicReader on this * segment. */ public boolean openReaderPassed; /** Number of fields in this segment. */ int numFields; - /** True if at least one of the fields in this segment - * does not omitTf. - * @see Fieldable#setOmitTf */ - public boolean hasProx; + /** Map that includes certain + * debugging details that IndexWriter records into + * each segment it creates */ + public Map diagnostics; + + /** Status for testing of field norms (null if field norms could not be tested). */ + public FieldNormStatus fieldNormStatus; + + /** Status for testing of indexed terms (null if indexed terms could not be tested). */ + public TermIndexStatus termIndexStatus; + + /** Status for testing of stored fields (null if stored fields could not be tested). */ + public StoredFieldStatus storedFieldStatus; + + /** Status for testing of term vectors (null if term vectors could not be tested). */ + public TermVectorStatus termVectorStatus; + + /** Status for testing of DocValues (null if DocValues could not be tested). */ + public DocValuesStatus docValuesStatus; } + + /** + * Status from testing field norms. + */ + public static final class FieldNormStatus { + private FieldNormStatus() { + } + + /** Number of fields successfully tested */ + public long totFields = 0L; + + /** Exception thrown during term index test (null on success) */ + public Throwable error = null; + } + + /** + * Status from testing term index. + */ + public static final class TermIndexStatus { + + TermIndexStatus() { + } + + /** Number of terms with at least one live doc. */ + public long termCount = 0L; + + /** Number of terms with zero live docs docs. */ + public long delTermCount = 0L; + + /** Total frequency across all terms. */ + public long totFreq = 0L; + + /** Total number of positions. */ + public long totPos = 0L; + + /** Exception thrown during term index test (null on success) */ + public Throwable error = null; + + /** Holds details of block allocations in the block + * tree terms dictionary (this is only set if the + * {@link PostingsFormat} for this segment uses block + * tree. */ + public Map blockTreeStats = null; + } + + /** + * Status from testing stored fields. + */ + public static final class StoredFieldStatus { + + StoredFieldStatus() { + } + + /** Number of documents tested. */ + public int docCount = 0; + + /** Total number of stored fields tested. */ + public long totFields = 0; + + /** Exception thrown during stored fields test (null on success) */ + public Throwable error = null; + } + + /** + * Status from testing stored fields. + */ + public static final class TermVectorStatus { + + TermVectorStatus() { + } + + /** Number of documents tested. 
*/ + public int docCount = 0; + + /** Total number of term vectors tested. */ + public long totVectors = 0; + + /** Exception thrown during term vector test (null on success) */ + public Throwable error = null; + } + + /** + * Status from testing DocValues + */ + public static final class DocValuesStatus { + + DocValuesStatus() { + } + + /** Total number of docValues tested. */ + public long totalValueFields; + + /** Total number of numeric fields */ + public long totalNumericFields; + + /** Total number of binary fields */ + public long totalBinaryFields; + + /** Total number of sorted fields */ + public long totalSortedFields; + + /** Total number of sortednumeric fields */ + public long totalSortedNumericFields; + + /** Total number of sortedset fields */ + public long totalSortedSetFields; + + /** Exception thrown during doc values test (null on success) */ + public Throwable error = null; + } } /** Create a new CheckIndex on the directory. */ public CheckIndex(Directory dir) { this.dir = dir; - infoStream = out; + infoStream = null; } - /** Set infoStream where messages should go. If null, no - * messages are printed */ - public void setInfoStream(PrintStream out) { - infoStream = out; + private boolean crossCheckTermVectors; + + /** If true, term vectors are compared against postings to + * make sure they are the same. This will likely + * drastically increase time it takes to run CheckIndex! */ + public void setCrossCheckTermVectors(boolean v) { + crossCheckTermVectors = v; } - private void msg(String msg) { - if (infoStream != null) - infoStream.println(msg); + /** See {@link #setCrossCheckTermVectors}. */ + public boolean getCrossCheckTermVectors() { + return crossCheckTermVectors; } - private static class MySegmentTermDocs extends SegmentTermDocs { + private boolean failFast; - int delCount; + /** If true, just throw the original exception immediately when + * corruption is detected, rather than continuing to iterate to other + * segments looking for more corruption. */ + public void setFailFast(boolean v) { + failFast = v; + } - MySegmentTermDocs(SegmentReader p) { - super(p); - } + /** See {@link #setFailFast}. */ + public boolean getFailFast() { + return failFast; + } - public void seek(Term term) throws IOException { - super.seek(term); - delCount = 0; - } + private boolean verbose; - protected void skippingDoc() throws IOException { - delCount++; - } + /** Set infoStream where messages should go. If null, no + * messages are printed. If verbose is true then more + * details are printed. */ + public void setInfoStream(PrintStream out, boolean verbose) { + infoStream = out; + this.verbose = verbose; } - /** Returns true if index is clean, else false. - * @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex()} instead */ - public static boolean check(Directory dir, boolean doFix) throws IOException { - return check(dir, doFix, null); + /** Set infoStream where messages should go. See {@link #setInfoStream(PrintStream,boolean)}. */ + public void setInfoStream(PrintStream out) { + setInfoStream(out, false); } - /** Returns true if index is clean, else false. 
- * @deprecated Please instantiate a CheckIndex and then use {@link #checkIndex(List)} instead */ - public static boolean check(Directory dir, boolean doFix, List onlySegments) throws IOException { - CheckIndex checker = new CheckIndex(dir); - Status status = checker.checkIndex(onlySegments); - if (doFix && !status.clean) - checker.fixIndex(status); - - return status.clean; + private static void msg(PrintStream out, String msg) { + if (out != null) + out.println(msg); } /** Returns a {@link Status} instance detailing @@ -236,7 +392,7 @@ public Status checkIndex() throws IOException { return checkIndex(null); } - + /** Returns a {@link Status} instance detailing * the state of the index. * @@ -249,28 +405,54 @@ *

WARNING: make sure * you only call this when the index is not opened by any * writer. */ - public Status checkIndex(List onlySegments) throws IOException { - NumberFormat nf = NumberFormat.getInstance(); + public Status checkIndex(List onlySegments) throws IOException { + NumberFormat nf = NumberFormat.getInstance(Locale.ROOT); SegmentInfos sis = new SegmentInfos(); Status result = new Status(); result.dir = dir; try { sis.read(dir); } catch (Throwable t) { - msg("ERROR: could not read any segments file in directory"); + if (failFast) { + IOUtils.reThrow(t); + } + msg(infoStream, "ERROR: could not read any segments file in directory"); result.missingSegments = true; if (infoStream != null) t.printStackTrace(infoStream); return result; } + // find the oldest and newest segment versions + Version oldest = null; + Version newest = null; + String oldSegs = null; + for (SegmentCommitInfo si : sis) { + Version version = si.info.getVersion(); + if (version == null) { + // pre-3.1 segment + oldSegs = "pre-3.1"; + } else { + if (oldest == null || version.onOrAfter(oldest) == false) { + oldest = version; + } + if (newest == null || version.onOrAfter(newest)) { + newest = version; + } + } + } + final int numSegments = sis.size(); - final String segmentsFileName = sis.getCurrentSegmentFileName(); + final String segmentsFileName = sis.getSegmentsFileName(); + // note: we only read the format byte (required preamble) here! IndexInput input = null; try { - input = dir.openInput(segmentsFileName); + input = dir.openInput(segmentsFileName, IOContext.READONCE); } catch (Throwable t) { - msg("ERROR: could not open segments file in directory"); + if (failFast) { + IOUtils.reThrow(t); + } + msg(infoStream, "ERROR: could not open segments file in directory"); if (infoStream != null) t.printStackTrace(infoStream); result.cantOpenSegments = true; @@ -280,7 +462,10 @@ try { format = input.readInt(); } catch (Throwable t) { - msg("ERROR: could not read segment file version in directory"); + if (failFast) { + IOUtils.reThrow(t); + } + msg(infoStream, "ERROR: could not read segment file version in directory"); if (infoStream != null) t.printStackTrace(infoStream); result.missingSegmentVersion = true; @@ -293,227 +478,217 @@ String sFormat = ""; boolean skip = false; - if (format == SegmentInfos.FORMAT) - sFormat = "FORMAT [Lucene Pre-2.1]"; - if (format == SegmentInfos.FORMAT_LOCKLESS) - sFormat = "FORMAT_LOCKLESS [Lucene 2.1]"; - else if (format == SegmentInfos.FORMAT_SINGLE_NORM_FILE) - sFormat = "FORMAT_SINGLE_NORM_FILE [Lucene 2.2]"; - else if (format == SegmentInfos.FORMAT_SHARED_DOC_STORE) - sFormat = "FORMAT_SHARED_DOC_STORE [Lucene 2.3]"; - else { - if (format == SegmentInfos.FORMAT_CHECKSUM) - sFormat = "FORMAT_CHECKSUM [Lucene 2.4]"; - else if (format == SegmentInfos.FORMAT_DEL_COUNT) - sFormat = "FORMAT_DEL_COUNT [Lucene 2.4]"; - else if (format == SegmentInfos.FORMAT_HAS_PROX) - sFormat = "FORMAT_HAS_PROX [Lucene 2.4]"; - else if (format < SegmentInfos.CURRENT_FORMAT) { - sFormat = "int=" + format + " [newer version of Lucene than this tool]"; - skip = true; + result.segmentsFileName = segmentsFileName; + result.numSegments = numSegments; + result.userData = sis.getUserData(); + String userDataString; + if (sis.getUserData().size() > 0) { + userDataString = " userData=" + sis.getUserData(); + } else { + userDataString = ""; + } + + String versionString = ""; + if (oldSegs != null) { + if (newest != null) { + versionString = "versions=[" + oldSegs + " .. 
" + newest + "]"; } else { - sFormat = format + " [Lucene 1.3 or prior]"; + versionString = "version=" + oldSegs; } + } else if (newest != null) { // implies oldest != null + versionString = oldest.equals(newest) ? ( "version=" + oldest ) : ("versions=[" + oldest + " .. " + newest + "]"); } - msg("Segments file=" + segmentsFileName + " numSegments=" + numSegments + " version=" + sFormat); - result.segmentsFileName = segmentsFileName; - result.numSegments = numSegments; - result.segmentFormat = sFormat; + msg(infoStream, "Segments file=" + segmentsFileName + " numSegments=" + numSegments + + " " + versionString + " format=" + sFormat + userDataString); if (onlySegments != null) { result.partial = true; - if (infoStream != null) + if (infoStream != null) { infoStream.print("\nChecking only these segments:"); - Iterator it = onlySegments.iterator(); - while (it.hasNext()) { - if (infoStream != null) - infoStream.print(" " + it.next()); + for (String s : onlySegments) { + infoStream.print(" " + s); + } } result.segmentsChecked.addAll(onlySegments); - msg(":"); + msg(infoStream, ":"); } if (skip) { - msg("\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); + msg(infoStream, "\nERROR: this index appears to be created by a newer version of Lucene than this tool was compiled on; please re-compile this tool on the matching version of Lucene; exiting"); result.toolOutOfDate = true; return result; } - result.newSegments = (SegmentInfos) sis.clone(); + result.newSegments = sis.clone(); result.newSegments.clear(); + result.maxSegmentName = -1; for(int i=0;i result.maxSegmentName) { + result.maxSegmentName = segmentName; + } + if (onlySegments != null && !onlySegments.contains(info.info.name)) { continue; + } Status.SegmentInfoStatus segInfoStat = new Status.SegmentInfoStatus(); result.segmentInfos.add(segInfoStat); - msg(" " + (1+i) + " of " + numSegments + ": name=" + info.name + " docCount=" + info.docCount); - segInfoStat.name = info.name; - segInfoStat.docCount = info.docCount; + msg(infoStream, " " + (1+i) + " of " + numSegments + ": name=" + info.info.name + " docCount=" + info.info.getDocCount()); + segInfoStat.name = info.info.name; + segInfoStat.docCount = info.info.getDocCount(); + + final Version version = info.info.getVersion(); + if (info.info.getDocCount() <= 0 && version != null && version.onOrAfter(Version.LUCENE_4_5_0)) { + throw new RuntimeException("illegal number of documents: maxDoc=" + info.info.getDocCount()); + } - int toLoseDocCount = info.docCount; + int toLoseDocCount = info.info.getDocCount(); - SegmentReader reader = null; + AtomicReader reader = null; try { - msg(" compound=" + info.getUseCompoundFile()); - segInfoStat.compound = info.getUseCompoundFile(); - msg(" hasProx=" + info.getHasProx()); - segInfoStat.hasProx = info.getHasProx(); - msg(" numFiles=" + info.files().size()); + msg(infoStream, " version=" + (version == null ? 
"3.0" : version)); + final Codec codec = info.info.getCodec(); + msg(infoStream, " codec=" + codec); + segInfoStat.codec = codec; + msg(infoStream, " compound=" + info.info.getUseCompoundFile()); + segInfoStat.compound = info.info.getUseCompoundFile(); + msg(infoStream, " numFiles=" + info.files().size()); segInfoStat.numFiles = info.files().size(); - msg(" size (MB)=" + nf.format(info.sizeInBytes()/(1024.*1024.))); segInfoStat.sizeMB = info.sizeInBytes()/(1024.*1024.); - - - final int docStoreOffset = info.getDocStoreOffset(); - if (docStoreOffset != -1) { - msg(" docStoreOffset=" + docStoreOffset); - segInfoStat.docStoreOffset = docStoreOffset; - msg(" docStoreSegment=" + info.getDocStoreSegment()); - segInfoStat.docStoreSegment = info.getDocStoreSegment(); - msg(" docStoreIsCompoundFile=" + info.getDocStoreIsCompoundFile()); - segInfoStat.docStoreCompoundFile = info.getDocStoreIsCompoundFile(); + if (info.info.getAttribute(Lucene3xSegmentInfoFormat.DS_OFFSET_KEY) == null) { + // don't print size in bytes if its a 3.0 segment with shared docstores + msg(infoStream, " size (MB)=" + nf.format(segInfoStat.sizeMB)); } - final String delFileName = info.getDelFileName(); - if (delFileName == null){ - msg(" no deletions"); + Map diagnostics = info.info.getDiagnostics(); + segInfoStat.diagnostics = diagnostics; + if (diagnostics.size() > 0) { + msg(infoStream, " diagnostics = " + diagnostics); + } + + if (!info.hasDeletions()) { + msg(infoStream, " no deletions"); segInfoStat.hasDeletions = false; } else{ - msg(" has deletions [delFileName=" + delFileName + "]"); + msg(infoStream, " has deletions [delGen=" + info.getDelGen() + "]"); segInfoStat.hasDeletions = true; - segInfoStat.deletionsFileName = delFileName; + segInfoStat.deletionsGen = info.getDelGen(); } if (infoStream != null) infoStream.print(" test: open reader........."); - reader = SegmentReader.get(info); + reader = new SegmentReader(info, DirectoryReader.DEFAULT_TERMS_INDEX_DIVISOR, IOContext.DEFAULT); + msg(infoStream, "OK"); + + segInfoStat.openReaderPassed = true; + + if (infoStream != null) + infoStream.print(" test: check integrity....."); + reader.checkIntegrity(); + msg(infoStream, "OK"); + + if (infoStream != null) + infoStream.print(" test: check live docs....."); final int numDocs = reader.numDocs(); toLoseDocCount = numDocs; if (reader.hasDeletions()) { - if (info.docCount - numDocs != info.getDelCount()){ - throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); + if (reader.numDocs() != info.info.getDocCount() - info.getDelCount()) { + throw new RuntimeException("delete count mismatch: info=" + (info.info.getDocCount() - info.getDelCount()) + " vs reader=" + reader.numDocs()); } - segInfoStat.numDeleted = info.docCount - numDocs; - msg("OK [" + (segInfoStat.numDeleted) + " deleted docs]"); - } else { - if (info.getDelCount() != 0){ - throw new RuntimeException("delete count mismatch: info=" + info.getDelCount() + " vs reader=" + (info.docCount - numDocs)); + if ((info.info.getDocCount()-reader.numDocs()) > reader.maxDoc()) { + throw new RuntimeException("too many deleted docs: maxDoc()=" + reader.maxDoc() + " vs del count=" + (info.info.getDocCount()-reader.numDocs())); } - msg("OK"); - } - - if (infoStream != null) - infoStream.print(" test: fields, norms......."); - Collection fieldNames = reader.getFieldNames(IndexReader.FieldOption.ALL); - Iterator it = fieldNames.iterator(); - while(it.hasNext()) { - final String fieldName = (String) it.next(); - 
byte[] b = reader.norms(fieldName); - if (b.length != info.docCount) - throw new RuntimeException("norms for field \"" + fieldName + "\" is length " + b.length + " != maxDoc " + info.docCount); - - } - msg("OK [" + fieldNames.size() + " fields]"); - segInfoStat.numFields = fieldNames.size(); - if (infoStream != null) - infoStream.print(" test: terms, freq, prox..."); - final TermEnum termEnum = reader.terms(); - final TermPositions termPositions = reader.termPositions(); - - // Used only to count up # deleted docs for this - // term - final MySegmentTermDocs myTermDocs = new MySegmentTermDocs(reader); - - long termCount = 0; - long totFreq = 0; - long totPos = 0; - while(termEnum.next()) { - termCount++; - final Term term = termEnum.term(); - final int docFreq = termEnum.docFreq(); - termPositions.seek(term); - int lastDoc = -1; - int freq0 = 0; - totFreq += docFreq; - while(termPositions.next()) { - freq0++; - final int doc = termPositions.doc(); - final int freq = termPositions.freq(); - if (doc <= lastDoc) - throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); - lastDoc = doc; - if (freq <= 0) - throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); - - int lastPos = -1; - totPos += freq; - for(int j=0;j= 0); + + if (hasFreqs != expectedHasFreqs) { + throw new RuntimeException("field \"" + field + "\" should have hasFreqs=" + expectedHasFreqs + " but got " + hasFreqs); + } + + if (hasFreqs == false) { + if (terms.getSumTotalTermFreq() != -1) { + throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but Terms.getSumTotalTermFreq()=" + terms.getSumTotalTermFreq() + " (should be -1)"); + } + } + + if (!isVectors) { + final boolean expectedHasPositions = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + if (hasPositions != expectedHasPositions) { + throw new RuntimeException("field \"" + field + "\" should have hasPositions=" + expectedHasPositions + " but got " + hasPositions); + } + + final boolean expectedHasPayloads = fieldInfo.hasPayloads(); + if (hasPayloads != expectedHasPayloads) { + throw new RuntimeException("field \"" + field + "\" should have hasPayloads=" + expectedHasPayloads + " but got " + hasPayloads); + } + + final boolean expectedHasOffsets = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + if (hasOffsets != expectedHasOffsets) { + throw new RuntimeException("field \"" + field + "\" should have hasOffsets=" + expectedHasOffsets + " but got " + hasOffsets); + } + } + + final TermsEnum termsEnum = terms.iterator(null); + + boolean hasOrd = true; + final long termCountStart = status.delTermCount + status.termCount; + + BytesRefBuilder lastTerm = null; + + Comparator termComp = terms.getComparator(); + + long sumTotalTermFreq = 0; + long sumDocFreq = 0; + long upto = 0; + FixedBitSet visitedDocs = new FixedBitSet(maxDoc); + while(true) { + + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + + assert term.isValid(); + + // make sure terms arrive in order according to + // the comp + if (lastTerm == null) { + lastTerm = new BytesRefBuilder(); + lastTerm.copyBytes(term); + } else { + if (termComp.compare(lastTerm.get(), term) >= 0) { + throw new RuntimeException("terms out of order: lastTerm=" + lastTerm + " term=" + term); + } + lastTerm.copyBytes(term); + } + + if (minTerm == null) { + // We checked this above: + assert maxTerm == null; + throw new 
RuntimeException("field=\"" + field + "\": invalid term: term=" + term + ", minTerm=" + minTerm); + } + + if (term.compareTo(minTerm) < 0) { + throw new RuntimeException("field=\"" + field + "\": invalid term: term=" + term + ", minTerm=" + minTerm); + } + + if (term.compareTo(maxTerm) > 0) { + throw new RuntimeException("field=\"" + field + "\": invalid term: term=" + term + ", maxTerm=" + maxTerm); + } + + final int docFreq = termsEnum.docFreq(); + if (docFreq <= 0) { + throw new RuntimeException("docfreq: " + docFreq + " is out of bounds"); + } + sumDocFreq += docFreq; + + docs = termsEnum.docs(liveDocs, docs); + postings = termsEnum.docsAndPositions(liveDocs, postings); + + if (hasFreqs == false) { + if (termsEnum.totalTermFreq() != -1) { + throw new RuntimeException("field \"" + field + "\" hasFreqs is false, but TermsEnum.totalTermFreq()=" + termsEnum.totalTermFreq() + " (should be -1)"); + } + } + + if (hasOrd) { + long ord = -1; + try { + ord = termsEnum.ord(); + } catch (UnsupportedOperationException uoe) { + hasOrd = false; + } + + if (hasOrd) { + final long ordExpected = status.delTermCount + status.termCount - termCountStart; + if (ord != ordExpected) { + throw new RuntimeException("ord mismatch: TermsEnum has ord=" + ord + " vs actual=" + ordExpected); + } + } + } + + final DocsEnum docs2; + if (postings != null) { + docs2 = postings; + } else { + docs2 = docs; + } + + int lastDoc = -1; + int docCount = 0; + long totalTermFreq = 0; + while(true) { + final int doc = docs2.nextDoc(); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + status.totFreq++; + visitedDocs.set(doc); + int freq = -1; + if (hasFreqs) { + freq = docs2.freq(); + if (freq <= 0) { + throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " is out of bounds"); + } + status.totPos += freq; + totalTermFreq += freq; + } else { + // When a field didn't index freq, it must + // consistently "lie" and pretend that freq was + // 1: + if (docs2.freq() != 1) { + throw new RuntimeException("term " + term + ": doc " + doc + ": freq " + freq + " != 1 when Terms.hasFreqs() is false"); + } + } + docCount++; + + if (doc <= lastDoc) { + throw new RuntimeException("term " + term + ": doc " + doc + " <= lastDoc " + lastDoc); + } + if (doc >= maxDoc) { + throw new RuntimeException("term " + term + ": doc " + doc + " >= maxDoc " + maxDoc); + } + + lastDoc = doc; + + int lastPos = -1; + int lastOffset = 0; + if (hasPositions) { + for(int j=0;j(); + } + status.blockTreeStats.put(field, stats); + } + + if (sumTotalTermFreq != 0) { + final long v = fields.terms(field).getSumTotalTermFreq(); + if (v != -1 && sumTotalTermFreq != v) { + throw new RuntimeException("sumTotalTermFreq for field " + field + "=" + v + " != recomputed sumTotalTermFreq=" + sumTotalTermFreq); + } + } + + if (sumDocFreq != 0) { + final long v = fields.terms(field).getSumDocFreq(); + if (v != -1 && sumDocFreq != v) { + throw new RuntimeException("sumDocFreq for field " + field + "=" + v + " != recomputed sumDocFreq=" + sumDocFreq); + } + } + + if (fieldTerms != null) { + final int v = fieldTerms.getDocCount(); + if (v != -1 && visitedDocs.cardinality() != v) { + throw new RuntimeException("docCount for field " + field + "=" + v + " != recomputed docCount=" + visitedDocs.cardinality()); + } + } + + // Test seek to last term: + if (lastTerm != null) { + if (termsEnum.seekCeil(lastTerm.get()) != TermsEnum.SeekStatus.FOUND) { + throw new RuntimeException("seek to last term " + lastTerm + " failed"); + } + + int expectedDocFreq = 
termsEnum.docFreq(); + DocsEnum d = termsEnum.docs(null, null, DocsEnum.FLAG_NONE); + int docFreq = 0; + while (d.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + docFreq++; + } + if (docFreq != expectedDocFreq) { + throw new RuntimeException("docFreq for last term " + lastTerm + "=" + expectedDocFreq + " != recomputed docFreq=" + docFreq); + } + } + + // check unique term count + long termCount = -1; + + if ((status.delTermCount+status.termCount)-termCountStart > 0) { + termCount = fields.terms(field).size(); + + if (termCount != -1 && termCount != status.delTermCount + status.termCount - termCountStart) { + throw new RuntimeException("termCount mismatch " + (status.delTermCount + termCount) + " vs " + (status.termCount - termCountStart)); + } + } + + // Test seeking by ord + if (hasOrd && status.termCount-termCountStart > 0) { + int seekCount = (int) Math.min(10000L, termCount); + if (seekCount > 0) { + BytesRef[] seekTerms = new BytesRef[seekCount]; + + // Seek by ord + for(int i=seekCount-1;i>=0;i--) { + long ord = i*(termCount/seekCount); + termsEnum.seekExact(ord); + seekTerms[i] = BytesRef.deepCopyOf(termsEnum.term()); + } + + // Seek by term + long totDocCount = 0; + for(int i=seekCount-1;i>=0;i--) { + if (termsEnum.seekCeil(seekTerms[i]) != TermsEnum.SeekStatus.FOUND) { + throw new RuntimeException("seek to existing term " + seekTerms[i] + " failed"); + } + + docs = termsEnum.docs(liveDocs, docs, DocsEnum.FLAG_NONE); + if (docs == null) { + throw new RuntimeException("null DocsEnum from to existing term " + seekTerms[i]); + } + + while(docs.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + totDocCount++; + } + } + + long totDocCountNoDeletes = 0; + long totDocFreq = 0; + for(int i=0;i totDocCountNoDeletes) { + throw new RuntimeException("more postings with deletes=" + totDocCount + " than without=" + totDocCountNoDeletes); + } + + if (totDocCountNoDeletes != totDocFreq) { + throw new RuntimeException("docfreqs=" + totDocFreq + " != recomputed docfreqs=" + totDocCountNoDeletes); + } + } + } + } + } + + int fieldCount = fields.size(); + + if (fieldCount != -1) { + if (fieldCount < 0) { + throw new RuntimeException("invalid fieldCount: " + fieldCount); + } + if (fieldCount != computedFieldCount) { + throw new RuntimeException("fieldCount mismatch " + fieldCount + " vs recomputed field count " + computedFieldCount); + } + } + + // for most implementations, this is boring (just the sum across all fields) + // but codecs that don't work per-field like preflex actually implement this, + // but don't implement it on Terms, so the check isn't redundant. + long uniqueTermCountAllFields = fields.getUniqueTermCount(); + + if (uniqueTermCountAllFields != -1 && status.termCount + status.delTermCount != uniqueTermCountAllFields) { + throw new RuntimeException("termCount mismatch " + uniqueTermCountAllFields + " vs " + (status.termCount + status.delTermCount)); + } + + if (doPrint) { + msg(infoStream, "OK [" + status.termCount + " terms; " + status.totFreq + " terms/docs pairs; " + status.totPos + " tokens]"); + } + + if (verbose && status.blockTreeStats != null && infoStream != null && status.termCount > 0) { + for(Map.Entry ent : status.blockTreeStats.entrySet()) { + infoStream.println(" field \"" + ent.getKey() + "\":"); + infoStream.println(" " + ent.getValue().toString().replace("\n", "\n ")); + } + } + + return status; + } + + /** + * Test the term index. 
+ * @lucene.experimental + */ + public static Status.TermIndexStatus testPostings(AtomicReader reader, PrintStream infoStream) throws IOException { + return testPostings(reader, infoStream, false, false); + } + /** + * Test the term index. + * @lucene.experimental + */ + public static Status.TermIndexStatus testPostings(AtomicReader reader, PrintStream infoStream, boolean verbose, boolean failFast) throws IOException { + + // TODO: we should go and verify term vectors match, if + // crossCheckTermVectors is on... + + Status.TermIndexStatus status; + final int maxDoc = reader.maxDoc(); + final Bits liveDocs = reader.getLiveDocs(); + + try { + if (infoStream != null) { + infoStream.print(" test: terms, freq, prox..."); + } + + final Fields fields = reader.fields(); + final FieldInfos fieldInfos = reader.getFieldInfos(); + status = checkFields(fields, liveDocs, maxDoc, fieldInfos, true, false, infoStream, verbose); + if (liveDocs != null) { + if (infoStream != null) { + infoStream.print(" test (ignoring deletes): terms, freq, prox..."); + } + checkFields(fields, null, maxDoc, fieldInfos, true, false, infoStream, verbose); + } + } catch (Throwable e) { + if (failFast) { + IOUtils.reThrow(e); + } + msg(infoStream, "ERROR: " + e); + status = new Status.TermIndexStatus(); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + + return status; + } + + /** + * Test stored fields. + * @lucene.experimental + */ + public static Status.StoredFieldStatus testStoredFields(AtomicReader reader, PrintStream infoStream, boolean failFast) throws IOException { + final Status.StoredFieldStatus status = new Status.StoredFieldStatus(); + + try { + if (infoStream != null) { + infoStream.print(" test: stored fields......."); + } + + // Scan stored fields for all documents + final Bits liveDocs = reader.getLiveDocs(); + for (int j = 0; j < reader.maxDoc(); ++j) { + // Intentionally pull even deleted documents to + // make sure they too are not corrupt: + Document doc = reader.document(j); + if (liveDocs == null || liveDocs.get(j)) { + status.docCount++; + status.totFields += doc.getFields().size(); + } + } + + // Validate docCount + if (status.docCount != reader.numDocs()) { + throw new RuntimeException("docCount=" + status.docCount + " but saw " + status.docCount + " undeleted docs"); + } + + msg(infoStream, "OK [" + status.totFields + " total field count; avg " + + NumberFormat.getInstance(Locale.ROOT).format((((float) status.totFields)/status.docCount)) + " fields per doc]"); + } catch (Throwable e) { + if (failFast) { + IOUtils.reThrow(e); + } + msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]"); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + + return status; + } + + /** + * Test docvalues. 
+ * @lucene.experimental + */ + public static Status.DocValuesStatus testDocValues(AtomicReader reader, + PrintStream infoStream, + boolean failFast) throws IOException { + final Status.DocValuesStatus status = new Status.DocValuesStatus(); + try { + if (infoStream != null) { + infoStream.print(" test: docvalues..........."); + } + for (FieldInfo fieldInfo : reader.getFieldInfos()) { + if (fieldInfo.hasDocValues()) { + status.totalValueFields++; + checkDocValues(fieldInfo, reader, infoStream, status); + } else { + if (reader.getBinaryDocValues(fieldInfo.name) != null || + reader.getNumericDocValues(fieldInfo.name) != null || + reader.getSortedDocValues(fieldInfo.name) != null || + reader.getSortedSetDocValues(fieldInfo.name) != null || + reader.getDocsWithField(fieldInfo.name) != null) { + throw new RuntimeException("field: " + fieldInfo.name + " has docvalues but should omit them!"); + } + } + } + + msg(infoStream, "OK [" + status.totalValueFields + " docvalues fields; " + + status.totalBinaryFields + " BINARY; " + + status.totalNumericFields + " NUMERIC; " + + status.totalSortedFields + " SORTED; " + + status.totalSortedNumericFields + " SORTED_NUMERIC; " + + status.totalSortedSetFields + " SORTED_SET]"); + } catch (Throwable e) { + if (failFast) { + IOUtils.reThrow(e); + } + msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]"); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + return status; + } + + private static void checkBinaryDocValues(String fieldName, AtomicReader reader, BinaryDocValues dv, Bits docsWithField) { + for (int i = 0; i < reader.maxDoc(); i++) { + final BytesRef term = dv.get(i); + assert term.isValid(); + if (docsWithField.get(i) == false && term.length > 0) { + throw new RuntimeException("dv for field: " + fieldName + " is missing but has value=" + term + " for doc: " + i); + } + } + } + + private static void checkSortedDocValues(String fieldName, AtomicReader reader, SortedDocValues dv, Bits docsWithField) { + checkBinaryDocValues(fieldName, reader, dv, docsWithField); + final int maxOrd = dv.getValueCount()-1; + FixedBitSet seenOrds = new FixedBitSet(dv.getValueCount()); + int maxOrd2 = -1; + for (int i = 0; i < reader.maxDoc(); i++) { + int ord = dv.getOrd(i); + if (ord == -1) { + if (docsWithField.get(i)) { + throw new RuntimeException("dv for field: " + fieldName + " has -1 ord but is not marked missing for doc: " + i); + } + } else if (ord < -1 || ord > maxOrd) { + throw new RuntimeException("ord out of bounds: " + ord); + } else { + if (!docsWithField.get(i)) { + throw new RuntimeException("dv for field: " + fieldName + " is missing but has ord=" + ord + " for doc: " + i); + } + maxOrd2 = Math.max(maxOrd2, ord); + seenOrds.set(ord); + } + } + if (maxOrd != maxOrd2) { + throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2); + } + if (seenOrds.cardinality() != dv.getValueCount()) { + throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + seenOrds.cardinality()); + } + BytesRef lastValue = null; + for (int i = 0; i <= maxOrd; i++) { + final BytesRef term = dv.lookupOrd(i); + assert term.isValid(); + if (lastValue != null) { + if (term.compareTo(lastValue) <= 0) { + throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + term); + } + } + lastValue = BytesRef.deepCopyOf(term); + } + } + + private 
static void checkSortedSetDocValues(String fieldName, AtomicReader reader, SortedSetDocValues dv, Bits docsWithField) { + final long maxOrd = dv.getValueCount()-1; + LongBitSet seenOrds = new LongBitSet(dv.getValueCount()); + long maxOrd2 = -1; + for (int i = 0; i < reader.maxDoc(); i++) { + dv.setDocument(i); + long lastOrd = -1; + long ord; + if (docsWithField.get(i)) { + int ordCount = 0; + while ((ord = dv.nextOrd()) != SortedSetDocValues.NO_MORE_ORDS) { + if (ord <= lastOrd) { + throw new RuntimeException("ords out of order: " + ord + " <= " + lastOrd + " for doc: " + i); + } + if (ord < 0 || ord > maxOrd) { + throw new RuntimeException("ord out of bounds: " + ord); + } + if (dv instanceof RandomAccessOrds) { + long ord2 = ((RandomAccessOrds)dv).ordAt(ordCount); + if (ord != ord2) { + throw new RuntimeException("ordAt(" + ordCount + ") inconsistent, expected=" + ord + ",got=" + ord2 + " for doc: " + i); + } + } + lastOrd = ord; + maxOrd2 = Math.max(maxOrd2, ord); + seenOrds.set(ord); + ordCount++; + } + if (ordCount == 0) { + throw new RuntimeException("dv for field: " + fieldName + " has no ordinals but is not marked missing for doc: " + i); + } + if (dv instanceof RandomAccessOrds) { + long ordCount2 = ((RandomAccessOrds)dv).cardinality(); + if (ordCount != ordCount2) { + throw new RuntimeException("cardinality inconsistent, expected=" + ordCount + ",got=" + ordCount2 + " for doc: " + i); + } + } + } else { + long o = dv.nextOrd(); + if (o != SortedSetDocValues.NO_MORE_ORDS) { + throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has ord=" + o + " for doc: " + i); + } + if (dv instanceof RandomAccessOrds) { + long ordCount2 = ((RandomAccessOrds)dv).cardinality(); + if (ordCount2 != 0) { + throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has cardinality " + ordCount2 + " for doc: " + i); + } + } + } + } + if (maxOrd != maxOrd2) { + throw new RuntimeException("dv for field: " + fieldName + " reports wrong maxOrd=" + maxOrd + " but this is not the case: " + maxOrd2); + } + if (seenOrds.cardinality() != dv.getValueCount()) { + throw new RuntimeException("dv for field: " + fieldName + " has holes in its ords, valueCount=" + dv.getValueCount() + " but only used: " + seenOrds.cardinality()); + } + + BytesRef lastValue = null; + for (long i = 0; i <= maxOrd; i++) { + final BytesRef term = dv.lookupOrd(i); + assert term.isValid(); + if (lastValue != null) { + if (term.compareTo(lastValue) <= 0) { + throw new RuntimeException("dv for field: " + fieldName + " has ords out of order: " + lastValue + " >=" + term); + } + } + lastValue = BytesRef.deepCopyOf(term); + } + } + + private static void checkSortedNumericDocValues(String fieldName, AtomicReader reader, SortedNumericDocValues ndv, Bits docsWithField) { + for (int i = 0; i < reader.maxDoc(); i++) { + ndv.setDocument(i); + int count = ndv.count(); + if (docsWithField.get(i)) { + if (count == 0) { + throw new RuntimeException("dv for field: " + fieldName + " is not marked missing but has zero count for doc: " + i); + } + long previous = Long.MIN_VALUE; + for (int j = 0; j < count; j++) { + long value = ndv.valueAt(j); + if (value < previous) { + throw new RuntimeException("values out of order: " + value + " < " + previous + " for doc: " + i); + } + previous = value; + } + } else { + if (count != 0) { + throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has count=" + count + " for doc: " + i); + } + } + } + } + + private static void 
checkNumericDocValues(String fieldName, AtomicReader reader, NumericDocValues ndv, Bits docsWithField) { + for (int i = 0; i < reader.maxDoc(); i++) { + long value = ndv.get(i); + if (docsWithField.get(i) == false && value != 0) { + throw new RuntimeException("dv for field: " + fieldName + " is marked missing but has value=" + value + " for doc: " + i); + } + } + } + + private static void checkDocValues(FieldInfo fi, AtomicReader reader, PrintStream infoStream, DocValuesStatus status) throws Exception { + Bits docsWithField = reader.getDocsWithField(fi.name); + if (docsWithField == null) { + throw new RuntimeException(fi.name + " docsWithField does not exist"); + } else if (docsWithField.length() != reader.maxDoc()) { + throw new RuntimeException(fi.name + " docsWithField has incorrect length: " + docsWithField.length() + ",expected: " + reader.maxDoc()); + } + switch(fi.getDocValuesType()) { + case SORTED: + status.totalSortedFields++; + checkSortedDocValues(fi.name, reader, reader.getSortedDocValues(fi.name), docsWithField); + if (reader.getBinaryDocValues(fi.name) != null || + reader.getNumericDocValues(fi.name) != null || + reader.getSortedNumericDocValues(fi.name) != null || + reader.getSortedSetDocValues(fi.name) != null) { + throw new RuntimeException(fi.name + " returns multiple docvalues types!"); + } + break; + case SORTED_NUMERIC: + status.totalSortedNumericFields++; + checkSortedNumericDocValues(fi.name, reader, reader.getSortedNumericDocValues(fi.name), docsWithField); + if (reader.getBinaryDocValues(fi.name) != null || + reader.getNumericDocValues(fi.name) != null || + reader.getSortedSetDocValues(fi.name) != null || + reader.getSortedDocValues(fi.name) != null) { + throw new RuntimeException(fi.name + " returns multiple docvalues types!"); + } + break; + case SORTED_SET: + status.totalSortedSetFields++; + checkSortedSetDocValues(fi.name, reader, reader.getSortedSetDocValues(fi.name), docsWithField); + if (reader.getBinaryDocValues(fi.name) != null || + reader.getNumericDocValues(fi.name) != null || + reader.getSortedNumericDocValues(fi.name) != null || + reader.getSortedDocValues(fi.name) != null) { + throw new RuntimeException(fi.name + " returns multiple docvalues types!"); + } + break; + case BINARY: + status.totalBinaryFields++; + checkBinaryDocValues(fi.name, reader, reader.getBinaryDocValues(fi.name), docsWithField); + if (reader.getNumericDocValues(fi.name) != null || + reader.getSortedDocValues(fi.name) != null || + reader.getSortedNumericDocValues(fi.name) != null || + reader.getSortedSetDocValues(fi.name) != null) { + throw new RuntimeException(fi.name + " returns multiple docvalues types!"); + } + break; + case NUMERIC: + status.totalNumericFields++; + checkNumericDocValues(fi.name, reader, reader.getNumericDocValues(fi.name), docsWithField); + if (reader.getBinaryDocValues(fi.name) != null || + reader.getSortedDocValues(fi.name) != null || + reader.getSortedNumericDocValues(fi.name) != null || + reader.getSortedSetDocValues(fi.name) != null) { + throw new RuntimeException(fi.name + " returns multiple docvalues types!"); + } + break; + default: + throw new AssertionError(); + } + } + + private static void checkNorms(FieldInfo fi, AtomicReader reader, PrintStream infoStream) throws IOException { + switch(fi.getNormType()) { + case NUMERIC: + checkNumericDocValues(fi.name, reader, reader.getNormValues(fi.name), new Bits.MatchAllBits(reader.maxDoc())); + break; + default: + throw new AssertionError("wtf: " + fi.getNormType()); + } + } + + /** + * Test term vectors. 
+ * @lucene.experimental + */ + public static Status.TermVectorStatus testTermVectors(AtomicReader reader, PrintStream infoStream) throws IOException { + return testTermVectors(reader, infoStream, false, false, false); + } + + /** + * Test term vectors. + * @lucene.experimental + */ + public static Status.TermVectorStatus testTermVectors(AtomicReader reader, PrintStream infoStream, boolean verbose, boolean crossCheckTermVectors, boolean failFast) throws IOException { + final Status.TermVectorStatus status = new Status.TermVectorStatus(); + final FieldInfos fieldInfos = reader.getFieldInfos(); + final Bits onlyDocIsDeleted = new FixedBitSet(1); + + try { + if (infoStream != null) { + infoStream.print(" test: term vectors........"); + } + + DocsEnum docs = null; + DocsAndPositionsEnum postings = null; + + // Only used if crossCheckTermVectors is true: + DocsEnum postingsDocs = null; + DocsAndPositionsEnum postingsPostings = null; + + final Bits liveDocs = reader.getLiveDocs(); + + final Fields postingsFields; + // TODO: testTermsIndex + if (crossCheckTermVectors) { + postingsFields = reader.fields(); + } else { + postingsFields = null; + } + + TermsEnum termsEnum = null; + TermsEnum postingsTermsEnum = null; + + for (int j = 0; j < reader.maxDoc(); ++j) { + // Intentionally pull/visit (but don't count in + // stats) deleted documents to make sure they too + // are not corrupt: + Fields tfv = reader.getTermVectors(j); + + // TODO: can we make a IS(FIR) that searches just + // this term vector... to pass for searcher? + + if (tfv != null) { + // First run with no deletions: + checkFields(tfv, null, 1, fieldInfos, false, true, infoStream, verbose); + + // Again, with the one doc deleted: + checkFields(tfv, onlyDocIsDeleted, 1, fieldInfos, false, true, infoStream, verbose); + + // Only agg stats if the doc is live: + final boolean doStats = liveDocs == null || liveDocs.get(j); + + if (doStats) { + status.docCount++; + } + + for(String field : tfv) { + if (doStats) { + status.totVectors++; + } + + // Make sure FieldInfo thinks this field is vector'd: + final FieldInfo fieldInfo = fieldInfos.fieldInfo(field); + if (!fieldInfo.hasVectors()) { + throw new RuntimeException("docID=" + j + " has term vectors for field=" + field + " but FieldInfo has storeTermVector=false"); + } + + if (crossCheckTermVectors) { + Terms terms = tfv.terms(field); + termsEnum = terms.iterator(termsEnum); + final boolean postingsHasFreq = fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + final boolean postingsHasPayload = fieldInfo.hasPayloads(); + final boolean vectorsHasPayload = terms.hasPayloads(); + + Terms postingsTerms = postingsFields.terms(field); + if (postingsTerms == null) { + throw new RuntimeException("vector field=" + field + " does not exist in postings; doc=" + j); + } + postingsTermsEnum = postingsTerms.iterator(postingsTermsEnum); + + final boolean hasProx = terms.hasOffsets() || terms.hasPositions(); + BytesRef term = null; + while ((term = termsEnum.next()) != null) { + + if (hasProx) { + postings = termsEnum.docsAndPositions(null, postings); + assert postings != null; + docs = null; + } else { + docs = termsEnum.docs(null, docs); + assert docs != null; + postings = null; + } + + final DocsEnum docs2; + if (hasProx) { + assert postings != null; + docs2 = postings; + } else { + assert docs != null; + docs2 = docs; + } + + final DocsEnum postingsDocs2; + if (!postingsTermsEnum.seekExact(term)) { + throw new RuntimeException("vector term=" + term + " field=" + field + " does not 
exist in postings; doc=" + j); + } + postingsPostings = postingsTermsEnum.docsAndPositions(null, postingsPostings); + if (postingsPostings == null) { + // Term vectors were indexed w/ pos but postings were not + postingsDocs = postingsTermsEnum.docs(null, postingsDocs); + if (postingsDocs == null) { + throw new RuntimeException("vector term=" + term + " field=" + field + " does not exist in postings; doc=" + j); + } + } + + if (postingsPostings != null) { + postingsDocs2 = postingsPostings; + } else { + postingsDocs2 = postingsDocs; + } + + final int advanceDoc = postingsDocs2.advance(j); + if (advanceDoc != j) { + throw new RuntimeException("vector term=" + term + " field=" + field + ": doc=" + j + " was not found in postings (got: " + advanceDoc + ")"); + } + + final int doc = docs2.nextDoc(); + + if (doc != 0) { + throw new RuntimeException("vector for doc " + j + " didn't return docID=0: got docID=" + doc); + } + + if (postingsHasFreq) { + final int tf = docs2.freq(); + if (postingsHasFreq && postingsDocs2.freq() != tf) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": freq=" + tf + " differs from postings freq=" + postingsDocs2.freq()); + } + + if (hasProx) { + for (int i = 0; i < tf; i++) { + int pos = postings.nextPosition(); + if (postingsPostings != null) { + int postingsPos = postingsPostings.nextPosition(); + if (terms.hasPositions() && pos != postingsPos) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": pos=" + pos + " differs from postings pos=" + postingsPos); + } + } + + // Call the methods to at least make + // sure they don't throw exc: + final int startOffset = postings.startOffset(); + final int endOffset = postings.endOffset(); + // TODO: these are too anal...? + /* + if (endOffset < startOffset) { + throw new RuntimeException("vector startOffset=" + startOffset + " is > endOffset=" + endOffset); + } + if (startOffset < lastStartOffset) { + throw new RuntimeException("vector startOffset=" + startOffset + " is < prior startOffset=" + lastStartOffset); + } + lastStartOffset = startOffset; + */ + + if (postingsPostings != null) { + final int postingsStartOffset = postingsPostings.startOffset(); + + final int postingsEndOffset = postingsPostings.endOffset(); + if (startOffset != -1 && postingsStartOffset != -1 && startOffset != postingsStartOffset) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": startOffset=" + startOffset + " differs from postings startOffset=" + postingsStartOffset); + } + if (endOffset != -1 && postingsEndOffset != -1 && endOffset != postingsEndOffset) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + ": endOffset=" + endOffset + " differs from postings endOffset=" + postingsEndOffset); + } + } + + BytesRef payload = postings.getPayload(); + + if (payload != null) { + assert vectorsHasPayload; + } + + if (postingsHasPayload && vectorsHasPayload) { + assert postingsPostings != null; + + if (payload == null) { + // we have payloads, but not at this position. + // postings has payloads too, it should not have one at this position + if (postingsPostings.getPayload() != null) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has no payload but postings does: " + postingsPostings.getPayload()); + } + } else { + // we have payloads, and one at this position + // postings should also have one at this position, with the same bytes. 
+ if (postingsPostings.getPayload() == null) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but postings does not."); + } + BytesRef postingsPayload = postingsPostings.getPayload(); + if (!payload.equals(postingsPayload)) { + throw new RuntimeException("vector term=" + term + " field=" + field + " doc=" + j + " has payload=" + payload + " but differs from postings payload=" + postingsPayload); + } + } + } + } + } + } + } + } + } + } + } + float vectorAvg = status.docCount == 0 ? 0 : status.totVectors / (float)status.docCount; + msg(infoStream, "OK [" + status.totVectors + " total vector count; avg " + + NumberFormat.getInstance(Locale.ROOT).format(vectorAvg) + " term/freq vector fields per doc]"); + } catch (Throwable e) { + if (failFast) { + IOUtils.reThrow(e); + } + msg(infoStream, "ERROR [" + String.valueOf(e.getMessage()) + "]"); + status.error = e; + if (infoStream != null) { + e.printStackTrace(infoStream); + } + } + + return status; + } + /** Repairs the index using previously returned result * from {@link #checkIndex}. Note that this does not * remove any of the unreferenced files after it's done; @@ -551,6 +1946,7 @@ public void fixIndex(Status result) throws IOException { if (result.partial) throw new IllegalArgumentException("can only fix an index that was fully checked (this status checked a subset of segments)"); + result.newSegments.changed(); result.newSegments.commit(result.dir); } @@ -571,7 +1967,7 @@

Run it like this:

-    java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]
+    java -ea:org.apache.lucene... org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-verbose] [-segment X] [-segment Y]
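    [Editor's note] The usage line above shows the command-line form; the same check can also be run from Java. The sketch below is an editorial illustration and is not part of this change set; it uses only API visible elsewhere in this diff (FSDirectory.open, CheckIndex, setInfoStream, Status.clean), and the index path is hypothetical. The command-line flags themselves are described in the list that follows.

        import java.io.File;

        import org.apache.lucene.index.CheckIndex;
        import org.apache.lucene.store.Directory;
        import org.apache.lucene.store.FSDirectory;

        public class CheckIndexFromCode {
          public static void main(String[] args) throws Exception {
            // Hypothetical index location; replace with a real path.
            Directory dir = FSDirectory.open(new File("/path/to/index"));
            try {
              CheckIndex checker = new CheckIndex(dir);
              checker.setInfoStream(System.out, true);           // verbose output, like the -verbose flag
              CheckIndex.Status status = checker.checkIndex();   // check every segment
              System.out.println(status.clean ? "Index is clean" : "Index has problems");
            } finally {
              dir.close();
            }
          }
        }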
     
  • -fix: actually write a new segments_N file, removing any problematic segments @@ -598,42 +1994,61 @@ This tool exits with exit code 1 if the index cannot be opened or has any corruption, else 0. */ - public static void main(String[] args) throws IOException { + public static void main(String[] args) throws IOException, InterruptedException { boolean doFix = false; - List onlySegments = new ArrayList(); + boolean doCrossCheckTermVectors = false; + boolean verbose = false; + List onlySegments = new ArrayList<>(); String indexPath = null; + String dirImpl = null; int i = 0; while(i < args.length) { - if (args[i].equals("-fix")) { + String arg = args[i]; + if ("-fix".equals(arg)) { doFix = true; - i++; - } else if (args[i].equals("-segment")) { + } else if ("-crossCheckTermVectors".equals(arg)) { + doCrossCheckTermVectors = true; + } else if (arg.equals("-verbose")) { + verbose = true; + } else if (arg.equals("-segment")) { if (i == args.length-1) { System.out.println("ERROR: missing name for -segment option"); System.exit(1); } - onlySegments.add(args[i+1]); - i += 2; + i++; + onlySegments.add(args[i]); + } else if ("-dir-impl".equals(arg)) { + if (i == args.length - 1) { + System.out.println("ERROR: missing value for -dir-impl option"); + System.exit(1); + } + i++; + dirImpl = args[i]; } else { if (indexPath != null) { System.out.println("ERROR: unexpected extra argument '" + args[i] + "'"); System.exit(1); } indexPath = args[i]; - i++; } + i++; } if (indexPath == null) { System.out.println("\nERROR: index path not specified"); - System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-segment X] [-segment Y]\n" + + System.out.println("\nUsage: java org.apache.lucene.index.CheckIndex pathToIndex [-fix] [-crossCheckTermVectors] [-segment X] [-segment Y] [-dir-impl X]\n" + "\n" + " -fix: actually write a new segments_N file, removing any problematic segments\n" + + " -crossCheckTermVectors: verifies that term vectors match postings; THIS IS VERY SLOW!\n" + + " -codec X: when fixing, codec to write the new segments_N file with\n" + + " -verbose: print additional details\n" + " -segment X: only check the specified segments. This can be specified multiple\n" + " times, to check more than one segment, eg '-segment _2 -segment _a'.\n" + " You can't use this with the -fix option\n" + - "\n" + + " -dir-impl X: use a specific " + FSDirectory.class.getSimpleName() + " implementation. " + + "If no package is specified the " + FSDirectory.class.getPackage().getName() + " package will be used.\n" + + "\n" + "**WARNING**: -fix should only be used on an emergency basis as it will cause\n" + "documents (perhaps many) to be permanently removed from the index. Always make\n" + "a backup copy of your index before running this! 
Do not run this tool on an index\n" + @@ -663,17 +2078,25 @@ System.out.println("\nOpening index @ " + indexPath + "\n"); Directory dir = null; try { - dir = FSDirectory.getDirectory(indexPath); + if (dirImpl == null) { + dir = FSDirectory.open(new File(indexPath)); + } else { + dir = CommandLineUtil.newFSDirectory(dirImpl, new File(indexPath)); + } } catch (Throwable t) { System.out.println("ERROR: could not open directory \"" + indexPath + "\"; exiting"); t.printStackTrace(System.out); System.exit(1); } CheckIndex checker = new CheckIndex(dir); - checker.setInfoStream(System.out); + checker.setCrossCheckTermVectors(doCrossCheckTermVectors); + checker.setInfoStream(System.out, verbose); Status result = checker.checkIndex(onlySegments); + if (result.missingSegments) { + System.exit(1); + } if (!result.clean) { if (!doFix) { @@ -682,25 +2105,19 @@ System.out.println("WARNING: " + result.totLoseDocCount + " documents will be lost\n"); System.out.println("NOTE: will write new segments file in 5 seconds; this will remove " + result.totLoseDocCount + " docs from the index. THIS IS YOUR LAST CHANCE TO CTRL+C!"); for(int s=0;s<5;s++) { - try { - Thread.sleep(1000); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - s--; - continue; - } + Thread.sleep(1000); System.out.println(" " + (5-s) + "..."); } System.out.println("Writing..."); checker.fixIndex(result); System.out.println("OK"); - System.out.println("Wrote new segments file \"" + result.newSegments.getCurrentSegmentFileName() + "\""); + System.out.println("Wrote new segments file \"" + result.newSegments.getSegmentsFileName() + "\""); } } System.out.println(""); final int exitCode; - if (result != null && result.clean == true) + if (result.clean == true) exitCode = 0; else exitCode = 1; Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/CoalescedUpdates.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/CompositeReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/CompositeReaderContext.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/CompoundFileReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/CompoundFileWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/ConcurrentMergeScheduler.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/ConcurrentMergeScheduler.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/ConcurrentMergeScheduler.java 17 Aug 2012 14:54:59 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/ConcurrentMergeScheduler.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -18,60 +18,113 @@ */ import org.apache.lucene.store.Directory; +import org.apache.lucene.util.ThreadInterruptedException; +import org.apache.lucene.util.CollectionUtil; import java.io.IOException; import java.util.List; import java.util.ArrayList; +import java.util.Comparator; /** A {@link MergeScheduler} that runs each merge using a - * separate thread, up until a maximum number of threads - * ({@link #setMaxThreadCount}) at which when a merge is - * needed, the thread(s) that are updating the index will - * pause until one or more merges completes. This is a - * simple way to use concurrency in the indexing process - * without having to create and manage application level - * threads. */ - + * separate thread. + * + *

+ * Specify the max number of threads that may run at
+ * once, and the maximum number of simultaneous merges
+ * with {@link #setMaxMergesAndThreads}.
+ *
+ * If the number of merges exceeds the max number of threads
+ * then the largest merges are paused until one of the smaller
+ * merges completes.
+ *
+ * If more than {@link #getMaxMergeCount} merges are
+ * requested then this class will forcefully throttle the
+ * incoming threads by pausing until one or more merges
+ * complete.
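    [Editor's note] As an illustration of the throttling policy described above (not part of this change set), the sketch below wires a ConcurrentMergeScheduler with explicit limits into an IndexWriterConfig via setMaxMergesAndThreads, whose signature appears later in this diff. The chosen limits, the StandardAnalyzer, and the in-memory directory are arbitrary assumptions made only for the example.

        import org.apache.lucene.analysis.standard.StandardAnalyzer;
        import org.apache.lucene.index.ConcurrentMergeScheduler;
        import org.apache.lucene.index.IndexWriter;
        import org.apache.lucene.index.IndexWriterConfig;
        import org.apache.lucene.store.Directory;
        import org.apache.lucene.store.RAMDirectory;
        import org.apache.lucene.util.Version;

        public class CmsConfigExample {
          public static void main(String[] args) throws Exception {
            Directory dir = new RAMDirectory();
            IndexWriterConfig iwc =
                new IndexWriterConfig(Version.LUCENE_CURRENT,
                                      new StandardAnalyzer(Version.LUCENE_CURRENT));

            ConcurrentMergeScheduler cms = new ConcurrentMergeScheduler();
            // Up to 4 merges may be pending at once; only the 2 smallest run on threads,
            // larger ones are paused until a smaller merge completes.
            cms.setMaxMergesAndThreads(4, 2);
            iwc.setMergeScheduler(cms);

            IndexWriter writer = new IndexWriter(dir, iwc);
            try {
              // add or update documents here; merges run on background threads
            } finally {
              writer.close();
              dir.close();
            }
          }
        }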

    + */ public class ConcurrentMergeScheduler extends MergeScheduler { private int mergeThreadPriority = -1; - protected List mergeThreads = new ArrayList(); + /** List of currently active {@link MergeThread}s. */ + protected final List mergeThreads = new ArrayList<>(); + + /** + * Default {@code maxThreadCount}. + * We default to 1: tests on spinning-magnet drives showed slower + * indexing performance if more than one merge thread runs at + * once (though on an SSD it was faster) + */ + public static final int DEFAULT_MAX_THREAD_COUNT = 1; + + /** Default {@code maxMergeCount}. */ + public static final int DEFAULT_MAX_MERGE_COUNT = 2; - // Max number of threads allowed to be merging at once - private int maxThreadCount = 3; + // Max number of merge threads allowed to be running at + // once. When there are more merges then this, we + // forcefully pause the larger ones, letting the smaller + // ones run, up until maxMergeCount merges at which point + // we forcefully pause incoming threads (that presumably + // are the ones causing so much merging). + private int maxThreadCount = DEFAULT_MAX_THREAD_COUNT; - private List exceptions = new ArrayList(); + // Max number of merges we accept before forcefully + // throttling the incoming threads + private int maxMergeCount = DEFAULT_MAX_MERGE_COUNT; + + /** {@link Directory} that holds the index. */ protected Directory dir; - private boolean closed; + /** {@link IndexWriter} that owns this instance. */ protected IndexWriter writer; + + /** How many {@link MergeThread}s have kicked off (this is use + * to name them). */ protected int mergeThreadCount; + /** Sole constructor, with all settings set to default + * values. */ public ConcurrentMergeScheduler() { - if (allInstances != null) { - // Only for testing - addMyself(); - } } - /** Sets the max # simultaneous threads that may be - * running. If a merge is necessary yet we already have - * this many threads running, the incoming thread (that - * is calling add/updateDocument) will block until - * a merge thread has completed. */ - public void setMaxThreadCount(int count) { - if (count < 1) - throw new IllegalArgumentException("count should be at least 1"); - maxThreadCount = count; + /** + * Sets the maximum number of merge threads and simultaneous merges allowed. + * + * @param maxMergeCount the max # simultaneous merges that are allowed. + * If a merge is necessary yet we already have this many + * threads running, the incoming thread (that is calling + * add/updateDocument) will block until a merge thread + * has completed. Note that we will only run the + * smallest maxThreadCount merges at a time. + * @param maxThreadCount the max # simultaneous merge threads that should + * be running at once. This must be <= maxMergeCount + */ + public void setMaxMergesAndThreads(int maxMergeCount, int maxThreadCount) { + if (maxThreadCount < 1) { + throw new IllegalArgumentException("maxThreadCount should be at least 1"); + } + if (maxMergeCount < 1) { + throw new IllegalArgumentException("maxMergeCount should be at least 1"); + } + if (maxThreadCount > maxMergeCount) { + throw new IllegalArgumentException("maxThreadCount should be <= maxMergeCount (= " + maxMergeCount + ")"); + } + this.maxThreadCount = maxThreadCount; + this.maxMergeCount = maxMergeCount; } - /** Get the max # simultaneous threads that may be - * running. @see #setMaxThreadCount. */ + /** Returns {@code maxThreadCount}. 
+ * + * @see #setMaxMergesAndThreads(int, int) */ public int getMaxThreadCount() { return maxThreadCount; } + /** See {@link #setMaxMergesAndThreads}. */ + public int getMaxMergeCount() { + return maxMergeCount; + } + /** Return the priority that merge threads run at. By * default the priority is 1 plus the priority of (ie, * slightly higher priority than) the first thread that @@ -81,23 +134,118 @@ return mergeThreadPriority; } - /** Return the priority that merge threads run at. */ + /** Set the base priority that merge threads run at. + * Note that CMS may increase priority of some merge + * threads beyond this base priority. It's best not to + * set this any higher than + * Thread.MAX_PRIORITY-maxThreadCount, so that CMS has + * room to set relative priority among threads. */ public synchronized void setMergeThreadPriority(int pri) { if (pri > Thread.MAX_PRIORITY || pri < Thread.MIN_PRIORITY) throw new IllegalArgumentException("priority must be in range " + Thread.MIN_PRIORITY + " .. " + Thread.MAX_PRIORITY + " inclusive"); mergeThreadPriority = pri; + updateMergeThreads(); + } - final int numThreads = mergeThreadCount(); - for(int i=0;i compareByMergeDocCount = new Comparator() { + @Override + public int compare(MergeThread t1, MergeThread t2) { + final MergePolicy.OneMerge m1 = t1.getCurrentMerge(); + final MergePolicy.OneMerge m2 = t2.getCurrentMerge(); + + final int c1 = m1 == null ? Integer.MAX_VALUE : m1.totalDocCount; + final int c2 = m2 == null ? Integer.MAX_VALUE : m2.totalDocCount; + + return c2 - c1; } + }; + + /** + * Called whenever the running merges have changed, to pause & unpause + * threads. This method sorts the merge threads by their merge size in + * descending order and then pauses/unpauses threads from first to last -- + * that way, smaller merges are guaranteed to run before larger ones. + */ + protected synchronized void updateMergeThreads() { + + // Only look at threads that are alive & not in the + // process of stopping (ie have an active merge): + final List activeMerges = new ArrayList<>(); + + int threadIdx = 0; + while (threadIdx < mergeThreads.size()) { + final MergeThread mergeThread = mergeThreads.get(threadIdx); + if (!mergeThread.isAlive()) { + // Prune any dead threads + mergeThreads.remove(threadIdx); + continue; + } + if (mergeThread.getCurrentMerge() != null) { + activeMerges.add(mergeThread); + } + threadIdx++; + } + + // Sort the merge threads in descending order. + CollectionUtil.timSort(activeMerges, compareByMergeDocCount); + + int pri = mergeThreadPriority; + final int activeMergeCount = activeMerges.size(); + for (threadIdx=0;threadIdx + * if (verbose()) { + * message("your message"); + * } + * + */ + protected boolean verbose() { + return writer != null && writer.infoStream.isEnabled("CMS"); } + + /** + * Outputs the given message - this method assumes {@link #verbose()} was + * called and returned true. 
+ */ + protected void message(String message) { + writer.infoStream.message("CMS", message); + } private synchronized void initMergeThreadPriority() { if (mergeThreadPriority == -1) { @@ -109,37 +257,60 @@ } } + @Override public void close() { - closed = true; + sync(); } - public synchronized void sync() { - while(mergeThreadCount() > 0) { - message("now wait for threads; currently " + mergeThreads.size() + " still running"); - final int count = mergeThreads.size(); - for(int i=0;i= maxMergeCount) { + // This means merging has fallen too far behind: we + // have already created maxMergeCount threads, and + // now there's at least one more merge pending. + // Note that only maxThreadCount of + // those created merge threads will actually be + // running; the rest will be paused (see + // updateMergeThreads). We stall this producer + // thread to prevent creation of new segments, + // until merging has caught up: + startStallTime = System.currentTimeMillis(); + if (verbose()) { + message(" too many merges; stalling..."); + } + try { + wait(); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); + } + } + if (verbose()) { + if (startStallTime != 0) { + message(" stalled for " + (System.currentTimeMillis()-startStallTime) + " msec"); + } + } + MergePolicy.OneMerge merge = writer.getNextMerge(); if (merge == null) { - message(" no more merges pending; now return"); + if (verbose()) { + message(" no more merges pending; now return"); + } return; } - // We do this w/ the primary thread to keep - // deterministic assignment of segment names - writer.mergeInit(merge); - - synchronized(this) { - while (mergeThreadCount() >= maxThreadCount) { - message(" too many merge threads running; stalling..."); - try { - wait(); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } + boolean success = false; + try { + if (verbose()) { + message(" consider merge " + writer.segString(merge.segments)); } - message(" consider merge " + merge.segString(dir)); - - assert mergeThreadCount() < maxThreadCount; - // OK to spawn a new merge thread to handle this // merge: final MergeThread merger = getMergeThread(writer, merge); mergeThreads.add(merger); - message(" launch new thread [" + merger.getName() + "]"); + if (verbose()) { + message(" launch new thread [" + merger.getName() + "]"); + } + merger.start(); + + // Must call this after starting the thread else + // the new thread is removed from mergeThreads + // (since it's not alive yet): + updateMergeThreads(); + + success = true; + } finally { + if (!success) { + writer.mergeFinish(merge); + } } } } /** Does the actual merge, by calling {@link IndexWriter#merge} */ - protected void doMerge(MergePolicy.OneMerge merge) - throws IOException { + protected void doMerge(MergePolicy.OneMerge merge) throws IOException { writer.merge(merge); } @@ -214,25 +414,44 @@ return thread; } + /** Runs a merge thread, which may run one or more merges + * in sequence. */ protected class MergeThread extends Thread { - IndexWriter writer; + IndexWriter tWriter; MergePolicy.OneMerge startMerge; MergePolicy.OneMerge runningMerge; + private volatile boolean done; - public MergeThread(IndexWriter writer, MergePolicy.OneMerge startMerge) throws IOException { - this.writer = writer; + /** Sole constructor. */ + public MergeThread(IndexWriter writer, MergePolicy.OneMerge startMerge) { + this.tWriter = writer; this.startMerge = startMerge; } + /** Record the currently running merge. 
*/ public synchronized void setRunningMerge(MergePolicy.OneMerge merge) { runningMerge = merge; } + /** Return the currently running merge. */ public synchronized MergePolicy.OneMerge getRunningMerge() { return runningMerge; } + /** Return the current merge, or null if this {@code + * MergeThread} is done. */ + public synchronized MergePolicy.OneMerge getCurrentMerge() { + if (done) { + return null; + } else if (runningMerge != null) { + return runningMerge; + } else { + return startMerge; + } + } + + /** Set the priority of this thread. */ public void setThreadPriority(int pri) { try { setPriority(pri); @@ -245,6 +464,7 @@ } } + @Override public void run() { // First time through the while loop we do the merge @@ -253,101 +473,78 @@ try { - message(" merge thread: start"); + if (verbose()) { + message(" merge thread: start"); + } while(true) { setRunningMerge(merge); doMerge(merge); // Subsequent times through the loop we do any new // merge that writer says is necessary: - merge = writer.getNextMerge(); + merge = tWriter.getNextMerge(); + + // Notify here in case any threads were stalled; + // they will notice that the pending merge has + // been pulled and possibly resume: + synchronized(ConcurrentMergeScheduler.this) { + ConcurrentMergeScheduler.this.notifyAll(); + } + if (merge != null) { - writer.mergeInit(merge); - message(" merge thread: do another merge " + merge.segString(dir)); - } else + updateMergeThreads(); + if (verbose()) { + message(" merge thread: do another merge " + tWriter.segString(merge.segments)); + } + } else { break; + } } - message(" merge thread: done"); + if (verbose()) { + message(" merge thread: done"); + } } catch (Throwable exc) { // Ignore the exception if it was due to abort: if (!(exc instanceof MergePolicy.MergeAbortedException)) { - synchronized(ConcurrentMergeScheduler.this) { - exceptions.add(exc); - } - + //System.out.println(Thread.currentThread().getName() + ": CMS: exc"); + //exc.printStackTrace(System.out); if (!suppressExceptions) { // suppressExceptions is normally only set during // testing. - anyExceptions = true; handleMergeException(exc); } } } finally { + done = true; synchronized(ConcurrentMergeScheduler.this) { + updateMergeThreads(); ConcurrentMergeScheduler.this.notifyAll(); - boolean removed = mergeThreads.remove(this); - assert removed; } } } - - public String toString() { - MergePolicy.OneMerge merge = getRunningMerge(); - if (merge == null) - merge = startMerge; - return "merge thread: " + merge.segString(dir); - } } /** Called when an exception is hit in a background merge * thread */ protected void handleMergeException(Throwable exc) { + try { + // When an exception is hit during merge, IndexWriter + // removes any partial files and then allows another + // merge to run. 
If whatever caused the error is not + // transient then the exception will keep happening, + // so, we sleep here to avoid saturating CPU in such + // cases: + Thread.sleep(250); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); + } throw new MergePolicy.MergeException(exc, dir); } - static boolean anyExceptions = false; - - /** Used for testing */ - public static boolean anyUnhandledExceptions() { - synchronized(allInstances) { - final int count = allInstances.size(); - // Make sure all outstanding threads are done so we see - // any exceptions they may produce: - for(int i=0;i events; - PrintStream infoStream; - int maxFieldLength = IndexWriter.DEFAULT_MAX_FIELD_LENGTH; - Similarity similarity; - - List newFiles; - - static class DocState { - DocumentsWriter docWriter; - Analyzer analyzer; - int maxFieldLength; - PrintStream infoStream; - Similarity similarity; - int docID; - Document doc; - String maxTermPrefix; - - // Only called by asserts - public boolean testPoint(String name) { - return docWriter.writer.testPoint(name); - } - } - - static class FlushState { - DocumentsWriter docWriter; - Directory directory; - String segmentName; - String docStoreSegmentName; - int numDocsInRAM; - int numDocsInStore; - Collection flushedFiles; - - public String segmentFileName(String ext) { - return segmentName + "." + ext; - } - } - - /** Consumer returns this on each doc. This holds any - * state that must be flushed synchronized "in docID - * order". We gather these and flush them in order. */ - abstract static class DocWriter { - DocWriter next; - int docID; - abstract void finish() throws IOException; - abstract void abort(); - abstract long sizeInBytes(); - - void setNext(DocWriter next) { - this.next = next; - } - }; - - final DocConsumer consumer; - - // Deletes done after the last flush; these are discarded - // on abort - private BufferedDeletes deletesInRAM = new BufferedDeletes(); - - // Deletes done before the last flush; these are still - // kept on abort - private BufferedDeletes deletesFlushed = new BufferedDeletes(); - - // The max number of delete terms that can be buffered before - // they must be flushed to disk. - private int maxBufferedDeleteTerms = IndexWriter.DEFAULT_MAX_BUFFERED_DELETE_TERMS; - - // How much RAM we can use before flushing. This is 0 if - // we are flushing by doc count instead. - private long ramBufferSize = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB*1024*1024); - private long waitQueuePauseBytes = (long) (ramBufferSize*0.1); - private long waitQueueResumeBytes = (long) (ramBufferSize*0.05); - - // If we've allocated 5% over our RAM budget, we then - // free down to 95% - private long freeTrigger = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB*1024*1024*1.05); - private long freeLevel = (long) (IndexWriter.DEFAULT_RAM_BUFFER_SIZE_MB*1024*1024*0.95); - - // Flush @ this number of docs. If ramBufferSize is - // non-zero we will flush by RAM usage instead. 
- private int maxBufferedDocs = IndexWriter.DEFAULT_MAX_BUFFERED_DOCS; - - private int flushedDocCount; // How many docs already flushed to index - - synchronized void updateFlushedDocCount(int n) { - flushedDocCount += n; - } - synchronized int getFlushedDocCount() { - return flushedDocCount; - } - synchronized void setFlushedDocCount(int n) { - flushedDocCount = n; - } - - private boolean closed; - - DocumentsWriter(Directory directory, IndexWriter writer) throws IOException { + + DocumentsWriter(IndexWriter writer, LiveIndexWriterConfig config, Directory directory) { this.directory = directory; + this.config = config; + this.infoStream = config.getInfoStream(); + this.perThreadPool = config.getIndexerThreadPool(); + flushPolicy = config.getFlushPolicy(); this.writer = writer; - this.similarity = writer.getSimilarity(); - flushedDocCount = writer.maxDoc(); - - /* - This is the current indexing chain: - - DocConsumer / DocConsumerPerThread - --> code: DocFieldProcessor / DocFieldProcessorPerThread - --> DocFieldConsumer / DocFieldConsumerPerThread / DocFieldConsumerPerField - --> code: DocFieldConsumers / DocFieldConsumersPerThread / DocFieldConsumersPerField - --> code: DocInverter / DocInverterPerThread / DocInverterPerField - --> InvertedDocConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: TermsHash / TermsHashPerThread / TermsHashPerField - --> TermsHashConsumer / TermsHashConsumerPerThread / TermsHashConsumerPerField - --> code: FreqProxTermsWriter / FreqProxTermsWriterPerThread / FreqProxTermsWriterPerField - --> code: TermVectorsTermsWriter / TermVectorsTermsWriterPerThread / TermVectorsTermsWriterPerField - --> InvertedDocEndConsumer / InvertedDocConsumerPerThread / InvertedDocConsumerPerField - --> code: NormsWriter / NormsWriterPerThread / NormsWriterPerField - --> code: StoredFieldsWriter / StoredFieldsWriterPerThread / StoredFieldsWriterPerField - */ - - // TODO FI: this should be something the user can pass in - // Build up indexing chain: - final TermsHashConsumer termVectorsWriter = new TermVectorsTermsWriter(this); - final TermsHashConsumer freqProxWriter = new FreqProxTermsWriter(); - - final InvertedDocConsumer termsHash = new TermsHash(this, true, freqProxWriter, - new TermsHash(this, false, termVectorsWriter, null)); - final NormsWriter normsWriter = new NormsWriter(); - final DocInverter docInverter = new DocInverter(termsHash, normsWriter); - final StoredFieldsWriter fieldsWriter = new StoredFieldsWriter(this); - final DocFieldConsumers docFieldConsumers = new DocFieldConsumers(docInverter, fieldsWriter); - consumer = docFieldProcessor = new DocFieldProcessor(this, docFieldConsumers); + this.events = new ConcurrentLinkedQueue<>(); + flushControl = new DocumentsWriterFlushControl(this, config, writer.bufferedUpdatesStream); } - - /** Returns true if any of the fields in the current - * buffered docs have omitTf==false */ - boolean hasProx() { - return docFieldProcessor.fieldInfos.hasProx(); + + synchronized boolean deleteQueries(final Query... queries) throws IOException { + // TODO why is this synchronized? + final DocumentsWriterDeleteQueue deleteQueue = this.deleteQueue; + deleteQueue.addDelete(queries); + flushControl.doOnDelete(); + return applyAllDeletes(deleteQueue); } - /** If non-null, various details of indexing are printed - * here. 
*/ - synchronized void setInfoStream(PrintStream infoStream) { - this.infoStream = infoStream; - for(int i=0;i newFilesSet = new HashSet<>(); try { - message("docWriter: now abort"); - - // Forcefully remove waiting ThreadStates from line - waitQueue.abort(); - - // Wait for all other threads to finish with - // DocumentsWriter: - pauseAllThreads(); - - try { - - assert 0 == waitQueue.numWaiting; - - waitQueue.waitingBytes = 0; - + deleteQueue.clear(); + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", "abort"); + } + final int limit = perThreadPool.getActiveThreadState(); + for (int i = 0; i < limit; i++) { + final ThreadState perThread = perThreadPool.getThreadState(i); + perThread.lock(); try { - abortedFiles = openFiles(); - } catch (Throwable t) { - abortedFiles = null; + abortThreadState(perThread, newFilesSet); + } finally { + perThread.unlock(); } - - deletesInRAM.clear(); - - openFiles.clear(); - - for(int i=0;i 0; - - assert nextDocID == numDocsInRAM; - assert waitQueue.numWaiting == 0; - assert waitQueue.waitingBytes == 0; - - initFlushState(false); - - docStoreOffset = numDocsInStore; - - if (infoStream != null) - message("flush postings as segment " + flushState.segmentName + " numDocs=" + numDocsInRAM); - boolean success = false; - try { - - if (closeDocStore) { - assert flushState.docStoreSegmentName != null; - assert flushState.docStoreSegmentName.equals(flushState.segmentName); - closeDocStore(); - flushState.numDocsInStore = 0; + deleteQueue.clear(); + final int limit = perThreadPool.getMaxThreadStates(); + final Set newFilesSet = new HashSet<>(); + for (int i = 0; i < limit; i++) { + final ThreadState perThread = perThreadPool.getThreadState(i); + perThread.lock(); + abortThreadState(perThread, newFilesSet); } - - Collection threads = new HashSet(); - for(int i=0;i newFiles) { + assert perThread.isHeldByCurrentThread(); + if (perThread.isActive()) { // we might be closed + if (perThread.isInitialized()) { + try { + subtractFlushedNumDocs(perThread.dwpt.getNumDocsInRAM()); + perThread.dwpt.abort(newFiles); + } finally { + perThread.dwpt.checkAndResetHasAborted(); + flushControl.doOnAbort(perThread); + } + } else { + flushControl.doOnAbort(perThread); + } + } else { + assert closed; + } } + + final synchronized void unlockAllAfterAbortAll(IndexWriter indexWriter) { + assert indexWriter.holdsFullFlushLock(); + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", "unlockAll"); + } + final int limit = perThreadPool.getMaxThreadStates(); + for (int i = 0; i < limit; i++) { + try { + final ThreadState perThread = perThreadPool.getThreadState(i); + if (perThread.isHeldByCurrentThread()) { + perThread.unlock(); + } + } catch(Throwable e) { + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", "unlockAll: could not unlock state: " + i + " msg:" + e.getMessage()); + } + // ignore & keep on unlocking + } + } + } - /** Set flushPending if it is not already set and returns - * whether it was set. This is used by IndexWriter to - * trigger a single flush even when multiple threads are - * trying to do so. */ - synchronized boolean setFlushPending() { - if (flushPending) - return false; - else { - flushPending = true; - return true; + boolean anyChanges() { + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", "anyChanges? 
numDocsInRam=" + numDocsInRAM.get() + + " deletes=" + anyDeletions() + " hasTickets:" + + ticketQueue.hasTickets() + " pendingChangesInFullFlush: " + + pendingChangesInCurrentFullFlush); } + /* + * changes are either in a DWPT or in the deleteQueue. + * yet if we currently flush deletes and / or dwpt there + * could be a window where all changes are in the ticket queue + * before they are published to the IW. ie we need to check if the + * ticket queue has any tickets. + */ + return numDocsInRAM.get() != 0 || anyDeletions() || ticketQueue.hasTickets() || pendingChangesInCurrentFullFlush; } + + public int getBufferedDeleteTermsSize() { + return deleteQueue.getBufferedUpdatesTermsSize(); + } - synchronized void clearFlushPending() { - flushPending = false; + //for testing + public int getNumBufferedDeleteTerms() { + return deleteQueue.numGlobalTermDeletes(); } - synchronized void pushDeletes() { - deletesFlushed.update(deletesInRAM); + public boolean anyDeletions() { + return deleteQueue.anyChanges(); } - synchronized void close() { + @Override + public void close() { closed = true; - notifyAll(); + flushControl.setClosed(); } - synchronized void initSegmentName(boolean onlyDocStore) { - if (segment == null && (!onlyDocStore || docStoreSegment == null)) { - segment = writer.newSegmentName(); - assert numDocsInRAM == 0; - } - if (docStoreSegment == null) { - docStoreSegment = segment; - assert numDocsInStore == 0; - } - } + private boolean preUpdate() throws IOException { + ensureOpen(); + boolean hasEvents = false; + if (flushControl.anyStalledThreads() || flushControl.numQueuedFlushes() > 0) { + // Help out flushing any queued DWPTs so we can un-stall: + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", "DocumentsWriter has queued dwpt; will hijack this thread to flush pending segment(s)"); + } + do { + // Try pick up pending threads here if possible + DocumentsWriterPerThread flushingDWPT; + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + // Don't push the delete here since the update could fail! + hasEvents |= doFlush(flushingDWPT); + } + + if (infoStream.isEnabled("DW")) { + if (flushControl.anyStalledThreads()) { + infoStream.message("DW", "WARNING DocumentsWriter has stalled threads; waiting"); + } + } + + flushControl.waitIfStalled(); // block if stalled + } while (flushControl.numQueuedFlushes() != 0); // still queued DWPTs try help flushing - /** Returns a free (idle) ThreadState that may be used for - * indexing this one document. This call also pauses if a - * flush is pending. If delTerm is non-null then we - * buffer this deleted term after the thread state has - * been acquired. */ - synchronized DocumentsWriterThreadState getThreadState(Document doc, Term delTerm) throws IOException { - - // First, find a thread state. If this thread already - // has affinity to a specific ThreadState, use that one - // again. - DocumentsWriterThreadState state = (DocumentsWriterThreadState) threadBindings.get(Thread.currentThread()); - if (state == null) { - - // First time this thread has called us since last - // flush. 
Find the least loaded thread state: - DocumentsWriterThreadState minThreadState = null; - for(int i=0;i= MAX_THREAD_STATE)) { - state = minThreadState; - state.numThreads++; - } else { - // Just create a new "private" thread state - DocumentsWriterThreadState[] newArray = new DocumentsWriterThreadState[1+threadStates.length]; - if (threadStates.length > 0) - System.arraycopy(threadStates, 0, newArray, 0, threadStates.length); - state = newArray[threadStates.length] = new DocumentsWriterThreadState(this); - threadStates = newArray; - } - threadBindings.put(Thread.currentThread(), state); } + return hasEvents; + } - // Next, wait until my thread state is idle (in case - // it's shared with other threads) and for threads to - // not be paused nor a flush pending: - waitReady(state); - - // Allocate segment name if this is the first doc since - // last flush: - initSegmentName(false); - - state.isIdle = false; - - boolean success = false; - try { - state.docState.docID = nextDocID; - - assert writer.testPoint("DocumentsWriter.ThreadState.init start"); - - if (delTerm != null) { - addDeleteTerm(delTerm, state.docState.docID); - state.doFlushAfter = timeToFlushDeletes(); + private boolean postUpdate(DocumentsWriterPerThread flushingDWPT, boolean hasEvents) throws IOException { + hasEvents |= applyAllDeletes(deleteQueue); + if (flushingDWPT != null) { + hasEvents |= doFlush(flushingDWPT); + } else { + final DocumentsWriterPerThread nextPendingFlush = flushControl.nextPendingFlush(); + if (nextPendingFlush != null) { + hasEvents |= doFlush(nextPendingFlush); } - - assert writer.testPoint("DocumentsWriter.ThreadState.init after delTerm"); - - nextDocID++; - numDocsInRAM++; - - // We must at this point commit to flushing to ensure we - // always get N docs when we flush by doc count, even if - // > 1 thread is adding documents: - if (!flushPending && - maxBufferedDocs != IndexWriter.DISABLE_AUTO_FLUSH - && numDocsInRAM >= maxBufferedDocs) { - flushPending = true; - state.doFlushAfter = true; - } - - success = true; - } finally { - if (!success) { - // Forcefully idle this ThreadState: - state.isIdle = true; - notifyAll(); - if (state.doFlushAfter) { - state.doFlushAfter = false; - flushPending = false; - } - } } - return state; + return hasEvents; } - - /** Returns true if the caller (IndexWriter) should now - * flush. 
*/ - boolean addDocument(Document doc, Analyzer analyzer) - throws CorruptIndexException, IOException { - return updateDocument(doc, analyzer, null); + + private final void ensureInitialized(ThreadState state) throws IOException { + if (state.isActive() && state.dwpt == null) { + final FieldInfos.Builder infos = new FieldInfos.Builder( + writer.globalFieldNumberMap); + state.dwpt = new DocumentsWriterPerThread(writer.newSegmentName(), + directory, config, infoStream, deleteQueue, infos, + writer.pendingNumDocs); + } } - boolean updateDocument(Term t, Document doc, Analyzer analyzer) - throws CorruptIndexException, IOException { - return updateDocument(doc, analyzer, t); - } + boolean updateDocuments(final Iterable> docs, final Analyzer analyzer, + final Term delTerm) throws IOException { + boolean hasEvents = preUpdate(); - boolean updateDocument(Document doc, Analyzer analyzer, Term delTerm) - throws CorruptIndexException, IOException { - - // This call is synchronized but fast - final DocumentsWriterThreadState state = getThreadState(doc, delTerm); - - final DocState docState = state.docState; - docState.doc = doc; - docState.analyzer = analyzer; - - boolean success = false; + final ThreadState perThread = flushControl.obtainAndLock(); + final DocumentsWriterPerThread flushingDWPT; + try { - // This call is not synchronized and does all the - // work - final DocWriter perDoc = state.consumer.processDocument(); - - // This call is synchronized but fast - finishDocument(state, perDoc); - success = true; - } finally { - if (!success) { - synchronized(this) { - - if (aborting) { - state.isIdle = true; - notifyAll(); - abort(); - } else { - skipDocWriter.docID = docState.docID; - boolean success2 = false; - try { - waitQueue.add(skipDocWriter); - success2 = true; - } finally { - if (!success2) { - state.isIdle = true; - notifyAll(); - abort(); - return false; - } - } - - state.isIdle = true; - notifyAll(); - - // If this thread state had decided to flush, we - // must clear it so another thread can flush - if (state.doFlushAfter) { - state.doFlushAfter = false; - flushPending = false; - notifyAll(); - } - - // Immediately mark this document as deleted - // since likely it was partially added. 
This - // keeps indexing as "all or none" (atomic) when - // adding a document: - addDeleteDocID(state.docState.docID); + if (!perThread.isActive()) { + ensureOpen(); + assert false: "perThread is not active but we are still open"; + } + ensureInitialized(perThread); + assert perThread.isInitialized(); + final DocumentsWriterPerThread dwpt = perThread.dwpt; + final int dwptNumDocs = dwpt.getNumDocsInRAM(); + try { + dwpt.updateDocuments(docs, analyzer, delTerm); + } finally { + // We don't know how many documents were actually + // counted as indexed, so we must subtract here to + // accumulate our separate counter: + numDocsInRAM.addAndGet(dwpt.getNumDocsInRAM() - dwptNumDocs); + if (dwpt.checkAndResetHasAborted()) { + if (!dwpt.pendingFilesToDelete().isEmpty()) { + putEvent(new DeleteNewFilesEvent(dwpt.pendingFilesToDelete())); } + subtractFlushedNumDocs(dwptNumDocs); + flushControl.doOnAbort(perThread); } } + final boolean isUpdate = delTerm != null; + flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); + } finally { + perThreadPool.release(perThread); } - return state.doFlushAfter || timeToFlushDeletes(); + return postUpdate(flushingDWPT, hasEvents); } - // for testing - synchronized int getNumBufferedDeleteTerms() { - return deletesInRAM.numTerms; - } + boolean updateDocument(final Iterable doc, final Analyzer analyzer, + final Term delTerm) throws IOException { - // for testing - synchronized HashMap getBufferedDeleteTerms() { - return deletesInRAM.terms; - } + boolean hasEvents = preUpdate(); - /** Called whenever a merge has completed and the merged segments had deletions */ - synchronized void remapDeletes(SegmentInfos infos, int[][] docMaps, int[] delCounts, MergePolicy.OneMerge merge, int mergeDocCount) { - if (docMaps == null) - // The merged segments had no deletes so docIDs did not change and we have nothing to do - return; - MergeDocIDRemapper mapper = new MergeDocIDRemapper(infos, docMaps, delCounts, merge, mergeDocCount); - deletesInRAM.remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); - deletesFlushed.remap(mapper, infos, docMaps, delCounts, merge, mergeDocCount); - flushedDocCount -= mapper.docShift; - } + final ThreadState perThread = flushControl.obtainAndLock(); - synchronized private void waitReady(DocumentsWriterThreadState state) { - - while (!closed && ((state != null && !state.isIdle) || pauseThreads != 0 || flushPending || aborting)) { - try { - wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); + final DocumentsWriterPerThread flushingDWPT; + try { + if (!perThread.isActive()) { + ensureOpen(); + assert false: "perThread is not active but we are still open"; } - } - - if (closed) - throw new AlreadyClosedException("this IndexWriter is closed"); - } - - synchronized boolean bufferDeleteTerms(Term[] terms) throws IOException { - waitReady(null); - for (int i = 0; i < terms.length; i++) - addDeleteTerm(terms[i], numDocsInRAM); - return timeToFlushDeletes(); - } - - synchronized boolean bufferDeleteTerm(Term term) throws IOException { - waitReady(null); - addDeleteTerm(term, numDocsInRAM); - return timeToFlushDeletes(); - } - - synchronized boolean bufferDeleteQueries(Query[] queries) throws IOException { - waitReady(null); - for (int i = 0; i < queries.length; i++) - addDeleteQuery(queries[i], numDocsInRAM); - return timeToFlushDeletes(); - } - - synchronized boolean bufferDeleteQuery(Query query) throws IOException { - waitReady(null); - addDeleteQuery(query, numDocsInRAM); - return timeToFlushDeletes(); 
- } - - synchronized boolean deletesFull() { - return maxBufferedDeleteTerms != IndexWriter.DISABLE_AUTO_FLUSH - && ((deletesInRAM.numTerms + deletesInRAM.queries.size() + deletesInRAM.docIDs.size()) >= maxBufferedDeleteTerms); - } - - synchronized private boolean timeToFlushDeletes() { - return (bufferIsFull || deletesFull()) && setFlushPending(); - } - - void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { - this.maxBufferedDeleteTerms = maxBufferedDeleteTerms; - } - - int getMaxBufferedDeleteTerms() { - return maxBufferedDeleteTerms; - } - - synchronized boolean hasDeletes() { - return deletesFlushed.any(); - } - - synchronized boolean applyDeletes(SegmentInfos infos) throws IOException { - - if (!hasDeletes()) - return false; - - if (infoStream != null) - message("apply " + deletesFlushed.numTerms + " buffered deleted terms and " + - deletesFlushed.docIDs.size() + " deleted docIDs and " + - deletesFlushed.queries.size() + " deleted queries on " + - + infos.size() + " segments."); - - final int infosEnd = infos.size(); - - int docStart = 0; - boolean any = false; - for (int i = 0; i < infosEnd; i++) { - IndexReader reader = SegmentReader.get(infos.info(i), false); - boolean success = false; + ensureInitialized(perThread); + assert perThread.isInitialized(); + final DocumentsWriterPerThread dwpt = perThread.dwpt; + final int dwptNumDocs = dwpt.getNumDocsInRAM(); try { - any |= applyDeletes(reader, docStart); - docStart += reader.maxDoc(); - success = true; + dwpt.updateDocument(doc, analyzer, delTerm); } finally { - if (reader != null) { - try { - if (success) - reader.doCommit(); - } finally { - reader.doClose(); + // We don't know whether the document actually + // counted as being indexed, so we must subtract here to + // accumulate our separate counter: + numDocsInRAM.addAndGet(dwpt.getNumDocsInRAM() - dwptNumDocs); + if (dwpt.checkAndResetHasAborted()) { + if (!dwpt.pendingFilesToDelete().isEmpty()) { + putEvent(new DeleteNewFilesEvent(dwpt.pendingFilesToDelete())); } + subtractFlushedNumDocs(dwptNumDocs); + flushControl.doOnAbort(perThread); } } + final boolean isUpdate = delTerm != null; + flushingDWPT = flushControl.doAfterDocument(perThread, isUpdate); + } finally { + perThreadPool.release(perThread); } - deletesFlushed.clear(); - - return any; + return postUpdate(flushingDWPT, hasEvents); } - // Apply buffered delete terms, queries and docIDs to the - // provided reader - private final synchronized boolean applyDeletes(IndexReader reader, int docIDStart) - throws CorruptIndexException, IOException { - - final int docEnd = docIDStart + reader.maxDoc(); - boolean any = false; - - // Delete by term - Iterator iter = deletesFlushed.terms.entrySet().iterator(); - while (iter.hasNext()) { - Entry entry = (Entry) iter.next(); - Term term = (Term) entry.getKey(); - - TermDocs docs = reader.termDocs(term); - if (docs != null) { - int limit = ((BufferedDeletes.Num) entry.getValue()).getNum(); + private boolean doFlush(DocumentsWriterPerThread flushingDWPT) throws IOException { + boolean hasEvents = false; + while (flushingDWPT != null) { + hasEvents = true; + boolean success = false; + SegmentFlushTicket ticket = null; + try { + assert currentFullFlushDelQueue == null + || flushingDWPT.deleteQueue == currentFullFlushDelQueue : "expected: " + + currentFullFlushDelQueue + "but was: " + flushingDWPT.deleteQueue + + " " + flushControl.isFullFlush(); + /* + * Since with DWPT the flush process is concurrent and several DWPT + * could flush at the same time we must maintain the order of 
the + * flushes before we can apply the flushed segment and the frozen global + * deletes it is buffering. The reason for this is that the global + * deletes mark a certain point in time where we took a DWPT out of + * rotation and freeze the global deletes. + * + * Example: A flush 'A' starts and freezes the global deletes, then + * flush 'B' starts and freezes all deletes occurred since 'A' has + * started. if 'B' finishes before 'A' we need to wait until 'A' is done + * otherwise the deletes frozen by 'B' are not applied to 'A' and we + * might miss to deletes documents in 'A'. + */ try { - while (docs.next()) { - int docID = docs.doc(); - if (docIDStart+docID >= limit) - break; - reader.deleteDocument(docID); - any = true; + // Each flush is assigned a ticket in the order they acquire the ticketQueue lock + ticket = ticketQueue.addFlushTicket(flushingDWPT); + + final int flushingDocsInRam = flushingDWPT.getNumDocsInRAM(); + boolean dwptSuccess = false; + try { + // flush concurrently without locking + final FlushedSegment newSegment = flushingDWPT.flush(); + ticketQueue.addSegment(ticket, newSegment); + dwptSuccess = true; + } finally { + subtractFlushedNumDocs(flushingDocsInRam); + if (!flushingDWPT.pendingFilesToDelete().isEmpty()) { + putEvent(new DeleteNewFilesEvent(flushingDWPT.pendingFilesToDelete())); + hasEvents = true; + } + if (!dwptSuccess) { + putEvent(new FlushFailedEvent(flushingDWPT.getSegmentInfo())); + hasEvents = true; + } } + // flush was successful once we reached this point - new seg. has been assigned to the ticket! + success = true; } finally { - docs.close(); + if (!success && ticket != null) { + // In the case of a failure make sure we are making progress and + // apply all the deletes since the segment flush failed since the flush + // ticket could hold global deletes see FlushTicket#canPublish() + ticketQueue.markTicketFailed(ticket); + } } + /* + * Now we are done and try to flush the ticket queue if the head of the + * queue has already finished the flush. + */ + if (ticketQueue.getTicketCount() >= perThreadPool.getActiveThreadState()) { + // This means there is a backlog: the one + // thread in innerPurge can't keep up with all + // other threads flushing segments. In this case + // we forcefully stall the producers. + putEvent(ForcedPurgeEvent.INSTANCE); + break; + } + } finally { + flushControl.doAfterFlush(flushingDWPT); + flushingDWPT.checkAndResetHasAborted(); } + + flushingDWPT = flushControl.nextPendingFlush(); } - - // Delete by docID - iter = deletesFlushed.docIDs.iterator(); - while(iter.hasNext()) { - int docID = ((Integer) iter.next()).intValue(); - if (docID >= docIDStart && docID < docEnd) { - reader.deleteDocument(docID-docIDStart); - any = true; - } + if (hasEvents) { + putEvent(MergePendingEvent.INSTANCE); } - - // Delete by query - IndexSearcher searcher = new IndexSearcher(reader); - iter = deletesFlushed.queries.entrySet().iterator(); - while(iter.hasNext()) { - Entry entry = (Entry) iter.next(); - Query query = (Query) entry.getKey(); - int limit = ((Integer) entry.getValue()).intValue(); - Weight weight = query.weight(searcher); - Scorer scorer = weight.scorer(reader); - while(scorer.next()) { - final int docID = scorer.doc(); - if (docIDStart + docID >= limit) - break; - reader.deleteDocument(docID); - any = true; + // If deletes alone are consuming > 1/2 our RAM + // buffer, force them all to apply now. 
This is to + // prevent too-frequent flushing of a long tail of + // tiny segments: + final double ramBufferSizeMB = config.getRAMBufferSizeMB(); + if (ramBufferSizeMB != IndexWriterConfig.DISABLE_AUTO_FLUSH && + flushControl.getDeleteBytesUsed() > (1024*1024*ramBufferSizeMB/2)) { + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", "force apply deletes bytesUsed=" + flushControl.getDeleteBytesUsed() + " vs ramBuffer=" + (1024*1024*ramBufferSizeMB)); } + hasEvents = true; + if (!this.applyAllDeletes(deleteQueue)) { + putEvent(ApplyDeletesEvent.INSTANCE); + } } - searcher.close(); - return any; - } - // Buffer a term in bufferedDeleteTerms, which records the - // current number of documents buffered in ram so that the - // delete term will be applied to those documents as well - // as the disk segments. - synchronized private void addDeleteTerm(Term term, int docCount) { - BufferedDeletes.Num num = (BufferedDeletes.Num) deletesInRAM.terms.get(term); - final int docIDUpto = flushedDocCount + docCount; - if (num == null) - deletesInRAM.terms.put(term, new BufferedDeletes.Num(docIDUpto)); - else - num.setNum(docIDUpto); - deletesInRAM.numTerms++; + return hasEvents; } - - // Buffer a specific docID for deletion. Currently only - // used when we hit a exception when adding a document - synchronized private void addDeleteDocID(int docID) { - deletesInRAM.docIDs.add(new Integer(flushedDocCount+docID)); - } - - synchronized private void addDeleteQuery(Query query, int docID) { - deletesInRAM.queries.put(query, new Integer(flushedDocCount + docID)); - } - - synchronized boolean doBalanceRAM() { - return ramBufferSize != IndexWriter.DISABLE_AUTO_FLUSH && !bufferIsFull && (numBytesUsed >= ramBufferSize || numBytesAlloc >= freeTrigger); - } - - /** Does the synchronized work to finish/flush the - * inverted document. */ - private void finishDocument(DocumentsWriterThreadState perThread, DocWriter docWriter) throws IOException { - - if (doBalanceRAM()) - // Must call this w/o holding synchronized(this) else - // we'll hit deadlock: - balanceRAM(); - - synchronized(this) { - - assert docWriter == null || docWriter.docID == perThread.docState.docID; - - - if (aborting) { - - // We are currently aborting, and another thread is - // waiting for me to become idle. 
We just forcefully - // idle this threadState; it will be fully reset by - // abort() - if (docWriter != null) - try { - docWriter.abort(); - } catch (Throwable t) { - } - - perThread.isIdle = true; - notifyAll(); - return; - } - - final boolean doPause; - - if (docWriter != null) - doPause = waitQueue.add(docWriter); - else { - skipDocWriter.docID = perThread.docState.docID; - doPause = waitQueue.add(skipDocWriter); - } - - if (doPause) - waitForWaitQueue(); - - if (bufferIsFull && !flushPending) { - flushPending = true; - perThread.doFlushAfter = true; - } - - perThread.isIdle = true; - notifyAll(); + + final void subtractFlushedNumDocs(int numFlushed) { + int oldValue = numDocsInRAM.get(); + while (!numDocsInRAM.compareAndSet(oldValue, oldValue - numFlushed)) { + oldValue = numDocsInRAM.get(); } + assert numDocsInRAM.get() >= 0; } + + // for asserts + private volatile DocumentsWriterDeleteQueue currentFullFlushDelQueue = null; - synchronized void waitForWaitQueue() { - do { - try { - wait(); - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - } - } while (!waitQueue.doResume()); + // for asserts + private synchronized boolean setFlushingDeleteQueue(DocumentsWriterDeleteQueue session) { + currentFullFlushDelQueue = session; + return true; } - - private static class SkipDocWriter extends DocWriter { - void finish() { + + /* + * FlushAllThreads is synced by IW fullFlushLock. Flushing all threads is a + * two stage operation; the caller must ensure (in try/finally) that finishFlush + * is called after this method, to release the flush lock in DWFlushControl + */ + final boolean flushAllThreads(final IndexWriter indexWriter) + throws IOException { + final DocumentsWriterDeleteQueue flushingDeleteQueue; + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", "startFullFlush"); } - void abort() { + + synchronized (this) { + pendingChangesInCurrentFullFlush = anyChanges(); + flushingDeleteQueue = deleteQueue; + /* Cutover to a new delete queue. 
This must be synced on the flush control + * otherwise a new DWPT could sneak into the loop with an already flushing + * delete queue */ + flushControl.markForFullFlush(); // swaps the delQueue synced on FlushControl + assert setFlushingDeleteQueue(flushingDeleteQueue); } - long sizeInBytes() { - return 0; - } - } - final SkipDocWriter skipDocWriter = new SkipDocWriter(); - - long getRAMUsed() { - return numBytesUsed; - } - - long numBytesAlloc; - long numBytesUsed; - - NumberFormat nf = NumberFormat.getInstance(); - - // TODO FI: this is not flexible -- we can't hardwire - // extensions in here: - private long segmentSize(String segmentName) throws IOException { - // Used only when infoStream != null - assert infoStream != null; + assert currentFullFlushDelQueue != null; + assert currentFullFlushDelQueue != deleteQueue; - long size = directory.fileLength(segmentName + ".tii") + - directory.fileLength(segmentName + ".tis") + - directory.fileLength(segmentName + ".frq") + - directory.fileLength(segmentName + ".prx"); - - final String normFileName = segmentName + ".nrm"; - if (directory.fileExists(normFileName)) - size += directory.fileLength(normFileName); - - return size; - } - - // Coarse estimates used to measure RAM usage of buffered deletes - final static int OBJECT_HEADER_BYTES = 8; - final static int POINTER_NUM_BYTE = 4; - final static int INT_NUM_BYTE = 4; - final static int CHAR_NUM_BYTE = 2; - - /* Initial chunks size of the shared byte[] blocks used to - store postings data */ - final static int BYTE_BLOCK_SHIFT = 15; - final static int BYTE_BLOCK_SIZE = (int) (1 << BYTE_BLOCK_SHIFT); - final static int BYTE_BLOCK_MASK = BYTE_BLOCK_SIZE - 1; - final static int BYTE_BLOCK_NOT_MASK = ~BYTE_BLOCK_MASK; - - private class ByteBlockAllocator extends ByteBlockPool.Allocator { - - ArrayList freeByteBlocks = new ArrayList(); - - /* Allocate another byte[] from the shared pool */ - byte[] getByteBlock(boolean trackAllocations) { - synchronized(DocumentsWriter.this) { - final int size = freeByteBlocks.size(); - final byte[] b; - if (0 == size) { - // Always record a block allocated, even if - // trackAllocations is false. This is necessary - // because this block will be shared between - // things that don't track allocations (term - // vectors) and things that do (freq/prox - // postings). 
- numBytesAlloc += BYTE_BLOCK_SIZE; - b = new byte[BYTE_BLOCK_SIZE]; - } else - b = (byte[]) freeByteBlocks.remove(size-1); - if (trackAllocations) - numBytesUsed += BYTE_BLOCK_SIZE; - assert numBytesUsed <= numBytesAlloc; - return b; + boolean anythingFlushed = false; + try { + DocumentsWriterPerThread flushingDWPT; + // Help out with flushing: + while ((flushingDWPT = flushControl.nextPendingFlush()) != null) { + anythingFlushed |= doFlush(flushingDWPT); } + // If a concurrent flush is still in flight wait for it + flushControl.waitForFlush(); + if (!anythingFlushed && flushingDeleteQueue.anyChanges()) { // apply deletes if we did not flush any document + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", Thread.currentThread().getName() + ": flush naked frozen global deletes"); + } + ticketQueue.addDeletes(flushingDeleteQueue); + } + ticketQueue.forcePurge(indexWriter); + assert !flushingDeleteQueue.anyChanges() && !ticketQueue.hasTickets(); + } finally { + assert flushingDeleteQueue == currentFullFlushDelQueue; } + return anythingFlushed; + } + + final void finishFullFlush(boolean success) { + try { + if (infoStream.isEnabled("DW")) { + infoStream.message("DW", Thread.currentThread().getName() + " finishFullFlush success=" + success); + } + assert setFlushingDeleteQueue(null); + if (success) { + // Release the flush lock + flushControl.finishFullFlush(); + } else { + Set newFilesSet = new HashSet<>(); + flushControl.abortFullFlushes(newFilesSet); + putEvent(new DeleteNewFilesEvent(newFilesSet)); - /* Return byte[]'s to the pool */ - void recycleByteBlocks(byte[][] blocks, int start, int end) { - synchronized(DocumentsWriter.this) { - for(int i=start;i freeTrigger) { - - if (infoStream != null) - message(" RAM: now balance allocations: usedMB=" + toMB(numBytesUsed) + - " vs trigger=" + toMB(flushTrigger) + - " allocMB=" + toMB(numBytesAlloc) + - " vs trigger=" + toMB(freeTrigger) + - " byteBlockFree=" + toMB(byteBlockAllocator.freeByteBlocks.size()*BYTE_BLOCK_SIZE) + - " charBlockFree=" + toMB(freeCharBlocks.size()*CHAR_BLOCK_SIZE*CHAR_NUM_BYTE)); - - final long startBytesAlloc = numBytesAlloc; - - int iter = 0; - - // We free equally from each pool in 32 KB - // chunks until we are below our threshold - // (freeLevel) - - boolean any = true; - - while(numBytesAlloc > freeLevel) { - - synchronized(this) { - if (0 == byteBlockAllocator.freeByteBlocks.size() && 0 == freeCharBlocks.size() && 0 == freeIntBlocks.size() && !any) { - // Nothing else to free -- must flush now. - bufferIsFull = numBytesUsed > flushTrigger; - if (infoStream != null) { - if (numBytesUsed > flushTrigger) - message(" nothing to free; now set bufferIsFull"); - else - message(" nothing to free"); - } - assert numBytesUsed <= numBytesAlloc; - break; - } - - if ((0 == iter % 4) && byteBlockAllocator.freeByteBlocks.size() > 0) { - byteBlockAllocator.freeByteBlocks.remove(byteBlockAllocator.freeByteBlocks.size()-1); - numBytesAlloc -= BYTE_BLOCK_SIZE; - } - - if ((1 == iter % 4) && freeCharBlocks.size() > 0) { - freeCharBlocks.remove(freeCharBlocks.size()-1); - numBytesAlloc -= CHAR_BLOCK_SIZE * CHAR_NUM_BYTE; - } - - if ((2 == iter % 4) && freeIntBlocks.size() > 0) { - freeIntBlocks.remove(freeIntBlocks.size()-1); - numBytesAlloc -= INT_BLOCK_SIZE * INT_NUM_BYTE; - } - } - - if ((3 == iter % 4) && any) - // Ask consumer to free any recycled state - any = consumer.freeRAM(); - - iter++; - } - - if (infoStream != null) - message(" after free: freedMB=" + nf.format((startBytesAlloc-numBytesAlloc)/1024./1024.) 
+ " usedMB=" + nf.format(numBytesUsed/1024./1024.) + " allocMB=" + nf.format(numBytesAlloc/1024./1024.)); - - } else { - // If we have not crossed the 100% mark, but have - // crossed the 95% mark of RAM we are actually - // using, go ahead and flush. This prevents - // over-allocating and then freeing, with every - // flush. - synchronized(this) { - - if (numBytesUsed > flushTrigger) { - if (infoStream != null) - message(" RAM: now flush @ usedMB=" + nf.format(numBytesUsed/1024./1024.) + - " allocMB=" + nf.format(numBytesAlloc/1024./1024.) + - " triggerMB=" + nf.format(flushTrigger/1024./1024.)); - - bufferIsFull = true; - } - } + + static final class MergePendingEvent implements Event { + static final Event INSTANCE = new MergePendingEvent(); + private int instCount = 0; + private MergePendingEvent() { + assert instCount == 0; + instCount++; } + + @Override + public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException { + writer.doAfterSegmentFlushed(triggerMerge, forcePurge); + } } - - final WaitQueue waitQueue = new WaitQueue(); - - private class WaitQueue { - DocWriter[] waiting; - int nextWriteDocID; - int nextWriteLoc; - int numWaiting; - long waitingBytes; - - public WaitQueue() { - waiting = new DocWriter[10]; + + static final class ForcedPurgeEvent implements Event { + static final Event INSTANCE = new ForcedPurgeEvent(); + private int instCount = 0; + private ForcedPurgeEvent() { + assert instCount == 0; + instCount++; } - - synchronized void reset() { - // NOTE: nextWriteLoc doesn't need to be reset - assert numWaiting == 0; - assert waitingBytes == 0; - nextWriteDocID = 0; + + @Override + public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException { + writer.purge(true); } - - synchronized boolean doResume() { - return waitingBytes <= waitQueueResumeBytes; + } + + static class FlushFailedEvent implements Event { + private final SegmentInfo info; + + public FlushFailedEvent(SegmentInfo info) { + this.info = info; } - - synchronized boolean doPause() { - return waitingBytes > waitQueuePauseBytes; + + @Override + public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException { + writer.flushFailed(info); } - - synchronized void abort() { - int count = 0; - for(int i=0;i files; + + public DeleteNewFilesEvent(Collection files) { + this.files = files; } - - private void writeDocument(DocWriter doc) throws IOException { - assert doc == skipDocWriter || nextWriteDocID == doc.docID; - boolean success = false; - try { - doc.finish(); - nextWriteDocID++; - numDocsInStore++; - nextWriteLoc++; - assert nextWriteLoc <= waiting.length; - if (nextWriteLoc == waiting.length) - nextWriteLoc = 0; - success = true; - } finally { - if (!success) - setAborting(); - } + + @Override + public void process(IndexWriter writer, boolean triggerMerge, boolean forcePurge) throws IOException { + writer.deleteNewFiles(files); } + } - synchronized public boolean add(DocWriter doc) throws IOException { - - assert doc.docID >= nextWriteDocID; - - if (doc.docID == nextWriteDocID) { - writeDocument(doc); - while(true) { - doc = waiting[nextWriteLoc]; - if (doc != null) { - numWaiting--; - waiting[nextWriteLoc] = null; - waitingBytes -= doc.sizeInBytes(); - writeDocument(doc); - } else - break; - } - } else { - - // I finished before documents that were added - // before me. 
This can easily happen when I am a - // small doc and the docs before me were large, or, - // just due to luck in the thread scheduling. Just - // add myself to the queue and when that large doc - // finishes, it will flush me: - int gap = doc.docID - nextWriteDocID; - if (gap >= waiting.length) { - // Grow queue - DocWriter[] newArray = new DocWriter[ArrayUtil.getNextSize(gap)]; - assert nextWriteLoc >= 0; - System.arraycopy(waiting, nextWriteLoc, newArray, 0, waiting.length-nextWriteLoc); - System.arraycopy(waiting, 0, newArray, waiting.length-nextWriteLoc, nextWriteLoc); - nextWriteLoc = 0; - waiting = newArray; - gap = doc.docID - nextWriteDocID; - } - - int loc = nextWriteLoc + gap; - if (loc >= waiting.length) - loc -= waiting.length; - - // We should only wrap one time - assert loc < waiting.length; - - // Nobody should be in my spot! - assert waiting[loc] == null; - waiting[loc] = doc; - numWaiting++; - waitingBytes += doc.sizeInBytes(); - } - - return doPause(); - } + public Queue eventQueue() { + return events; } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/DocumentsWriterDeleteQueue.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/DocumentsWriterFlushControl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/DocumentsWriterFlushQueue.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/DocumentsWriterPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/DocumentsWriterPerThreadPool.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/DocumentsWriterStallControl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/DocumentsWriterThreadState.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/FieldInfo.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/FieldInfo.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/FieldInfo.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/FieldInfo.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,86 +17,327 @@ * limitations under the License. 
*/ -final class FieldInfo { - String name; - boolean isIndexed; - int number; +import java.util.HashMap; +import java.util.Map; - // true if term vector for this field should be stored - boolean storeTermVector; - boolean storeOffsetWithTermVector; - boolean storePositionWithTermVector; +/** + * Access to the Field Info file that describes document fields and whether or + * not they are indexed. Each segment has a separate Field Info file. Objects + * of this class are thread-safe for multiple readers, but only one thread can + * be adding documents at a time, with no other reader or writer threads + * accessing this object. + **/ - boolean omitNorms; // omit norms associated with indexed fields - boolean omitTf; // omit tf - - boolean storePayloads; // whether this field stores payloads together with term positions +public final class FieldInfo { + /** Field's name */ + public final String name; + /** Internal field number */ + public final int number; - FieldInfo(String na, boolean tk, int nu, boolean storeTermVector, - boolean storePositionWithTermVector, boolean storeOffsetWithTermVector, - boolean omitNorms, boolean storePayloads, boolean omitTf) { - name = na; - isIndexed = tk; - number = nu; - this.storeTermVector = storeTermVector; - this.storeOffsetWithTermVector = storeOffsetWithTermVector; - this.storePositionWithTermVector = storePositionWithTermVector; - this.omitNorms = omitNorms; - this.storePayloads = storePayloads; - this.omitTf = omitTf; + private boolean indexed; + private DocValuesType docValueType; + + // True if any document indexed term vectors + private boolean storeTermVector; + + private DocValuesType normType; + private boolean omitNorms; // omit norms associated with indexed fields + private IndexOptions indexOptions; + private boolean storePayloads; // whether this field stores payloads together with term positions + + private Map attributes; + + private long dvGen; + + /** + * Controls how much information is stored in the postings lists. + * @lucene.experimental + */ + public static enum IndexOptions { + // NOTE: order is important here; FieldInfo uses this + // order to merge two conflicting IndexOptions (always + // "downgrades" by picking the lowest). + /** + * Only documents are indexed: term frequencies and positions are omitted. + * Phrase and other positional queries on the field will throw an exception, and scoring + * will behave as if any term in the document appears only once. + */ + // TODO: maybe rename to just DOCS? + DOCS_ONLY, + /** + * Only documents and term frequencies are indexed: positions are omitted. + * This enables normal scoring, except Phrase and other positional queries + * will throw an exception. + */ + DOCS_AND_FREQS, + /** + * Indexes documents, frequencies and positions. + * This is a typical default for full-text search: full scoring is enabled + * and positional queries are supported. + */ + DOCS_AND_FREQS_AND_POSITIONS, + /** + * Indexes documents, frequencies, positions and offsets. + * Character offsets are encoded alongside the positions. + */ + DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, } + + /** + * DocValues types. + * Note that DocValues is strongly typed, so a field cannot have different types + * across different documents. + */ + public static enum DocValuesType { + /** + * A per-document Number + */ + NUMERIC, + /** + * A per-document byte[]. Values may be larger than + * 32766 bytes, but different codecs may enforce their own limits. + */ + BINARY, + /** + * A pre-sorted byte[]. 
Fields with this type only store distinct byte values + * and store an additional offset pointer per document to dereference the shared + * byte[]. The stored byte[] is presorted and allows access via document id, + * ordinal and by-value. Values must be <= 32766 bytes. + */ + SORTED, + /** + * A pre-sorted Number[]. Fields with this type store numeric values in sorted + * order according to {@link Long#compare(long, long)}. + */ + SORTED_NUMERIC, + /** + * A pre-sorted Set<byte[]>. Fields with this type only store distinct byte values + * and store additional offset pointers per document to dereference the shared + * byte[]s. The stored byte[] is presorted and allows access via document id, + * ordinal and by-value. Values must be <= 32766 bytes. + */ + SORTED_SET + } - public Object clone() { - return new FieldInfo(name, isIndexed, number, storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); + /** + * Sole Constructor. + * + * @lucene.experimental + */ + public FieldInfo(String name, boolean indexed, int number, boolean storeTermVector, boolean omitNorms, + boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normsType, + long dvGen, Map attributes) { + this.name = name; + this.indexed = indexed; + this.number = number; + this.docValueType = docValues; + if (indexed) { + this.storeTermVector = storeTermVector; + this.storePayloads = storePayloads; + this.omitNorms = omitNorms; + this.indexOptions = indexOptions; + this.normType = !omitNorms ? normsType : null; + } else { // for non-indexed fields, leave defaults + this.storeTermVector = false; + this.storePayloads = false; + this.omitNorms = false; + this.indexOptions = null; + this.normType = null; + } + this.dvGen = dvGen; + this.attributes = attributes; + assert checkConsistency(); } - void update(boolean isIndexed, boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) { - if (this.isIndexed != isIndexed) { - this.isIndexed = true; // once indexed, always index + private boolean checkConsistency() { + if (!indexed) { + assert !storeTermVector; + assert !storePayloads; + assert !omitNorms; + assert normType == null; + assert indexOptions == null; + } else { + assert indexOptions != null; + if (omitNorms) { + assert normType == null; + } + // Cannot store payloads unless positions are indexed: + assert indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 || !this.storePayloads; } - if (this.storeTermVector != storeTermVector) { - this.storeTermVector = true; // once vector, always vector + + if (dvGen != -1) { + assert docValueType != null; } - if (this.storePositionWithTermVector != storePositionWithTermVector) { - this.storePositionWithTermVector = true; // once vector, always vector + + return true; + } + + void update(IndexableFieldType ft) { + update(ft.indexed(), false, ft.omitNorms(), false, ft.indexOptions()); + } + + // should only be called by FieldInfos#addOrUpdate + void update(boolean indexed, boolean storeTermVector, boolean omitNorms, boolean storePayloads, IndexOptions indexOptions) { + //System.out.println("FI.update field=" + name + " indexed=" + indexed + " omitNorms=" + omitNorms + " this.omitNorms=" + this.omitNorms); + this.indexed |= indexed; // once indexed, always indexed + if (indexed) { // if updated field data is not for indexing, leave the updates out + this.storeTermVector |= storeTermVector; // 
once vector, always vector + this.storePayloads |= storePayloads; + if (this.omitNorms != omitNorms) { + this.omitNorms = true; // if one require omitNorms at least once, it remains off for life + this.normType = null; + } + if (this.indexOptions != indexOptions) { + if (this.indexOptions == null) { + this.indexOptions = indexOptions; + } else { + // downgrade + this.indexOptions = this.indexOptions.compareTo(indexOptions) < 0 ? this.indexOptions : indexOptions; + } + if (this.indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) < 0) { + // cannot store payloads if we don't store positions: + this.storePayloads = false; + } + } } - if (this.storeOffsetWithTermVector != storeOffsetWithTermVector) { - this.storeOffsetWithTermVector = true; // once vector, always vector + assert checkConsistency(); + } + + void setDocValuesType(DocValuesType type) { + if (docValueType != null && docValueType != type) { + throw new IllegalArgumentException("cannot change DocValues type from " + docValueType + " to " + type + " for field \"" + name + "\""); } - if (this.omitNorms != omitNorms) { - this.omitNorms = false; // once norms are stored, always store + docValueType = type; + assert checkConsistency(); + } + + /** Returns IndexOptions for the field, or null if the field is not indexed */ + public IndexOptions getIndexOptions() { + return indexOptions; + } + + /** + * Returns true if this field has any docValues. + */ + public boolean hasDocValues() { + return docValueType != null; + } + + /** + * Returns {@link DocValuesType} of the docValues. this may be null if the field has no docvalues. + */ + public DocValuesType getDocValuesType() { + return docValueType; + } + + /** Sets the docValues generation of this field. */ + void setDocValuesGen(long dvGen) { + this.dvGen = dvGen; + assert checkConsistency(); + } + + /** + * Returns the docValues generation of this field, or -1 if no docValues + * updates exist for it. + */ + public long getDocValuesGen() { + return dvGen; + } + + /** + * Returns {@link DocValuesType} of the norm. this may be null if the field has no norms. + */ + public DocValuesType getNormType() { + return normType; + } + + void setStoreTermVectors() { + storeTermVector = true; + assert checkConsistency(); + } + + void setStorePayloads() { + if (indexed && indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0) { + storePayloads = true; } - if (this.omitTf != omitTf) { - this.omitTf = true; // if one require omitTf at least once, it remains off for life - } - if (this.storePayloads != storePayloads) { - this.storePayloads = true; - } + assert checkConsistency(); } - void update(FieldInfo other) { - if (isIndexed != other.isIndexed) { - isIndexed = true; // once indexed, always index + void setNormValueType(DocValuesType type) { + if (normType != null && normType != type) { + throw new IllegalArgumentException("cannot change Norm type from " + normType + " to " + type + " for field \"" + name + "\""); } - if (storeTermVector != other.storeTermVector) { - storeTermVector = true; // once vector, always vector + normType = type; + assert checkConsistency(); + } + + /** + * Returns true if norms are explicitly omitted for this field + */ + public boolean omitsNorms() { + return omitNorms; + } + + /** + * Returns true if this field actually has any norms. + */ + public boolean hasNorms() { + return normType != null; + } + + /** + * Returns true if this field is indexed. 
+ */ + public boolean isIndexed() { + return indexed; + } + + /** + * Returns true if any payloads exist for this field. + */ + public boolean hasPayloads() { + return storePayloads; + } + + /** + * Returns true if any term vectors exist for this field. + */ + public boolean hasVectors() { + return storeTermVector; + } + + /** + * Get a codec attribute value, or null if it does not exist + */ + public String getAttribute(String key) { + if (attributes == null) { + return null; + } else { + return attributes.get(key); } - if (storePositionWithTermVector != other.storePositionWithTermVector) { - storePositionWithTermVector = true; // once vector, always vector + } + + /** + * Puts a codec attribute value. + *
    + * This is a key-value mapping for the field that the codec can use + * to store additional metadata, and will be available to the codec + * when reading the segment via {@link #getAttribute(String)} + *
    + * If a value already exists for the field, it will be replaced with + * the new value. + */ + public String putAttribute(String key, String value) { + if (attributes == null) { + attributes = new HashMap<>(); } - if (storeOffsetWithTermVector != other.storeOffsetWithTermVector) { - storeOffsetWithTermVector = true; // once vector, always vector - } - if (omitNorms != other.omitNorms) { - omitNorms = false; // once norms are stored, always store - } - if (this.omitTf != omitTf) { - this.omitTf = true; // if one require omitTf at least once, it remains off for life - } - if (storePayloads != other.storePayloads) { - storePayloads = true; - } + return attributes.put(key, value); } + + /** + * Returns internal codec attributes map. May be null if no mappings exist. + */ + public Map attributes() { + return attributes; + } } Index: 3rdParty_sources/lucene/org/apache/lucene/index/FieldInfos.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/FieldInfos.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/FieldInfos.java 17 Aug 2012 14:54:59 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/FieldInfos.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,311 +17,337 @@ * limitations under the License. */ -import org.apache.lucene.document.Document; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.Iterator; +import java.util.Map; +import java.util.SortedMap; +import java.util.TreeMap; -import java.io.IOException; -import java.util.*; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfo.IndexOptions; -/** Access to the Fieldable Info file that describes document fields and whether or - * not they are indexed. Each segment has a separate Fieldable Info file. Objects - * of this class are thread-safe for multiple readers, but only one thread can - * be adding documents at a time, with no other reader or writer threads - * accessing this object. +/** + * Collection of {@link FieldInfo}s (accessible by number or by name). 
+ * @lucene.experimental */ -final class FieldInfos { +public class FieldInfos implements Iterable { + private final boolean hasFreq; + private final boolean hasProx; + private final boolean hasPayloads; + private final boolean hasOffsets; + private final boolean hasVectors; + private final boolean hasNorms; + private final boolean hasDocValues; - static final byte IS_INDEXED = 0x1; - static final byte STORE_TERMVECTOR = 0x2; - static final byte STORE_POSITIONS_WITH_TERMVECTOR = 0x4; - static final byte STORE_OFFSET_WITH_TERMVECTOR = 0x8; - static final byte OMIT_NORMS = 0x10; - static final byte STORE_PAYLOADS = 0x20; - static final byte OMIT_TF = 0x40; + private final SortedMap byNumber = new TreeMap<>(); + private final HashMap byName = new HashMap<>(); + private final Collection values; // for an unmodifiable iterator - private ArrayList byNumber = new ArrayList(); - private HashMap byName = new HashMap(); - - FieldInfos() { } - /** - * Construct a FieldInfos object using the directory and the name of the file - * IndexInput - * @param d The directory to open the IndexInput from - * @param name The name of the file to open the IndexInput from in the Directory - * @throws IOException + * Constructs a new FieldInfos from an array of FieldInfo objects */ - FieldInfos(Directory d, String name) throws IOException { - IndexInput input = d.openInput(name); - try { - read(input); - } finally { - input.close(); + public FieldInfos(FieldInfo[] infos) { + boolean hasVectors = false; + boolean hasProx = false; + boolean hasPayloads = false; + boolean hasOffsets = false; + boolean hasFreq = false; + boolean hasNorms = false; + boolean hasDocValues = false; + + for (FieldInfo info : infos) { + if (info.number < 0) { + throw new IllegalArgumentException("illegal field number: " + info.number + " for field " + info.name); + } + FieldInfo previous = byNumber.put(info.number, info); + if (previous != null) { + throw new IllegalArgumentException("duplicate field numbers: " + previous.name + " and " + info.name + " have: " + info.number); + } + previous = byName.put(info.name, info); + if (previous != null) { + throw new IllegalArgumentException("duplicate field names: " + previous.number + " and " + info.number + " have: " + info.name); + } + + hasVectors |= info.hasVectors(); + hasProx |= info.isIndexed() && info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + hasFreq |= info.isIndexed() && info.getIndexOptions() != IndexOptions.DOCS_ONLY; + hasOffsets |= info.isIndexed() && info.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + hasNorms |= info.hasNorms(); + hasDocValues |= info.hasDocValues(); + hasPayloads |= info.hasPayloads(); } + + this.hasVectors = hasVectors; + this.hasProx = hasProx; + this.hasPayloads = hasPayloads; + this.hasOffsets = hasOffsets; + this.hasFreq = hasFreq; + this.hasNorms = hasNorms; + this.hasDocValues = hasDocValues; + this.values = Collections.unmodifiableCollection(byNumber.values()); } - - /** - * Returns a deep clone of this FieldInfos instance. 
- */ - synchronized public Object clone() { - FieldInfos fis = new FieldInfos(); - final int numField = byNumber.size(); - for(int i=0;i iterator() { + return values.iterator(); } - synchronized public FieldInfo add(FieldInfo fieldInfo) { - FieldInfo fi = fieldInfo(fieldInfo.name); - if (fi == null) { - return addInternal(fieldInfo.name, fieldInfo.isIndexed, fieldInfo.storeTermVector, - fieldInfo.storePositionWithTermVector, fieldInfo.storeOffsetWithTermVector, - fieldInfo.omitNorms, fieldInfo.storePayloads, fieldInfo.omitTf); - } else { - fi.update(fieldInfo); - } - return fi; - } - - private FieldInfo addInternal(String name, boolean isIndexed, - boolean storeTermVector, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean omitNorms, boolean storePayloads, boolean omitTf) { - FieldInfo fi = - new FieldInfo(name, isIndexed, byNumber.size(), storeTermVector, storePositionWithTermVector, - storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); - byNumber.add(fi); - byName.put(name, fi); - return fi; - } - - public int fieldNumber(String fieldName) { - FieldInfo fi = fieldInfo(fieldName); - return (fi != null) ? fi.number : -1; - } - - public FieldInfo fieldInfo(String fieldName) { - return (FieldInfo) byName.get(fieldName); - } - /** - * Return the fieldName identified by its number. - * - * @param fieldNumber - * @return the fieldName or an empty string when the field - * with the given number doesn't exist. + * Return the fieldinfo object referenced by the field name + * @return the FieldInfo object or null when the given fieldName + * doesn't exist. */ - public String fieldName(int fieldNumber) { - FieldInfo fi = fieldInfo(fieldNumber); - return (fi != null) ? fi.name : ""; + public FieldInfo fieldInfo(String fieldName) { + return byName.get(fieldName); } /** * Return the fieldinfo object referenced by the fieldNumber. - * @param fieldNumber + * @param fieldNumber field's number. * @return the FieldInfo object or null when the given fieldNumber * doesn't exist. - */ + * @throws IllegalArgumentException if fieldNumber is negative + */ public FieldInfo fieldInfo(int fieldNumber) { - return (fieldNumber >= 0) ? (FieldInfo) byNumber.get(fieldNumber) : null; + if (fieldNumber < 0) { + throw new IllegalArgumentException("Illegal field number: " + fieldNumber); + } + return byNumber.get(fieldNumber); } + + static final class FieldNumbers { + + private final Map numberToName; + private final Map nameToNumber; + // We use this to enforce that a given field never + // changes DV type, even across segments / IndexWriter + // sessions: + private final Map docValuesType; - public int size() { - return byNumber.size(); - } + // TODO: we should similarly catch an attempt to turn + // norms back on after they were already ommitted; today + // we silently discard the norm but this is badly trappy + private int lowestUnassignedFieldNumber = -1; + + FieldNumbers() { + this.nameToNumber = new HashMap<>(); + this.numberToName = new HashMap<>(); + this.docValuesType = new HashMap<>(); + } + + /** + * Returns the global field number for the given field name. If the name + * does not exist yet it tries to add it with the given preferred field + * number assigned if possible otherwise the first unassigned field number + * is used as the field number. 
+ */ + synchronized int addOrGet(String fieldName, int preferredFieldNumber, DocValuesType dvType) { + if (dvType != null) { + DocValuesType currentDVType = docValuesType.get(fieldName); + if (currentDVType == null) { + docValuesType.put(fieldName, dvType); + } else if (currentDVType != null && currentDVType != dvType) { + throw new IllegalArgumentException("cannot change DocValues type from " + currentDVType + " to " + dvType + " for field \"" + fieldName + "\""); + } + } + Integer fieldNumber = nameToNumber.get(fieldName); + if (fieldNumber == null) { + final Integer preferredBoxed = Integer.valueOf(preferredFieldNumber); - public boolean hasVectors() { - boolean hasVectors = false; - for (int i = 0; i < size(); i++) { - if (fieldInfo(i).storeTermVector) { - hasVectors = true; - break; + if (preferredFieldNumber != -1 && !numberToName.containsKey(preferredBoxed)) { + // cool - we can use this number globally + fieldNumber = preferredBoxed; + } else { + // find a new FieldNumber + while (numberToName.containsKey(++lowestUnassignedFieldNumber)) { + // might not be up to date - lets do the work once needed + } + fieldNumber = lowestUnassignedFieldNumber; + } + + numberToName.put(fieldNumber, fieldName); + nameToNumber.put(fieldName, fieldNumber); } + + return fieldNumber.intValue(); } - return hasVectors; - } - public void write(Directory d, String name) throws IOException { - IndexOutput output = d.createOutput(name); - try { - write(output); - } finally { - output.close(); + synchronized void verifyConsistent(Integer number, String name, DocValuesType dvType) { + if (name.equals(numberToName.get(number)) == false) { + throw new IllegalArgumentException("field number " + number + " is already mapped to field name \"" + numberToName.get(number) + "\", not \"" + name + "\""); + } + if (number.equals(nameToNumber.get(name)) == false) { + throw new IllegalArgumentException("field name \"" + name + "\" is already mapped to field number \"" + nameToNumber.get(name) + "\", not \"" + number + "\""); + } + DocValuesType currentDVType = docValuesType.get(name); + if (dvType != null && currentDVType != null && dvType != currentDVType) { + throw new IllegalArgumentException("cannot change DocValues type from " + currentDVType + " to " + dvType + " for field \"" + name + "\""); + } } - } - public void write(IndexOutput output) throws IOException { - output.writeVInt(size()); - for (int i = 0; i < size(); i++) { - FieldInfo fi = fieldInfo(i); - byte bits = 0x0; - if (fi.isIndexed) bits |= IS_INDEXED; - if (fi.storeTermVector) bits |= STORE_TERMVECTOR; - if (fi.storePositionWithTermVector) bits |= STORE_POSITIONS_WITH_TERMVECTOR; - if (fi.storeOffsetWithTermVector) bits |= STORE_OFFSET_WITH_TERMVECTOR; - if (fi.omitNorms) bits |= OMIT_NORMS; - if (fi.storePayloads) bits |= STORE_PAYLOADS; - if (fi.omitTf) bits |= OMIT_TF; - - output.writeString(fi.name); - output.writeByte(bits); + /** + * Returns true if the {@code fieldName} exists in the map and is of the + * same {@code dvType}. 
+ */ + synchronized boolean contains(String fieldName, DocValuesType dvType) { + // used by IndexWriter.updateNumericDocValue + if (!nameToNumber.containsKey(fieldName)) { + return false; + } else { + // only return true if the field has the same dvType as the requested one + return dvType == docValuesType.get(fieldName); + } } - } + + synchronized void clear() { + numberToName.clear(); + nameToNumber.clear(); + docValuesType.clear(); + } - private void read(IndexInput input) throws IOException { - int size = input.readVInt();//read in the size - for (int i = 0; i < size; i++) { - String name = input.readString().intern(); - byte bits = input.readByte(); - boolean isIndexed = (bits & IS_INDEXED) != 0; - boolean storeTermVector = (bits & STORE_TERMVECTOR) != 0; - boolean storePositionsWithTermVector = (bits & STORE_POSITIONS_WITH_TERMVECTOR) != 0; - boolean storeOffsetWithTermVector = (bits & STORE_OFFSET_WITH_TERMVECTOR) != 0; - boolean omitNorms = (bits & OMIT_NORMS) != 0; - boolean storePayloads = (bits & STORE_PAYLOADS) != 0; - boolean omitTf = (bits & OMIT_TF) != 0; - - addInternal(name, isIndexed, storeTermVector, storePositionsWithTermVector, storeOffsetWithTermVector, omitNorms, storePayloads, omitTf); - } + synchronized void setDocValuesType(int number, String name, DocValuesType dvType) { + verifyConsistent(number, name, dvType); + docValuesType.put(name, dvType); + } } + + static final class Builder { + private final HashMap byName = new HashMap<>(); + final FieldNumbers globalFieldNumbers; + Builder() { + this(new FieldNumbers()); + } + + /** + * Creates a new instance with the given {@link FieldNumbers}. + */ + Builder(FieldNumbers globalFieldNumbers) { + assert globalFieldNumbers != null; + this.globalFieldNumbers = globalFieldNumbers; + } + + public void add(FieldInfos other) { + for(FieldInfo fieldInfo : other){ + add(fieldInfo); + } + } + + /** NOTE: this method does not carry over termVector + * booleans nor docValuesType; the indexer chain + * (TermVectorsConsumerPerField, DocFieldProcessor) must + * set these fields when they succeed in consuming + * the document */ + public FieldInfo addOrUpdate(String name, IndexableFieldType fieldType) { + // TODO: really, indexer shouldn't even call this + // method (it's only called from DocFieldProcessor); + // rather, each component in the chain should update + // what it "owns". EG fieldType.indexOptions() should + // be updated by maybe FreqProxTermsWriterPerField: + return addOrUpdateInternal(name, -1, fieldType.indexed(), false, + fieldType.omitNorms(), false, + fieldType.indexOptions(), fieldType.docValueType(), null); + } + + private FieldInfo addOrUpdateInternal(String name, int preferredFieldNumber, boolean isIndexed, + boolean storeTermVector, + boolean omitNorms, boolean storePayloads, IndexOptions indexOptions, DocValuesType docValues, DocValuesType normType) { + FieldInfo fi = fieldInfo(name); + if (fi == null) { + // This field wasn't yet added to this in-RAM + // segment's FieldInfo, so now we get a global + // number for this field. 
If the field was seen + // before then we'll get the same name and number, + // else we'll allocate a new one: + final int fieldNumber = globalFieldNumbers.addOrGet(name, preferredFieldNumber, docValues); + fi = new FieldInfo(name, isIndexed, fieldNumber, storeTermVector, omitNorms, storePayloads, indexOptions, docValues, normType, -1, null); + assert !byName.containsKey(fi.name); + globalFieldNumbers.verifyConsistent(Integer.valueOf(fi.number), fi.name, fi.getDocValuesType()); + byName.put(fi.name, fi); + } else { + fi.update(isIndexed, storeTermVector, omitNorms, storePayloads, indexOptions); + + if (docValues != null) { + // Only pay the synchronization cost if fi does not already have a DVType + boolean updateGlobal = !fi.hasDocValues(); + if (updateGlobal) { + // Must also update docValuesType map so it's + // aware of this field's DocValueType. This will throw IllegalArgumentException if + // an illegal type change was attempted. + globalFieldNumbers.setDocValuesType(fi.number, name, docValues); + } + + fi.setDocValuesType(docValues); // this will also perform the consistency check. + } + + if (!fi.omitsNorms() && normType != null) { + fi.setNormValueType(normType); + } + } + return fi; + } + + public FieldInfo add(FieldInfo fi) { + // IMPORTANT - reuse the field number if possible for consistent field numbers across segments + return addOrUpdateInternal(fi.name, fi.number, fi.isIndexed(), fi.hasVectors(), + fi.omitsNorms(), fi.hasPayloads(), + fi.getIndexOptions(), fi.getDocValuesType(), fi.getNormType()); + } + + public FieldInfo fieldInfo(String fieldName) { + return byName.get(fieldName); + } + + final FieldInfos finish() { + return new FieldInfos(byName.values().toArray(new FieldInfo[byName.size()])); + } + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FieldInvertState.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FieldReaderException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FieldSortedTermVectorMapper.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/Fields.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FieldsReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FieldsWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FilterAtomicReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FilterDirectoryReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FilterIndexReader.java'. Fisheye: No comparison available. Pass `N' to diff? 
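The FieldNumbers class in the hunk above keeps one global number per field name and refuses to let a field change its DocValues type across segments or IndexWriter sessions. A minimal standalone sketch of that bookkeeping, using illustrative names only (SimpleFieldNumbers and the String-typed "type" parameter are not part of the Lucene API):

    import java.util.HashMap;
    import java.util.Map;

    // Simplified registry: one global number per field name, plus a per-field type
    // that may be set once and never changed afterwards.
    class SimpleFieldNumbers {
      private final Map<String, Integer> nameToNumber = new HashMap<>();
      private final Map<Integer, String> numberToName = new HashMap<>();
      private final Map<String, String> fieldType = new HashMap<>();
      private int lowestUnassigned = -1;

      synchronized int addOrGet(String name, int preferredNumber, String type) {
        if (type != null) {
          String current = fieldType.get(name);
          if (current == null) {
            fieldType.put(name, type);
          } else if (!current.equals(type)) {
            throw new IllegalArgumentException("cannot change type from " + current
                + " to " + type + " for field \"" + name + "\"");
          }
        }
        Integer number = nameToNumber.get(name);
        if (number == null) {
          if (preferredNumber != -1 && !numberToName.containsKey(preferredNumber)) {
            number = preferredNumber;            // reuse the caller's preferred number
          } else {
            while (numberToName.containsKey(++lowestUnassigned)) {
              // scan forward until a free number is found
            }
            number = lowestUnassigned;
          }
          numberToName.put(number, name);
          nameToNumber.put(name, number);
        }
        return number;
      }
    }

Passing the existing FieldInfo.number as the preferred number, as Builder.add(FieldInfo) does in the hunk above, is what keeps field numbers consistent from one segment to the next.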
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FilteredTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FlushByRamOrCountsPolicy.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FlushPolicy.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FreqProxFieldMergeState.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriter.java 17 Aug 2012 14:55:00 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriter.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,370 +17,91 @@ * limitations under the License. */ -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.UnicodeUtil; - import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.Map; import java.util.ArrayList; import java.util.List; -import java.util.Iterator; +import java.util.Map; -final class FreqProxTermsWriter extends TermsHashConsumer { +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.util.CollectionUtil; +import org.apache.lucene.util.IOUtils; - public TermsHashConsumerPerThread addThread(TermsHashPerThread perThread) { - return new FreqProxTermsWriterPerThread(perThread); - } +final class FreqProxTermsWriter extends TermsHash { - void createPostings(RawPostingList[] postings, int start, int count) { - final int end = start + count; - for(int i=start;i fieldsToFlush, final SegmentWriteState state) throws IOException { + super.flush(fieldsToFlush, state); - void closeDocStore(DocumentsWriter.FlushState state) {} - void abort() {} + // Gather all fields that saw any postings: + List allFields = new ArrayList<>(); - - // TODO: would be nice to factor out morme of this, eg the - // FreqProxFieldMergeState, and code to visit all Fields - // under the same FieldInfo together, up into TermsHash*. - // Other writers would presumably share alot of this... 
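The freq stream written by the old appendPostings code below, and read back by the new FreqProxTermsWriterPerField.flush later in this change, packs a document entry the same way: the delta to the previous docID is shifted left one bit and the low bit flags the common case of term frequency 1, so that case costs a single vInt. A small sketch of the scheme, with illustrative class and method names that are not part of Lucene:

    import java.util.ArrayList;
    import java.util.List;

    // Packs (docDelta, termFreq) into one or two ints: the low bit of the first int
    // means "freq == 1"; otherwise the freq follows as a second int.
    final class DocFreqCoding {
      static List<Integer> encode(int docDelta, int termFreq) {
        List<Integer> out = new ArrayList<>();
        if (termFreq == 1) {
          out.add((docDelta << 1) | 1);
        } else {
          out.add(docDelta << 1);
          out.add(termFreq);
        }
        return out;
      }

      static int[] decode(List<Integer> in, int pos) {
        int code = in.get(pos);
        int docDelta = code >>> 1;
        int termFreq = ((code & 1) != 0) ? 1 : in.get(pos + 1);
        return new int[] { docDelta, termFreq };
      }
    }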
- - public void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException { - - // Gather all FieldData's that have postings, across all - // ThreadStates - List allFields = new ArrayList(); - - Iterator it = threadsAndFields.entrySet().iterator(); - while(it.hasNext()) { - - Map.Entry entry = (Map.Entry) it.next(); - - Collection fields = (Collection) entry.getValue(); - - Iterator fieldsIt = fields.iterator(); - - while(fieldsIt.hasNext()) { - FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) fieldsIt.next(); - if (perField.termsHashPerField.numPostings > 0) - allFields.add(perField); + for (TermsHashPerField f : fieldsToFlush.values()) { + final FreqProxTermsWriterPerField perField = (FreqProxTermsWriterPerField) f; + if (perField.bytesHash.size() > 0) { + allFields.add(perField); } } - // Sort by field name - Collections.sort(allFields); final int numAllFields = allFields.size(); - final TermInfosWriter termsOut = new TermInfosWriter(state.directory, - state.segmentName, - fieldInfos, - state.docWriter.writer.getTermIndexInterval()); + // Sort by field name + CollectionUtil.introSort(allFields); - final IndexOutput freqOut = state.directory.createOutput(state.segmentFileName(IndexFileNames.FREQ_EXTENSION)); - final IndexOutput proxOut; + final FieldsConsumer consumer = state.segmentInfo.getCodec().postingsFormat().fieldsConsumer(state); - if (fieldInfos.hasProx()) - proxOut = state.directory.createOutput(state.segmentFileName(IndexFileNames.PROX_EXTENSION)); - else - proxOut = null; + boolean success = false; - final DefaultSkipListWriter skipListWriter = new DefaultSkipListWriter(termsOut.skipInterval, - termsOut.maxSkipLevels, - state.numDocsInRAM, freqOut, proxOut); - - int start = 0; - while(start < numAllFields) { - final FieldInfo fieldInfo = ((FreqProxTermsWriterPerField) allFields.get(start)).fieldInfo; - final String fieldName = fieldInfo.name; - - int end = start+1; - while(end < numAllFields && ((FreqProxTermsWriterPerField) allFields.get(end)).fieldInfo.name.equals(fieldName)) - end++; + try { + TermsHash termsHash = null; - FreqProxTermsWriterPerField[] fields = new FreqProxTermsWriterPerField[end-start]; - for(int i=start;i IMPL: FormatPostingsTermsDictWriter + -> TermsConsumer + -> IMPL: FormatPostingsTermsDictWriter.TermsWriter + -> DocsConsumer + -> IMPL: FormatPostingsDocsWriter + -> PositionsConsumer + -> IMPL: FormatPostingsPositionsWriter + */ + + for (int fieldNumber = 0; fieldNumber < numAllFields; fieldNumber++) { + final FieldInfo fieldInfo = allFields.get(fieldNumber).fieldInfo; + + final FreqProxTermsWriterPerField fieldWriter = allFields.get(fieldNumber); - // Aggregate the storePayload as seen by the same - // field across multiple threads - fieldInfo.storePayloads |= fields[i-start].hasPayloads; - } - - // If this field has postings then add them to the - // segment - appendPostings(state, fields, termsOut, freqOut, proxOut, skipListWriter); - - for(int i=0;i - // IndexOutput - while(numBytes > 0) { - final int chunk; - if (numBytes > 4096) - chunk = 4096; - else - chunk = (int) numBytes; - srcIn.readBytes(copyByteBuffer, 0, chunk); - destIn.writeBytes(copyByteBuffer, 0, chunk); - numBytes -= chunk; - } - } - - /* Walk through all unique text tokens (Posting - * instances) found in this field and serialize them - * into a single RAM segment. 
*/ - void appendPostings(final DocumentsWriter.FlushState flushState, - FreqProxTermsWriterPerField[] fields, - TermInfosWriter termsOut, - IndexOutput freqOut, - IndexOutput proxOut, - DefaultSkipListWriter skipListWriter) - throws CorruptIndexException, IOException { - - final int fieldNumber = fields[0].fieldInfo.number; - int numFields = fields.length; - - final FreqProxFieldMergeState[] mergeStates = new FreqProxFieldMergeState[numFields]; - - for(int i=0;i 0) { - - // Get the next term to merge - termStates[0] = mergeStates[0]; - int numToMerge = 1; - - for(int i=1;i 0) { - - if ((++df % skipInterval) == 0) { - skipListWriter.setSkipData(lastDoc, currentFieldStorePayloads, lastPayloadLength); - skipListWriter.bufferSkip(df); - } - - FreqProxFieldMergeState minState = termStates[0]; - for(int i=1;i lastDoc || df == 1; - - final ByteSliceReader prox = minState.prox; - - // Carefully copy over the prox + payload info, - // changing the format to match Lucene's segment - // format. - if (!currentFieldOmitTf) { - // omitTf == false so we do write positions & payload - assert proxOut != null; - for(int j=0;j 0) - copyBytes(prox, proxOut, payloadLength); - } else { - assert 0 == (code & 1); - proxOut.writeVInt(code>>1); - } - } //End for - - final int newDocCode = (doc-lastDoc)<<1; - - if (1 == termDocFreq) { - freqOut.writeVInt(newDocCode|1); - } else { - freqOut.writeVInt(newDocCode); - freqOut.writeVInt(termDocFreq); - } - } else { - // omitTf==true: we store only the docs, without - // term freq, positions, payloads - freqOut.writeVInt(doc-lastDoc); - } - - lastDoc = doc; - - if (!minState.nextDoc()) { - - // Remove from termStates - int upto = 0; - for(int i=0;i 0; - - // Done merging this term - - long skipPointer = skipListWriter.writeSkip(freqOut); - - // Write term - termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); - - // TODO: we could do this incrementally - UnicodeUtil.UTF16toUTF8(text, start, termsUTF8); - - // TODO: we could save O(n) re-scan of the term by - // computing the shared prefix with the last term - // while during the UTF8 encoding - termsOut.add(fieldNumber, - termsUTF8.result, - termsUTF8.length, - termInfo); } } - private final TermInfo termInfo = new TermInfo(); // minimize consing - - final UnicodeUtil.UTF8Result termsUTF8 = new UnicodeUtil.UTF8Result(); - - void files(Collection files) {} - - static final class PostingList extends RawPostingList { - int docFreq; // # times this term occurs in the current doc - int lastDocID; // Last docID where this term occurred - int lastDocCode; // Code for prior doc - int lastPosition; // Last position where this term occurred + @Override + public TermsHashPerField addField(FieldInvertState invertState, FieldInfo fieldInfo) { + return new FreqProxTermsWriterPerField(invertState, this, fieldInfo, nextTermsHash.addField(invertState, fieldInfo)); } - - int bytesPerPosting() { - return RawPostingList.BYTES_SIZE + 4 * DocumentsWriter.INT_NUM_BYTE; - } } Index: 3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriterPerField.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriterPerField.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriterPerField.java 17 Aug 2012 14:54:59 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriterPerField.java 16 Dec 2014 11:31:44 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package 
org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,128 +18,485 @@ */ import java.io.IOException; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.analysis.Token; +import java.util.Comparator; +import java.util.Map; +import org.apache.lucene.analysis.tokenattributes.OffsetAttribute; +import org.apache.lucene.analysis.tokenattributes.PayloadAttribute; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.PostingsConsumer; +import org.apache.lucene.codecs.TermStats; +import org.apache.lucene.codecs.TermsConsumer; +import org.apache.lucene.index.FieldInfo.IndexOptions; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RamUsageEstimator; + // TODO: break into separate freq and prox writers as // codecs; make separate container (tii/tis/skip/*) that can // be configured as any number of files 1..N -final class FreqProxTermsWriterPerField extends TermsHashConsumerPerField implements Comparable { +final class FreqProxTermsWriterPerField extends TermsHashPerField { - final FreqProxTermsWriterPerThread perThread; - final TermsHashPerField termsHashPerField; - final FieldInfo fieldInfo; - final DocumentsWriter.DocState docState; - final DocInverter.FieldInvertState fieldState; - boolean omitTf; + private FreqProxPostingsArray freqProxPostingsArray; - public FreqProxTermsWriterPerField(TermsHashPerField termsHashPerField, FreqProxTermsWriterPerThread perThread, FieldInfo fieldInfo) { - this.termsHashPerField = termsHashPerField; - this.perThread = perThread; - this.fieldInfo = fieldInfo; - docState = termsHashPerField.docState; - fieldState = termsHashPerField.fieldState; - omitTf = fieldInfo.omitTf; - } + final boolean hasFreq; + final boolean hasProx; + final boolean hasOffsets; + PayloadAttribute payloadAttribute; + OffsetAttribute offsetAttribute; - int getStreamCount() { - if (fieldInfo.omitTf) - return 1; - else - return 2; + /** Set to true if any token had a payload in the current + * segment. */ + boolean sawPayloads; + + public FreqProxTermsWriterPerField(FieldInvertState invertState, TermsHash termsHash, FieldInfo fieldInfo, TermsHashPerField nextPerField) { + super(fieldInfo.getIndexOptions().compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0 ? 
2 : 1, invertState, termsHash, nextPerField, fieldInfo); + IndexOptions indexOptions = fieldInfo.getIndexOptions(); + assert indexOptions != null; + hasFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + hasProx = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + hasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; } - void finish() {} - - boolean hasPayloads; - - void skippingLongTerm(Token t) throws IOException {} - - public int compareTo(Object other0) { - FreqProxTermsWriterPerField other = (FreqProxTermsWriterPerField) other0; - return fieldInfo.name.compareTo(other.fieldInfo.name); + @Override + void finish() throws IOException { + super.finish(); + if (sawPayloads) { + fieldInfo.setStorePayloads(); + } } - void reset() { - // Record, up front, whether our in-RAM format will be - // with or without term freqs: - omitTf = fieldInfo.omitTf; + @Override + boolean start(IndexableField f, boolean first) { + super.start(f, first); + payloadAttribute = fieldState.payloadAttribute; + offsetAttribute = fieldState.offsetAttribute; + return true; } - boolean start(Fieldable[] fields, int count) { - for(int i=0;i 0) { + writeVInt(1, (proxCode<<1)|1); + writeVInt(1, payload.length); + writeBytes(1, payload.bytes, payload.offset, payload.length); + sawPayloads = true; + } else { + writeVInt(1, proxCode<<1); + } + } - final void writeProx(Token t, FreqProxTermsWriter.PostingList p, int proxCode) { - final Payload payload = t.getPayload(); - if (payload != null && payload.length > 0) { - termsHashPerField.writeVInt(1, (proxCode<<1)|1); - termsHashPerField.writeVInt(1, payload.length); - termsHashPerField.writeBytes(1, payload.data, payload.offset, payload.length); - hasPayloads = true; - } else - termsHashPerField.writeVInt(1, proxCode<<1); - p.lastPosition = fieldState.position; + assert postingsArray == freqProxPostingsArray; + freqProxPostingsArray.lastPositions[termID] = fieldState.position; } - final void newTerm(Token t, RawPostingList p0) { + void writeOffsets(int termID, int offsetAccum) { + final int startOffset = offsetAccum + offsetAttribute.startOffset(); + final int endOffset = offsetAccum + offsetAttribute.endOffset(); + assert startOffset - freqProxPostingsArray.lastOffsets[termID] >= 0; + writeVInt(1, startOffset - freqProxPostingsArray.lastOffsets[termID]); + writeVInt(1, endOffset - startOffset); + freqProxPostingsArray.lastOffsets[termID] = startOffset; + } + + @Override + void newTerm(final int termID) { // First time we're seeing this term since the last // flush - assert docState.testPoint("FreqProxTermsWriterPerField.newTerm start"); - FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0; - p.lastDocID = docState.docID; - if (omitTf) { - p.lastDocCode = docState.docID; + final FreqProxPostingsArray postings = freqProxPostingsArray; + + postings.lastDocIDs[termID] = docState.docID; + if (!hasFreq) { + assert postings.termFreqs == null; + postings.lastDocCodes[termID] = docState.docID; } else { - p.lastDocCode = docState.docID << 1; - p.docFreq = 1; - writeProx(t, p, fieldState.position); + postings.lastDocCodes[termID] = docState.docID << 1; + postings.termFreqs[termID] = 1; + if (hasProx) { + writeProx(termID, fieldState.position); + if (hasOffsets) { + writeOffsets(termID, fieldState.offset); + } + } else { + assert !hasOffsets; + } } + fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency); + fieldState.uniqueTermCount++; } - final void addTerm(Token t, 
RawPostingList p0) { + @Override + void addTerm(final int termID) { + final FreqProxPostingsArray postings = freqProxPostingsArray; - assert docState.testPoint("FreqProxTermsWriterPerField.addTerm start"); + assert !hasFreq || postings.termFreqs[termID] > 0; - FreqProxTermsWriter.PostingList p = (FreqProxTermsWriter.PostingList) p0; + if (!hasFreq) { + assert postings.termFreqs == null; + if (docState.docID != postings.lastDocIDs[termID]) { + // New document; now encode docCode for previous doc: + assert docState.docID > postings.lastDocIDs[termID]; + writeVInt(0, postings.lastDocCodes[termID]); + postings.lastDocCodes[termID] = docState.docID - postings.lastDocIDs[termID]; + postings.lastDocIDs[termID] = docState.docID; + fieldState.uniqueTermCount++; + } + } else if (docState.docID != postings.lastDocIDs[termID]) { + assert docState.docID > postings.lastDocIDs[termID]:"id: "+docState.docID + " postings ID: "+ postings.lastDocIDs[termID] + " termID: "+termID; + // Term not yet seen in the current doc but previously + // seen in other doc(s) since the last flush - assert omitTf || p.docFreq > 0; + // Now that we know doc freq for previous doc, + // write it & lastDocCode + if (1 == postings.termFreqs[termID]) { + writeVInt(0, postings.lastDocCodes[termID]|1); + } else { + writeVInt(0, postings.lastDocCodes[termID]); + writeVInt(0, postings.termFreqs[termID]); + } - if (omitTf) { - if (docState.docID != p.lastDocID) { - assert docState.docID > p.lastDocID; - termsHashPerField.writeVInt(0, p.lastDocCode); - p.lastDocCode = docState.docID - p.lastDocID; - p.lastDocID = docState.docID; + // Init freq for the current document + postings.termFreqs[termID] = 1; + fieldState.maxTermFrequency = Math.max(1, fieldState.maxTermFrequency); + postings.lastDocCodes[termID] = (docState.docID - postings.lastDocIDs[termID]) << 1; + postings.lastDocIDs[termID] = docState.docID; + if (hasProx) { + writeProx(termID, fieldState.position); + if (hasOffsets) { + postings.lastOffsets[termID] = 0; + writeOffsets(termID, fieldState.offset); + } + } else { + assert !hasOffsets; } + fieldState.uniqueTermCount++; } else { - if (docState.docID != p.lastDocID) { - assert docState.docID > p.lastDocID; - // Term not yet seen in the current doc but previously - // seen in other doc(s) since the last flush + fieldState.maxTermFrequency = Math.max(fieldState.maxTermFrequency, ++postings.termFreqs[termID]); + if (hasProx) { + writeProx(termID, fieldState.position-postings.lastPositions[termID]); + if (hasOffsets) { + writeOffsets(termID, fieldState.offset); + } + } + } + } - // Now that we know doc freq for previous doc, - // write it & lastDocCode - if (1 == p.docFreq) - termsHashPerField.writeVInt(0, p.lastDocCode|1); - else { - termsHashPerField.writeVInt(0, p.lastDocCode); - termsHashPerField.writeVInt(0, p.docFreq); + @Override + public void newPostingsArray() { + freqProxPostingsArray = (FreqProxPostingsArray) postingsArray; + } + + @Override + ParallelPostingsArray createPostingsArray(int size) { + IndexOptions indexOptions = fieldInfo.getIndexOptions(); + assert indexOptions != null; + boolean hasFreq = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + boolean hasProx = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + boolean hasOffsets = indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + return new FreqProxPostingsArray(size, hasFreq, hasProx, hasOffsets); + } + + static final class FreqProxPostingsArray extends ParallelPostingsArray { + public 
FreqProxPostingsArray(int size, boolean writeFreqs, boolean writeProx, boolean writeOffsets) { + super(size); + if (writeFreqs) { + termFreqs = new int[size]; + } + lastDocIDs = new int[size]; + lastDocCodes = new int[size]; + if (writeProx) { + lastPositions = new int[size]; + if (writeOffsets) { + lastOffsets = new int[size]; } - p.docFreq = 1; - p.lastDocCode = (docState.docID - p.lastDocID) << 1; - p.lastDocID = docState.docID; - writeProx(t, p, fieldState.position); } else { - p.docFreq++; - writeProx(t, p, fieldState.position-p.lastPosition); + assert !writeOffsets; } + //System.out.println("PA init freqs=" + writeFreqs + " pos=" + writeProx + " offs=" + writeOffsets); } + + int termFreqs[]; // # times this term occurs in the current doc + int lastDocIDs[]; // Last docID where this term occurred + int lastDocCodes[]; // Code for prior doc + int lastPositions[]; // Last position where this term occurred + int lastOffsets[]; // Last endOffset where this term occurred + + @Override + ParallelPostingsArray newInstance(int size) { + return new FreqProxPostingsArray(size, termFreqs != null, lastPositions != null, lastOffsets != null); + } + + @Override + void copyTo(ParallelPostingsArray toArray, int numToCopy) { + assert toArray instanceof FreqProxPostingsArray; + FreqProxPostingsArray to = (FreqProxPostingsArray) toArray; + + super.copyTo(toArray, numToCopy); + + System.arraycopy(lastDocIDs, 0, to.lastDocIDs, 0, numToCopy); + System.arraycopy(lastDocCodes, 0, to.lastDocCodes, 0, numToCopy); + if (lastPositions != null) { + assert to.lastPositions != null; + System.arraycopy(lastPositions, 0, to.lastPositions, 0, numToCopy); + } + if (lastOffsets != null) { + assert to.lastOffsets != null; + System.arraycopy(lastOffsets, 0, to.lastOffsets, 0, numToCopy); + } + if (termFreqs != null) { + assert to.termFreqs != null; + System.arraycopy(termFreqs, 0, to.termFreqs, 0, numToCopy); + } + } + + @Override + int bytesPerPosting() { + int bytes = ParallelPostingsArray.BYTES_PER_POSTING + 2 * RamUsageEstimator.NUM_BYTES_INT; + if (lastPositions != null) { + bytes += RamUsageEstimator.NUM_BYTES_INT; + } + if (lastOffsets != null) { + bytes += RamUsageEstimator.NUM_BYTES_INT; + } + if (termFreqs != null) { + bytes += RamUsageEstimator.NUM_BYTES_INT; + } + + return bytes; + } } - public void abort() {} -} + /* Walk through all unique text tokens (Posting + * instances) found in this field and serialize them + * into a single RAM segment. */ + void flush(String fieldName, FieldsConsumer consumer, final SegmentWriteState state) + throws IOException { + BytesRefBuilder payload = null; + + if (!fieldInfo.isIndexed()) { + return; // nothing to flush, don't bother the codec with the unindexed field + } + + final TermsConsumer termsConsumer = consumer.addField(fieldInfo); + final Comparator termComp = termsConsumer.getComparator(); + + // CONFUSING: this.indexOptions holds the index options + // that were current when we first saw this field. But + // it's possible this has changed, eg when other + // documents are indexed that cause a "downgrade" of the + // IndexOptions. 
So we must decode the in-RAM buffer + // according to this.indexOptions, but then write the + // new segment to the directory according to + // currentFieldIndexOptions: + final IndexOptions currentFieldIndexOptions = fieldInfo.getIndexOptions(); + assert currentFieldIndexOptions != null; + + final boolean writeTermFreq = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0; + final boolean writePositions = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0; + final boolean writeOffsets = currentFieldIndexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0; + + final boolean readTermFreq = this.hasFreq; + final boolean readPositions = this.hasProx; + final boolean readOffsets = this.hasOffsets; + + //System.out.println("flush readTF=" + readTermFreq + " readPos=" + readPositions + " readOffs=" + readOffsets); + + // Make sure FieldInfo.update is working correctly!: + assert !writeTermFreq || readTermFreq; + assert !writePositions || readPositions; + assert !writeOffsets || readOffsets; + + assert !writeOffsets || writePositions; + + final Map segDeletes; + if (state.segUpdates != null && state.segUpdates.terms.size() > 0) { + segDeletes = state.segUpdates.terms; + } else { + segDeletes = null; + } + + final int[] termIDs = sortPostings(termComp); + final int numTerms = bytesHash.size(); + final BytesRef text = new BytesRef(); + final FreqProxPostingsArray postings = freqProxPostingsArray; + final ByteSliceReader freq = new ByteSliceReader(); + final ByteSliceReader prox = new ByteSliceReader(); + + FixedBitSet visitedDocs = new FixedBitSet(state.segmentInfo.getDocCount()); + long sumTotalTermFreq = 0; + long sumDocFreq = 0; + + Term protoTerm = new Term(fieldName); + for (int i = 0; i < numTerms; i++) { + final int termID = termIDs[i]; + //System.out.println("term=" + termID); + // Get BytesRef + final int textStart = postings.textStarts[termID]; + bytePool.setBytesRef(text, textStart); + + initReader(freq, termID, 0); + if (readPositions || readOffsets) { + initReader(prox, termID, 1); + } + + // TODO: really TermsHashPerField should take over most + // of this loop, including merge sort of terms from + // multiple threads and interacting with the + // TermsConsumer, only calling out to us (passing us the + // DocsConsumer) to handle delivery of docs/positions + + final PostingsConsumer postingsConsumer = termsConsumer.startTerm(text); + + final int delDocLimit; + if (segDeletes != null) { + protoTerm.bytes = text; + final Integer docIDUpto = segDeletes.get(protoTerm); + if (docIDUpto != null) { + delDocLimit = docIDUpto; + } else { + delDocLimit = 0; + } + } else { + delDocLimit = 0; + } + + // Now termStates has numToMerge FieldMergeStates + // which all share the same term. Now we must + // interleave the docID streams. 
+ int docFreq = 0; + long totalTermFreq = 0; + int docID = 0; + + while(true) { + //System.out.println(" cycle"); + final int termFreq; + if (freq.eof()) { + if (postings.lastDocCodes[termID] != -1) { + // Return last doc + docID = postings.lastDocIDs[termID]; + if (readTermFreq) { + termFreq = postings.termFreqs[termID]; + } else { + termFreq = -1; + } + postings.lastDocCodes[termID] = -1; + } else { + // EOF + break; + } + } else { + final int code = freq.readVInt(); + if (!readTermFreq) { + docID += code; + termFreq = -1; + } else { + docID += code >>> 1; + if ((code & 1) != 0) { + termFreq = 1; + } else { + termFreq = freq.readVInt(); + } + } + + assert docID != postings.lastDocIDs[termID]; + } + + docFreq++; + assert docID < state.segmentInfo.getDocCount(): "doc=" + docID + " maxDoc=" + state.segmentInfo.getDocCount(); + + // NOTE: we could check here if the docID was + // deleted, and skip it. However, this is somewhat + // dangerous because it can yield non-deterministic + // behavior since we may see the docID before we see + // the term that caused it to be deleted. This + // would mean some (but not all) of its postings may + // make it into the index, which'd alter the docFreq + // for those terms. We could fix this by doing two + // passes, ie first sweep marks all del docs, and + // 2nd sweep does the real flush, but I suspect + // that'd add too much time to flush. + visitedDocs.set(docID); + postingsConsumer.startDoc(docID, writeTermFreq ? termFreq : -1); + if (docID < delDocLimit) { + // Mark it deleted. TODO: we could also skip + // writing its postings; this would be + // deterministic (just for this Term's docs). + + // TODO: can we do this reach-around in a cleaner way???? + if (state.liveDocs == null) { + state.liveDocs = docState.docWriter.codec.liveDocsFormat().newLiveDocs(state.segmentInfo.getDocCount()); + } + if (state.liveDocs.get(docID)) { + state.delCountOnFlush++; + state.liveDocs.clear(docID); + } + } + + totalTermFreq += termFreq; + + // Carefully copy over the prox + payload info, + // changing the format to match Lucene's segment + // format. + + if (readPositions || readOffsets) { + // we did record positions (& maybe payload) and/or offsets + int position = 0; + int offset = 0; + for(int j=0;j>> 1; + + if ((code & 1) != 0) { + + // This position has a payload + final int payloadLength = prox.readVInt(); + + if (payload == null) { + payload = new BytesRefBuilder(); + } + payload.grow(payloadLength); + + prox.readBytes(payload.bytes(), 0, payloadLength); + payload.setLength(payloadLength); + thisPayload = payload.get(); + + } else { + thisPayload = null; + } + + if (readOffsets) { + final int startOffset = offset + prox.readVInt(); + final int endOffset = startOffset + prox.readVInt(); + if (writePositions) { + if (writeOffsets) { + assert startOffset >=0 && endOffset >= startOffset : "startOffset=" + startOffset + ",endOffset=" + endOffset + ",offset=" + offset; + postingsConsumer.addPosition(position, thisPayload, startOffset, endOffset); + } else { + postingsConsumer.addPosition(position, thisPayload, -1, -1); + } + } + offset = startOffset; + } else if (writePositions) { + postingsConsumer.addPosition(position, thisPayload, -1, -1); + } + } + } + } + postingsConsumer.finishDoc(); + } + termsConsumer.finishTerm(text, new TermStats(docFreq, writeTermFreq ? totalTermFreq : -1)); + sumTotalTermFreq += totalTermFreq; + sumDocFreq += docFreq; + } + + termsConsumer.finish(writeTermFreq ? 
sumTotalTermFreq : -1, sumDocFreq, visitedDocs.cardinality()); + } +} Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FreqProxTermsWriterPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/FrozenBufferedUpdates.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/IndexCommit.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/IndexCommit.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/IndexCommit.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/IndexCommit.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,7 +18,9 @@ */ import java.util.Collection; +import java.util.Map; import java.io.IOException; + import org.apache.lucene.store.Directory; /** @@ -36,11 +38,10 @@ * associated with it. The segments file associated with a * later index commit point would have a larger N.

    * - * WARNING: This API is a new and experimental and - * may suddenly change.
    + * @lucene.experimental */ -public abstract class IndexCommit implements IndexCommitPoint { +public abstract class IndexCommit implements Comparable { /** * Get the segments file (segments_N) associated @@ -51,7 +52,7 @@ /** * Returns all index files referenced by this commit point. */ - public abstract Collection getFileNames() throws IOException; + public abstract Collection getFileNames() throws IOException; /** * Returns the {@link Directory} for the index. @@ -70,54 +71,54 @@ * and therefore this should only be called by its {@link IndexDeletionPolicy#onInit onInit()} or * {@link IndexDeletionPolicy#onCommit onCommit()} methods. */ - public void delete() { - throw new UnsupportedOperationException("This IndexCommit does not support this method."); - } + public abstract void delete(); - public boolean isDeleted() { - throw new UnsupportedOperationException("This IndexCommit does not support this method."); - } + /** Returns true if this commit should be deleted; this is + * only used by {@link IndexWriter} after invoking the + * {@link IndexDeletionPolicy}. */ + public abstract boolean isDeleted(); - /** - * Returns true if this commit is an optimized index. - */ - public boolean isOptimized() { - throw new UnsupportedOperationException("This IndexCommit does not support this method."); + /** Returns number of segments referenced by this commit. */ + public abstract int getSegmentCount(); + + /** Sole constructor. (For invocation by subclass + * constructors, typically implicit.) */ + protected IndexCommit() { } - /** - * Two IndexCommits are equal if both their Directory and versions are equal. - */ + /** Two IndexCommits are equal if both their Directory and versions are equal. */ + @Override public boolean equals(Object other) { if (other instanceof IndexCommit) { IndexCommit otherCommit = (IndexCommit) other; - return otherCommit.getDirectory().equals(getDirectory()) && otherCommit.getVersion() == getVersion(); - } else + return otherCommit.getDirectory() == getDirectory() && otherCommit.getGeneration() == getGeneration(); + } else { return false; + } } + @Override public int hashCode() { - return getDirectory().hashCode() + getSegmentsFileName().hashCode(); + return getDirectory().hashCode() + Long.valueOf(getGeneration()).hashCode(); } - /** Returns the version for this IndexCommit. This is the - same value that {@link IndexReader#getVersion} would - return if it were opened on this commit. */ - public long getVersion() { - throw new UnsupportedOperationException("This IndexCommit does not support this method."); - } - /** Returns the generation (the _N in segments_N) for this - IndexCommit */ - public long getGeneration() { - throw new UnsupportedOperationException("This IndexCommit does not support this method."); - } + * IndexCommit */ + public abstract long getGeneration(); - /** Convenience method that returns the last modified time - * of the segments_N file corresponding to this index - * commit, equivalent to - * getDirectory().fileModified(getSegmentsFileName()). */ - public long getTimestamp() throws IOException { - return getDirectory().fileModified(getSegmentsFileName()); + /** Returns userData, previously passed to {@link + * IndexWriter#setCommitData(Map)} for this commit. Map is + * String -> String. 
*/ + public abstract Map getUserData() throws IOException; + + @Override + public int compareTo(IndexCommit commit) { + if (getDirectory() != commit.getDirectory()) { + throw new UnsupportedOperationException("cannot compare IndexCommits from different Directory instances"); + } + + long gen = getGeneration(); + long comgen = commit.getGeneration(); + return Long.compare(gen, comgen); } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IndexCommitPoint.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/IndexDeletionPolicy.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/IndexDeletionPolicy.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/IndexDeletionPolicy.java 17 Aug 2012 14:55:00 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/IndexDeletionPolicy.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,6 +20,8 @@ import java.util.List; import java.io.IOException; +import org.apache.lucene.store.Directory; + /** *

    Expert: policy for deletion of stale {@link IndexCommit index commits}. * @@ -46,10 +48,17 @@ * target="top" * href="http://issues.apache.org/jira/browse/LUCENE-710">LUCENE-710 * for details.

    + * + *

    Implementers of sub-classes should make sure that {@link #clone()} + * returns an independent instance able to work with any other {@link IndexWriter} + * or {@link Directory} instance.
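The hunk below changes IndexDeletionPolicy from an interface to an abstract class, so a subclass only has to implement onInit and onCommit. A minimal policy in the spirit of KeepOnlyLastCommitDeletionPolicy might look like the following sketch; KeepNewestOnlyPolicy is an illustrative name, and the List<? extends IndexCommit> generic parameter is assumed here since the extracted diff does not show it:

    import java.util.List;
    import org.apache.lucene.index.IndexCommit;
    import org.apache.lucene.index.IndexDeletionPolicy;

    // Deletes every commit except the newest one, both at startup and after each commit.
    class KeepNewestOnlyPolicy extends IndexDeletionPolicy {
      @Override
      public void onInit(List<? extends IndexCommit> commits) {
        onCommit(commits);  // may be called with an empty list for a brand-new index
      }

      @Override
      public void onCommit(List<? extends IndexCommit> commits) {
        // Commits are sorted by age; the last element is the current "front" commit
        // and must never be deleted.
        for (int i = 0; i < commits.size() - 1; i++) {
          commits.get(i).delete();
        }
      }
    }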

    */ -public interface IndexDeletionPolicy { +public abstract class IndexDeletionPolicy { + /** Sole constructor, typically called by sub-classes constructors. */ + protected IndexDeletionPolicy() {} + /** *

    This is called once when a writer is first * instantiated to give the policy a chance to remove old @@ -69,8 +78,10 @@ * @param commits List of current * {@link IndexCommit point-in-time commits}, * sorted by age (the 0th one is the oldest commit). + * Note that for a new index this method is invoked with + * an empty list. */ - public void onInit(List commits) throws IOException; + public abstract void onInit(List commits) throws IOException; /** *

    This is called each time the writer completed a commit. @@ -81,13 +92,10 @@ * by calling method {@link IndexCommit#delete delete()} * of {@link IndexCommit}.

    * - *

    If writer has autoCommit = true then - * this method will in general be called many times during - * one instance of {@link IndexWriter}. If - * autoCommit = false then this method is - * only called once when {@link IndexWriter#close} is - * called, or not at all if the {@link IndexWriter#abort} - * is called. + *

    This method is only called when {@link + * IndexWriter#commit} or {@link IndexWriter#close} is + * called, or possibly not at all if the {@link + * IndexWriter#rollback} is called. * *

    Note: the last CommitPoint is the most recent one, * i.e. the "front index state". Be careful not to delete it, @@ -97,5 +105,5 @@ * @param commits List of {@link IndexCommit}, * sorted by age (the 0th one is the oldest commit). */ - public void onCommit(List commits) throws IOException; + public abstract void onCommit(List commits) throws IOException; } Index: 3rdParty_sources/lucene/org/apache/lucene/index/IndexFileDeleter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/IndexFileDeleter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/IndexFileDeleter.java 17 Aug 2012 14:55:01 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/IndexFileDeleter.java 16 Dec 2014 11:31:41 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,19 +17,28 @@ * limitations under the License. */ -import org.apache.lucene.store.Directory; - -import java.io.IOException; +import java.io.Closeable; import java.io.FileNotFoundException; -import java.io.PrintStream; -import java.util.Map; -import java.util.HashMap; -import java.util.Iterator; -import java.util.List; +import java.io.IOException; +import java.nio.file.NoSuchFileException; import java.util.ArrayList; -import java.util.Collections; import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Objects; +import java.util.Set; +import java.util.regex.Matcher; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.NoSuchDirectoryException; +import org.apache.lucene.util.CollectionUtil; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.InfoStream; + /* * This class keeps track of each SegmentInfos instance that * is still "live", either because it corresponds to a @@ -40,13 +49,6 @@ * counting to map the live SegmentInfos instances to * individual files in the Directory. * - * When autoCommit=true, IndexWriter currently commits only - * on completion of a merge (though this may change with - * time: it is not a guarantee). When autoCommit=false, - * IndexWriter only commits when it is closed. Regardless - * of autoCommit, the user may call IndexWriter.commit() to - * force a blocking commit. - * * The same directory file may be referenced by more than * one IndexCommit, i.e. more than one SegmentInfos. * Therefore we count how many commits reference each file. @@ -58,12 +60,12 @@ * (IndexDeletionPolicy) is consulted on creation (onInit) * and once per commit (onCommit), to decide when a commit * should be removed. - * + * * It is the business of the IndexDeletionPolicy to choose * when to delete commit points. The actual mechanics of * file deletion, retrying, etc, derived from the deletion * of commit points is the business of the IndexFileDeleter. - * + * * The current default deletion policy is {@link * KeepOnlyLastCommitDeletionPolicy}, which removes all * prior commits when a new commit has completed. This @@ -74,132 +76,146 @@ * directly with no retry logic. 
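The comment above describes the core of IndexFileDeleter: every live commit point increfs the files it references, and a file becomes deletable once its reference count drops to zero. A tiny sketch of that idea under simplifying assumptions (FileRefCounter is an illustrative name, and a real deleter removes files through the Directory and retries failures rather than just collecting names):

    import java.util.HashMap;
    import java.util.HashSet;
    import java.util.Map;
    import java.util.Set;

    // Counts how many commit points reference each file; files whose count reaches
    // zero are moved to the deletable set.
    final class FileRefCounter {
      private final Map<String, Integer> refCounts = new HashMap<>();
      private final Set<String> deletable = new HashSet<>();

      void incRef(String fileName) {
        refCounts.merge(fileName, 1, Integer::sum);
      }

      void decRef(String fileName) {
        int count = refCounts.merge(fileName, -1, Integer::sum);
        if (count <= 0) {
          refCounts.remove(fileName);
          deletable.add(fileName);   // a real deleter would attempt the delete now and retry later
        }
      }

      Set<String> deletableFiles() {
        return deletable;
      }
    }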
*/ -final class IndexFileDeleter { +final class IndexFileDeleter implements Closeable { /* Files that we tried to delete but failed (likely * because they are open and we are running on Windows), * so we will retry them again later: */ - private List deletable; + private Set deletable; - /* Reference count for all files in the index. + /* Reference count for all files in the index. * Counts how many existing commits reference a file. - * Maps String to RefCount (class below) instances: */ - private Map refCounts = new HashMap(); + **/ + private Map refCounts = new HashMap<>(); /* Holds all commits (segments_N) currently in the index. * This will have just 1 commit if you are using the * default delete policy (KeepOnlyLastCommitDeletionPolicy). * Other policies may leave commit points live for longer * in which case this list would be longer than 1: */ - private List commits = new ArrayList(); + private List commits = new ArrayList<>(); /* Holds files we had incref'd from the previous * non-commit checkpoint: */ - private List lastFiles = new ArrayList(); + private final List lastFiles = new ArrayList<>(); - /* Commits that the IndexDeletionPolicy have decided to delete: */ - private List commitsToDelete = new ArrayList(); + /* Commits that the IndexDeletionPolicy have decided to delete: */ + private List commitsToDelete = new ArrayList<>(); - private PrintStream infoStream; + private final InfoStream infoStream; private Directory directory; private IndexDeletionPolicy policy; - private DocumentsWriter docWriter; + final boolean startingCommitDeleted; + private SegmentInfos lastSegmentInfos; + /** Change to true to see details of reference counts when - * infoStream != null */ + * infoStream is enabled */ public static boolean VERBOSE_REF_COUNTS = false; - void setInfoStream(PrintStream infoStream) { - this.infoStream = infoStream; - if (infoStream != null) - message("setInfoStream deletionPolicy=" + policy); + private final IndexWriter writer; + + // called only from assert + private boolean locked() { + return writer == null || Thread.holdsLock(writer); } - - private void message(String message) { - infoStream.println("IFD [" + Thread.currentThread().getName() + "]: " + message); - } /** * Initialize the deleter: find all previous commits in * the Directory, incref the files they reference, call - * the policy to let it delete commits. The incoming - * segmentInfos must have been loaded from a commit point - * and not yet modified. This will remove any files not - * referenced by any of the commits. - * @throws CorruptIndexException if the index is corrupt + * the policy to let it delete commits. This will remove + * any files not referenced by any of the commits. 
* @throws IOException if there is a low-level IO error */ - public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, PrintStream infoStream, DocumentsWriter docWriter) - throws CorruptIndexException, IOException { - - this.docWriter = docWriter; + public IndexFileDeleter(Directory directory, IndexDeletionPolicy policy, SegmentInfos segmentInfos, + InfoStream infoStream, IndexWriter writer, boolean initialIndexExists) throws IOException { + Objects.requireNonNull(writer); this.infoStream = infoStream; + this.writer = writer; - if (infoStream != null) - message("init: current segments file is \"" + segmentInfos.getCurrentSegmentFileName() + "\"; deletionPolicy=" + policy); + final String currentSegmentsFile = segmentInfos.getSegmentsFileName(); + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "init: current segments file is \"" + currentSegmentsFile + "\"; deletionPolicy=" + policy); + } + this.policy = policy; this.directory = directory; // First pass: walk the files and initialize our ref // counts: long currentGen = segmentInfos.getGeneration(); - IndexFileNameFilter filter = IndexFileNameFilter.getFilter(); - String[] files = directory.list(); - if (files == null) - throw new IOException("cannot read directory " + directory + ": list() returned null"); - CommitPoint currentCommitPoint = null; + String[] files = null; + try { + files = directory.listAll(); + } catch (NoSuchDirectoryException e) { + // it means the directory is empty, so ignore it. + files = new String[0]; + } - for(int i=0;i 0) { + throw e; + } else { + // Most likely we are opening an index that + // has an aborted "future" commit, so suppress + // exc in this case + sis = null; + } } if (sis != null) { - CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis); + final CommitPoint commitPoint = new CommitPoint(commitsToDelete, directory, sis); if (sis.getGeneration() == segmentInfos.getGeneration()) { currentCommitPoint = commitPoint; } commits.add(commitPoint); incRef(sis, true); + + if (lastSegmentInfos == null || sis.getGeneration() > lastSegmentInfos.getGeneration()) { + lastSegmentInfos = sis; + } } } } } } - if (currentCommitPoint == null) { + if (currentCommitPoint == null && currentSegmentsFile != null && initialIndexExists) { // We did not in fact see the segments_N file // corresponding to the segmentInfos that was passed // in. 
Yet, it must exist, because our caller holds @@ -209,30 +225,33 @@ // try now to explicitly open this commit point: SegmentInfos sis = new SegmentInfos(); try { - sis.read(directory, segmentInfos.getCurrentSegmentFileName()); + sis.read(directory, currentSegmentsFile); } catch (IOException e) { - throw new CorruptIndexException("failed to locate current segments_N file"); + throw new CorruptIndexException("failed to locate current segments_N file \"" + currentSegmentsFile + "\""); } - if (infoStream != null) - message("forced open of current segments file " + segmentInfos.getCurrentSegmentFileName()); + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "forced open of current segments file " + segmentInfos.getSegmentsFileName()); + } currentCommitPoint = new CommitPoint(commitsToDelete, directory, sis); commits.add(currentCommitPoint); incRef(sis, true); } // We keep commits list in sorted order (oldest to newest): - Collections.sort(commits); + CollectionUtil.timSort(commits); + // refCounts only includes "normal" filenames (does not include segments.gen, write.lock) + inflateGens(segmentInfos, refCounts.keySet(), infoStream); + // Now delete anything with ref count at 0. These are // presumably abandoned files eg due to crash of // IndexWriter. - Iterator it = refCounts.keySet().iterator(); - while(it.hasNext()) { - String fileName = (String) it.next(); - RefCount rc = (RefCount) refCounts.get(fileName); + for(Map.Entry entry : refCounts.entrySet() ) { + RefCount rc = entry.getValue(); + final String fileName = entry.getKey(); if (0 == rc.count) { - if (infoStream != null) { - message("init: removing unreferenced file \"" + fileName + "\""); + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "init: removing unreferenced file \"" + fileName + "\""); } deleteFile(fileName); } @@ -242,46 +261,141 @@ // startup: policy.onInit(commits); - // It's OK for the onInit to remove the current commit - // point; we just have to checkpoint our in-memory - // SegmentInfos to protect those files that it uses: - if (currentCommitPoint.deleted) { - checkpoint(segmentInfos, false); - } - + // Always protect the incoming segmentInfos since + // sometime it may not be the most recent commit + checkpoint(segmentInfos, false); + + startingCommitDeleted = currentCommitPoint == null ? false : currentCommitPoint.isDeleted(); + deleteCommits(); } + /** Set all gens beyond what we currently see in the directory, to avoid double-write in cases where the previous IndexWriter did not + * gracefully close/rollback (e.g. os/machine crashed or lost power). */ + static void inflateGens(SegmentInfos infos, Collection files, InfoStream infoStream) { + + long maxSegmentGen = Long.MIN_VALUE; + int maxSegmentName = Integer.MIN_VALUE; + + // Confusingly, this is the union of liveDocs, field infos, doc values + // (and maybe others, in the future) gens. 
This is somewhat messy, + // since it means DV updates will suddenly write to the next gen after + // live docs' gen, for example, but we don't have the APIs to ask the + // codec which file is which: + Map maxPerSegmentGen = new HashMap<>(); + + for(String fileName : files) { + if (fileName.equals(IndexFileNames.SEGMENTS_GEN) || fileName.equals(IndexWriter.WRITE_LOCK_NAME)) { + // do nothing + } else if (fileName.startsWith(IndexFileNames.SEGMENTS)) { + try { + maxSegmentGen = Math.max(SegmentInfos.generationFromSegmentsFileName(fileName), maxSegmentGen); + } catch (NumberFormatException ignore) { + // trash file: we have to handle this since we allow anything starting with 'segments' here + } + } else { + String segmentName = IndexFileNames.parseSegmentName(fileName); + assert segmentName.startsWith("_"): "wtf? file=" + fileName; + + maxSegmentName = Math.max(maxSegmentName, Integer.parseInt(segmentName.substring(1), Character.MAX_RADIX)); + + Long curGen = maxPerSegmentGen.get(segmentName); + if (curGen == null) { + curGen = 0L; + } + + try { + curGen = Math.max(curGen, IndexFileNames.parseGeneration(fileName)); + } catch (NumberFormatException ignore) { + // trash file: we have to handle this since codec regex is only so good + } + maxPerSegmentGen.put(segmentName, curGen); + } + } + + // Generation is advanced before write: + infos.setGeneration(Math.max(infos.getGeneration(), maxSegmentGen)); + if (infos.counter < 1+maxSegmentName) { + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "init: inflate infos.counter to " + (1+maxSegmentName) + " vs current=" + infos.counter); + } + infos.counter = 1+maxSegmentName; + } + + for(SegmentCommitInfo info : infos) { + Long gen = maxPerSegmentGen.get(info.info.name); + assert gen != null; + long genLong = gen; + if (info.getNextWriteDelGen() < genLong+1) { + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "init: seg=" + info.info.name + " set nextWriteDelGen=" + (genLong+1) + " vs current=" + info.getNextWriteDelGen()); + } + info.setNextWriteDelGen(genLong+1); + } + if (info.getNextWriteFieldInfosGen() < genLong+1) { + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "init: seg=" + info.info.name + " set nextWriteFieldInfosGen=" + (genLong+1) + " vs current=" + info.getNextWriteFieldInfosGen()); + } + info.setNextWriteFieldInfosGen(genLong+1); + } + if (info.getNextWriteDocValuesGen() < genLong+1) { + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "init: seg=" + info.info.name + " set nextWriteDocValuesGen=" + (genLong+1) + " vs current=" + info.getNextWriteDocValuesGen()); + } + info.setNextWriteDocValuesGen(genLong+1); + } + } + } + + private void ensureOpen() throws AlreadyClosedException { + writer.ensureOpen(false); + // since we allow 'closing' state, we must still check this, we could be closing because we hit e.g. OOM + if (writer.tragedy != null) { + throw new AlreadyClosedException("refusing to delete any files: this IndexWriter hit an unrecoverable exception", writer.tragedy); + } + } + + public SegmentInfos getLastSegmentInfos() { + return lastSegmentInfos; + } + /** * Remove the CommitPoints in the commitsToDelete List by * DecRef'ing all files from each SegmentInfos. 
*/ - private void deleteCommits() throws IOException { + private void deleteCommits() { int size = commitsToDelete.size(); if (size > 0) { // First decref all files that had been referred to by // the now-deleted commits: + Throwable firstThrowable = null; for(int i=0;i 0) { + policy.onCommit(commits); + deleteCommits(); + } + } + + public void deletePendingFiles() { + assert locked(); if (deletable != null) { - List oldDeletable = deletable; + Set oldDeletable = deletable; deletable = null; - int size = oldDeletable.size(); - for(int i=0;i 0) { + // LUCENE-5904: should never happen! This means we are about to pending-delete a referenced index file + assert false: "fileName=" + fileName + " is in pending delete list but also has refCount=" + rc.count; + } else { + deleteFile(fileName); + } } } } /** * For definition of "check point" see IndexWriter comments: * "Clarification: Check Points (and commits)". - * + * * Writer calls this when it has made a "consistent * change" to the index, meaning new files are written to * the index and the in-memory SegmentInfos have been @@ -378,9 +540,13 @@ * removed, we decref their files as well. */ public void checkpoint(SegmentInfos segmentInfos, boolean isCommit) throws IOException { + assert locked(); - if (infoStream != null) { - message("now checkpoint \"" + segmentInfos.getCurrentSegmentFileName() + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); + assert Thread.holdsLock(writer); + long t0 = 0; + if (infoStream.isEnabled("IFD")) { + t0 = System.nanoTime(); + infoStream.message("IFD", "now checkpoint \"" + writer.segString(writer.toLiveInfos(segmentInfos)) + "\" [" + segmentInfos.size() + " segments " + "; isCommit = " + isCommit + "]"); } // Try again now to delete any previously un-deletable @@ -400,150 +566,177 @@ // Decref files for commits that were deleted by the policy: deleteCommits(); } else { - - final List docWriterFiles; - if (docWriter != null) { - docWriterFiles = docWriter.openFiles(); - if (docWriterFiles != null) - // We must incRef these files before decRef'ing - // last files to make sure we don't accidentally - // delete them: - incRef(docWriterFiles); - } else - docWriterFiles = null; - // DecRef old files from the last checkpoint, if any: - int size = lastFiles.size(); - if (size > 0) { - for(int i=0;i files) { + assert locked(); + for(final String file : files) { + incRef(file); } } - void incRef(List files) throws IOException { - int size = files.size(); - for(int i=0;i files) { + assert locked(); + Throwable firstThrowable = null; + for(final String file : files) { + try { + decRef(file); + } catch (Throwable t) { + if (firstThrowable == null) { + // Save first exception and throw it in the end, but be sure to finish decRef all files + firstThrowable = t; + } + } } + + // NOTE: does nothing if firstThrowable is null + IOUtils.reThrowUnchecked(firstThrowable); } - void decRef(String fileName) throws IOException { + /** Decrefs all provided files, ignoring any exceptions hit; call this if + * you are already handling an exception. 
*/ + void decRefWhileHandlingException(Collection files) { + assert locked(); + for(final String file : files) { + try { + decRef(file); + } catch (Throwable t) { + } + } + } + + void decRef(String fileName) { + assert locked(); RefCount rc = getRefCount(fileName); - if (infoStream != null && VERBOSE_REF_COUNTS) { - message(" DecRef \"" + fileName + "\": pre-decr count is " + rc.count); + if (infoStream.isEnabled("IFD")) { + if (VERBOSE_REF_COUNTS) { + infoStream.message("IFD", " DecRef \"" + fileName + "\": pre-decr count is " + rc.count); + } } if (0 == rc.DecRef()) { // This file is no longer referenced by any past // commit points nor by the in-memory SegmentInfos: - deleteFile(fileName); - refCounts.remove(fileName); + try { + deleteFile(fileName); + } finally { + refCounts.remove(fileName); + } } } void decRef(SegmentInfos segmentInfos) throws IOException { - final int size = segmentInfos.size(); - for(int i=0;i 0; } } private RefCount getRefCount(String fileName) { + assert locked(); RefCount rc; if (!refCounts.containsKey(fileName)) { - rc = new RefCount(); + rc = new RefCount(fileName); + // We should never incRef a file we are already wanting to delete: + assert deletable == null || deletable.contains(fileName) == false: "file \"" + fileName + "\" cannot be incRef'd: it's already pending delete"; refCounts.put(fileName, rc); } else { - rc = (RefCount) refCounts.get(fileName); + rc = refCounts.get(fileName); } return rc; } - void deleteFiles(List files) throws IOException { - final int size = files.size(); - for(int i=0;i files) { + assert locked(); + for(final String file: files) { + deleteFile(file); + } } - /** Delets the specified files, but only if they are new + /** Deletes the specified files, but only if they are new * (have not yet been incref'd). */ - void deleteNewFiles(Collection files) throws IOException { - final Iterator it = files.iterator(); - while(it.hasNext()) { - final String fileName = (String) it.next(); - if (!refCounts.containsKey(fileName)) + void deleteNewFiles(Collection files) { + assert locked(); + for (final String fileName: files) { + // NOTE: it's very unusual yet possible for the + // refCount to be present and 0: it can happen if you + // open IW on a crashed index, and it removes a bunch + // of unref'd files, and then you add new docs / do + // merging, and it reuses that segment name. + // TestCrash.testCrashAfterReopen can hit this: + if (!refCounts.containsKey(fileName) || refCounts.get(fileName).count == 0) { + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "delete new file \"" + fileName + "\""); + } deleteFile(fileName); + } } } - void deleteFile(String fileName) - throws IOException { + void deleteFile(String fileName) { + assert locked(); + ensureOpen(); try { - if (infoStream != null) { - message("delete \"" + fileName + "\""); + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "delete \"" + fileName + "\""); } directory.deleteFile(fileName); - } catch (IOException e) { // if delete fails - if (directory.fileExists(fileName)) { + } catch (IOException e) { // if delete fails + // Some operating systems (e.g. Windows) don't + // permit a file to be deleted while it is opened + // for read (e.g. by another process or thread). So + // we assume that when a delete fails it is because + // the file is open in another process, and queue + // the file for subsequent deletion. - // Some operating systems (e.g. Windows) don't - // permit a file to be deleted while it is opened - // for read (e.g. 
by another process or thread). So - // we assume that when a delete fails it is because - // the file is open in another process, and queue - // the file for subsequent deletion. - - if (infoStream != null) { - message("IndexFileDeleter: unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later."); - } - if (deletable == null) { - deletable = new ArrayList(); - } - deletable.add(fileName); // add to deletable + if (infoStream.isEnabled("IFD")) { + infoStream.message("IFD", "unable to remove file \"" + fileName + "\": " + e.toString() + "; Will re-try later."); } + if (deletable == null) { + deletable = new HashSet<>(); + } + deletable.add(fileName); // add to deletable } } @@ -552,14 +745,26 @@ */ final private static class RefCount { + // fileName used only for better assert error messages + final String fileName; + boolean initDone; + RefCount(String fileName) { + this.fileName = fileName; + } + int count; public int IncRef() { + if (!initDone) { + initDone = true; + } else { + assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-increment for file \"" + fileName + "\""; + } return ++count; } public int DecRef() { - assert count > 0; + assert count > 0: Thread.currentThread().getName() + ": RefCount is 0 pre-decrement for file \"" + fileName + "\""; return --count; } } @@ -571,85 +776,77 @@ * equals. */ - final private static class CommitPoint extends IndexCommit implements Comparable { + final private static class CommitPoint extends IndexCommit { - long gen; - List files; + Collection files; String segmentsFileName; boolean deleted; Directory directory; - Collection commitsToDelete; - long version; + Collection commitsToDelete; long generation; - final boolean isOptimized; + final Map userData; + private final int segmentCount; - public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException { + public CommitPoint(Collection commitsToDelete, Directory directory, SegmentInfos segmentInfos) throws IOException { this.directory = directory; this.commitsToDelete = commitsToDelete; - segmentsFileName = segmentInfos.getCurrentSegmentFileName(); - version = segmentInfos.getVersion(); + userData = segmentInfos.getUserData(); + segmentsFileName = segmentInfos.getSegmentsFileName(); generation = segmentInfos.getGeneration(); - int size = segmentInfos.size(); - files = new ArrayList(size); - files.add(segmentsFileName); - gen = segmentInfos.getGeneration(); - for(int i=0;i getFileNames() { + return files; } + @Override public Directory getDirectory() { return directory; } - public long getVersion() { - return version; - } - + @Override public long getGeneration() { return generation; } + @Override + public Map getUserData() { + return userData; + } + /** * Called only be the deletion policy, to remove this * commit point from the index. */ + @Override public void delete() { if (!deleted) { deleted = true; commitsToDelete.add(this); } } + @Override public boolean isDeleted() { return deleted; } - - public int compareTo(Object obj) { - CommitPoint commit = (CommitPoint) obj; - if (gen < commit.gen) { - return -1; - } else if (gen > commit.gen) { - return 1; - } else { - return 0; - } - } } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IndexFileNameFilter.java'. Fisheye: No comparison available. Pass `N' to diff? 
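The hunks above replace IndexFileDeleter's list-based bookkeeping with per-file reference counts: each commit point and each checkpoint incRefs the files it needs, decRef deletes a file once its count drops to zero, and files that cannot be removed right away are parked in the pending-delete set. A minimal standalone sketch of that contract (illustrative only; the class below is not part of Lucene):

import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

// Sketch of the incRef/decRef contract used by IndexFileDeleter:
// a file is physically removed only when its last reference is dropped.
public class RefCountSketch {
  private final Map<String, Integer> refCounts = new HashMap<>();
  private final Set<String> deleted = new HashSet<>();

  void incRef(String fileName) {
    refCounts.merge(fileName, 1, Integer::sum);
  }

  void decRef(String fileName) {
    Integer count = refCounts.get(fileName);
    if (count == null || count <= 0) {
      throw new IllegalStateException("decRef without matching incRef: " + fileName);
    }
    if (count == 1) {
      refCounts.remove(fileName);
      deleted.add(fileName);              // stands in for directory.deleteFile(fileName)
    } else {
      refCounts.put(fileName, count - 1);
    }
  }

  public static void main(String[] args) {
    RefCountSketch deleter = new RefCountSketch();
    deleter.incRef("_0.cfs");             // referenced by an old commit
    deleter.incRef("_0.cfs");             // and by the current checkpoint
    deleter.decRef("_0.cfs");             // old commit deleted -> still one reference
    System.out.println(deleter.deleted);  // []
    deleter.decRef("_0.cfs");             // last reference gone -> file is deleted
    System.out.println(deleter.deleted);  // [_0.cfs]
  }
}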
Index: 3rdParty_sources/lucene/org/apache/lucene/index/IndexFileNames.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/IndexFileNames.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/IndexFileNames.java 17 Aug 2012 14:55:01 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/IndexFileNames.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,182 +17,222 @@ * limitations under the License. */ +import java.util.regex.Pattern; + +import org.apache.lucene.codecs.Codec; + +// TODO: put all files under codec and remove all the static extensions here + /** - * Useful constants representing filenames and extensions used by lucene + * This class contains useful constants representing filenames and extensions + * used by lucene, as well as convenience methods for querying whether a file + * name matches an extension ({@link #matchesExtension(String, String) + * matchesExtension}), as well as generating file names from a segment name, + * generation and extension ( + * {@link #fileNameFromGeneration(String, String, long) fileNameFromGeneration}, + * {@link #segmentFileName(String, String, String) segmentFileName}). * - * @version $rcs = ' $Id$ ' ; + *

    NOTE: extensions used by codecs are not + * listed here. You must interact with the {@link Codec} + * directly. + * + * @lucene.internal */ -final class IndexFileNames { +public final class IndexFileNames { + + /** No instance */ + private IndexFileNames() {} + /** Name of the index segment file */ - static final String SEGMENTS = "segments"; + public static final String SEGMENTS = "segments"; - /** Name of the generation reference file name */ - static final String SEGMENTS_GEN = "segments.gen"; + /** Extension of gen file */ + public static final String GEN_EXTENSION = "gen"; - /** Name of the index deletable file (only used in - * pre-lockless indices) */ - static final String DELETABLE = "deletable"; - - /** Extension of norms file */ - static final String NORMS_EXTENSION = "nrm"; + /** Name of the generation reference file name */ + public static final String SEGMENTS_GEN = "segments." + GEN_EXTENSION; - /** Extension of freq postings file */ - static final String FREQ_EXTENSION = "frq"; - - /** Extension of prox postings file */ - static final String PROX_EXTENSION = "prx"; - - /** Extension of terms file */ - static final String TERMS_EXTENSION = "tis"; - - /** Extension of terms index file */ - static final String TERMS_INDEX_EXTENSION = "tii"; - - /** Extension of stored fields index file */ - static final String FIELDS_INDEX_EXTENSION = "fdx"; - - /** Extension of stored fields file */ - static final String FIELDS_EXTENSION = "fdt"; - - /** Extension of vectors fields file */ - static final String VECTORS_FIELDS_EXTENSION = "tvf"; - - /** Extension of vectors documents file */ - static final String VECTORS_DOCUMENTS_EXTENSION = "tvd"; - - /** Extension of vectors index file */ - static final String VECTORS_INDEX_EXTENSION = "tvx"; - /** Extension of compound file */ - static final String COMPOUND_FILE_EXTENSION = "cfs"; + public static final String COMPOUND_FILE_EXTENSION = "cfs"; + + /** Extension of compound file entries */ + public static final String COMPOUND_FILE_ENTRIES_EXTENSION = "cfe"; - /** Extension of compound file for doc store files*/ - static final String COMPOUND_FILE_STORE_EXTENSION = "cfx"; - - /** Extension of deletes */ - static final String DELETES_EXTENSION = "del"; - - /** Extension of field infos */ - static final String FIELD_INFOS_EXTENSION = "fnm"; - - /** Extension of plain norms */ - static final String PLAIN_NORMS_EXTENSION = "f"; - - /** Extension of separate norms */ - static final String SEPARATE_NORMS_EXTENSION = "s"; - - /** Extension of gen file */ - static final String GEN_EXTENSION = "gen"; - /** * This array contains all filename extensions used by - * Lucene's index files, with two exceptions, namely the - * extension made up from .f + a number and - * from .s + a number. Also note that - * Lucene's segments_N files do not have any - * filename extension. + * Lucene's index files, with one exception, namely the + * extension made up from .s + a number. + * Also note that Lucene's segments_N files + * do not have any filename extension. 
*/ - static final String INDEX_EXTENSIONS[] = new String[] { + public static final String INDEX_EXTENSIONS[] = new String[] { COMPOUND_FILE_EXTENSION, - FIELD_INFOS_EXTENSION, - FIELDS_INDEX_EXTENSION, - FIELDS_EXTENSION, - TERMS_INDEX_EXTENSION, - TERMS_EXTENSION, - FREQ_EXTENSION, - PROX_EXTENSION, - DELETES_EXTENSION, - VECTORS_INDEX_EXTENSION, - VECTORS_DOCUMENTS_EXTENSION, - VECTORS_FIELDS_EXTENSION, + COMPOUND_FILE_ENTRIES_EXTENSION, GEN_EXTENSION, - NORMS_EXTENSION, - COMPOUND_FILE_STORE_EXTENSION, }; - /** File extensions that are added to a compound file - * (same as above, minus "del", "gen", "cfs"). */ - static final String[] INDEX_EXTENSIONS_IN_COMPOUND_FILE = new String[] { - FIELD_INFOS_EXTENSION, - FIELDS_INDEX_EXTENSION, - FIELDS_EXTENSION, - TERMS_INDEX_EXTENSION, - TERMS_EXTENSION, - FREQ_EXTENSION, - PROX_EXTENSION, - VECTORS_INDEX_EXTENSION, - VECTORS_DOCUMENTS_EXTENSION, - VECTORS_FIELDS_EXTENSION, - NORMS_EXTENSION - }; + /** + * Computes the full file name from base, extension and generation. If the + * generation is -1, the file name is null. If it's 0, the file name is + * <base>.<ext>. If it's > 0, the file name is + * <base>_<gen>.<ext>.
    + * NOTE: .<ext> is added to the name only if ext is + * not an empty string. + * + * @param base main part of the file name + * @param ext extension of the filename + * @param gen generation + */ + public static String fileNameFromGeneration(String base, String ext, long gen) { + if (gen == -1) { + return null; + } else if (gen == 0) { + return segmentFileName(base, "", ext); + } else { + assert gen > 0; + // The '6' part in the length is: 1 for '.', 1 for '_' and 4 as estimate + // to the gen length as string (hopefully an upper limit so SB won't + // expand in the middle. + StringBuilder res = new StringBuilder(base.length() + 6 + ext.length()) + .append(base).append('_').append(Long.toString(gen, Character.MAX_RADIX)); + if (ext.length() > 0) { + res.append('.').append(ext); + } + return res.toString(); + } + } - static final String[] STORE_INDEX_EXTENSIONS = new String[] { - VECTORS_INDEX_EXTENSION, - VECTORS_FIELDS_EXTENSION, - VECTORS_DOCUMENTS_EXTENSION, - FIELDS_INDEX_EXTENSION, - FIELDS_EXTENSION - }; + /** + * Returns a file name that includes the given segment name, your own custom + * name and extension. The format of the filename is: + * <segmentName>(_<name>)(.<ext>). + *

    + * NOTE: .<ext> is added to the result file name only if + * ext is not empty. + *

    + * NOTE: _<segmentSuffix> is added to the result file name only if + * it's not the empty string. + *

    + * NOTE: all custom files should be named using this method, or + * otherwise some structures may fail to handle them properly (such as if they + * are added to compound files). + */ + public static String segmentFileName(String segmentName, String segmentSuffix, String ext) { + if (ext.length() > 0 || segmentSuffix.length() > 0) { + assert !ext.startsWith("."); + StringBuilder sb = new StringBuilder(segmentName.length() + 2 + segmentSuffix.length() + ext.length()); + sb.append(segmentName); + if (segmentSuffix.length() > 0) { + sb.append('_').append(segmentSuffix); + } + if (ext.length() > 0) { + sb.append('.').append(ext); + } + return sb.toString(); + } else { + return segmentName; + } + } - static final String[] NON_STORE_INDEX_EXTENSIONS = new String[] { - FIELD_INFOS_EXTENSION, - FREQ_EXTENSION, - PROX_EXTENSION, - TERMS_EXTENSION, - TERMS_INDEX_EXTENSION, - NORMS_EXTENSION - }; + /** + * Returns true if the given filename ends with the given extension. One + * should provide a pure extension, without '.'. + */ + public static boolean matchesExtension(String filename, String ext) { + // It doesn't make a difference whether we allocate a StringBuilder ourself + // or not, since there's only 1 '+' operator. + return filename.endsWith("." + ext); + } + + /** locates the boundary of the segment name, or -1 */ + private static int indexOfSegmentName(String filename) { + // If it is a .del file, there's an '_' after the first character + int idx = filename.indexOf('_', 1); + if (idx == -1) { + // If it's not, strip everything that's before the '.' + idx = filename.indexOf('.'); + } + return idx; + } - /** File extensions of old-style index files */ - static final String COMPOUND_EXTENSIONS[] = new String[] { - FIELD_INFOS_EXTENSION, - FREQ_EXTENSION, - PROX_EXTENSION, - FIELDS_INDEX_EXTENSION, - FIELDS_EXTENSION, - TERMS_INDEX_EXTENSION, - TERMS_EXTENSION - }; + /** + * Strips the segment name out of the given file name. If you used + * {@link #segmentFileName} or {@link #fileNameFromGeneration} to create your + * files, then this method simply removes whatever comes before the first '.', + * or the second '_' (excluding both). + * + * @return the filename with the segment name removed, or the given filename + * if it does not contain a '.' and '_'. + */ + public static String stripSegmentName(String filename) { + int idx = indexOfSegmentName(filename); + if (idx != -1) { + filename = filename.substring(idx); + } + return filename; + } + + /** Returns the generation from this file name, or 0 if there is no + * generation. */ + public static long parseGeneration(String filename) { + assert filename.startsWith("_"); + String parts[] = stripExtension(filename).substring(1).split("_"); + // 4 cases: + // segment.ext + // segment_gen.ext + // segment_codec_suffix.ext + // segment_gen_codec_suffix.ext + if (parts.length == 2 || parts.length == 4) { + return Long.parseLong(parts[1], Character.MAX_RADIX); + } else { + return 0; + } + } - /** File extensions for term vector support */ - static final String VECTOR_EXTENSIONS[] = new String[] { - VECTORS_INDEX_EXTENSION, - VECTORS_DOCUMENTS_EXTENSION, - VECTORS_FIELDS_EXTENSION - }; + /** + * Parses the segment name out of the given file name. + * + * @return the segment name only, or filename + * if it does not contain a '.' and '_'. 
+ */ + public static String parseSegmentName(String filename) { + int idx = indexOfSegmentName(filename); + if (idx != -1) { + filename = filename.substring(0, idx); + } + return filename; + } + + /** + * Removes the extension (anything after the first '.'), + * otherwise returns the original filename. + */ + public static String stripExtension(String filename) { + int idx = filename.indexOf('.'); + if (idx != -1) { + filename = filename.substring(0, idx); + } + return filename; + } /** - * Computes the full file name from base, extension and - * generation. If the generation is -1, the file name is - * null. If it's 0, the file name is . - * If it's > 0, the file name is _. - * - * @param base -- main part of the file name - * @param extension -- extension of the filename (including .) - * @param gen -- generation + * Return the extension (anything after the first '.'), + * or null if there is no '.' in the file name. */ - static final String fileNameFromGeneration(String base, String extension, long gen) { - if (gen == SegmentInfo.NO) { + public static String getExtension(String filename) { + final int idx = filename.indexOf('.'); + if (idx == -1) { return null; - } else if (gen == SegmentInfo.WITHOUT_GEN) { - return base + extension; } else { - return base + "_" + Long.toString(gen, Character.MAX_RADIX) + extension; + return filename.substring(idx + 1, filename.length()); } } /** - * Returns true if the provided filename is one of the doc - * store files (ends with an extension in - * STORE_INDEX_EXTENSIONS). + * All files created by codecs much match this pattern (checked in + * SegmentInfo). */ - static final boolean isDocStoreFile(String fileName) { - if (fileName.endsWith(COMPOUND_FILE_STORE_EXTENSION)) - return true; - for(int i=0;i Concrete subclasses of IndexReader are usually constructed with a call to - one of the static open() methods, e.g. {@link #open(String)}. +

    There are two different types of IndexReaders: +

      +
    • {@link AtomicReader}: These indexes do not consist of several sub-readers, + they are atomic. They support retrieval of stored fields, doc values, terms, + and postings. +
    • {@link CompositeReader}: Instances (like {@link DirectoryReader}) + of this reader can only + be used to get stored fields from the underlying AtomicReaders, + but it is not possible to directly retrieve postings. To do that, get + the sub-readers via {@link CompositeReader#getSequentialSubReaders}. + Alternatively, you can mimic an {@link AtomicReader} (with a serious slowdown), + by wrapping composite readers with {@link SlowCompositeReaderWrapper}. +
    + +

    IndexReader instances for indexes on disk are usually constructed + with a call to one of the static DirectoryReader.open() methods, + e.g. {@link DirectoryReader#open(org.apache.lucene.store.Directory)}. Since {@link DirectoryReader} implements + the {@link CompositeReader} interface, it is not possible to directly get postings.
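A short usage sketch of that construction path, assuming the Lucene 4.x signatures this diff targets (the index path is a placeholder):

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenReaderSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("/path/to/index"));  // placeholder path
    DirectoryReader reader = DirectoryReader.open(dir);            // a CompositeReader
    try {
      // per-segment AtomicReaders are reached through the leaves:
      System.out.println("maxDoc=" + reader.maxDoc() + " leaves=" + reader.leaves().size());
    } finally {
      reader.close();  // close() simply calls decRef(); the reader is freed once all references are released
      dir.close();
    }
  }
}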

    For efficiency, in this API documents are often referred to via document numbers, non-negative integers which each name a unique - document in the index. These document numbers are ephemeral--they may change + document in the index. These document numbers are ephemeral -- they may change as documents are added to and deleted from an index. Clients should thus not rely on a given document having the same number between sessions. -

    An IndexReader can be opened on a directory for which an IndexWriter is - opened already, but it cannot be used to delete documents from the index then. -

    - NOTE: for backwards API compatibility, several methods are not listed - as abstract, but have no useful implementations in this base class and - instead always throw UnsupportedOperationException. Subclasses are - strongly encouraged to override these methods, but in many cases may not - need to. -

    +

    NOTE: {@link + IndexReader} instances are completely thread + safe, meaning multiple threads can call any of its methods, + concurrently. If your application requires external + synchronization, you should not synchronize on the + IndexReader instance; use your own + (non-Lucene) objects instead. +*/ +public abstract class IndexReader implements Closeable { + + private boolean closed = false; + private boolean closedByChild = false; + private final AtomicInteger refCount = new AtomicInteger(1); -

    + IndexReader() { + if (!(this instanceof CompositeReader || this instanceof AtomicReader)) + throw new Error("IndexReader should never be directly extended, subclass AtomicReader or CompositeReader instead."); + } + + /** + * A custom listener that's invoked when the IndexReader + * is closed. + * + * @lucene.experimental + */ + public static interface ReaderClosedListener { + /** Invoked when the {@link IndexReader} is closed. */ + public void onClose(IndexReader reader); + } - NOTE: as of 2.4, it's possible to open a read-only - IndexReader using one of the static open methods that - accepts the boolean readOnly parameter. Such a reader has - better concurrency as it's not necessary to synchronize on - the isDeleted method. Currently the default for readOnly - is false, meaning if not specified you will get a - read/write IndexReader. But in 3.0 this default will - change to true, meaning you must explicitly specify false - if you want to make changes with the resulting IndexReader. -

    + private final Set readerClosedListeners = + Collections.synchronizedSet(new LinkedHashSet()); - @version $Id$ -*/ -public abstract class IndexReader { + private final Set parentReaders = + Collections.synchronizedSet(Collections.newSetFromMap(new WeakHashMap())); - // NOTE: in 3.0 this will change to true - final static boolean READ_ONLY_DEFAULT = false; + /** Expert: adds a {@link ReaderClosedListener}. The + * provided listener will be invoked when this reader is closed. + * At this point, it is safe for apps to evict this reader from + * any caches keyed on {@link #getCombinedCoreAndDeletesKey()}. + * + * @lucene.experimental */ + public final void addReaderClosedListener(ReaderClosedListener listener) { + ensureOpen(); + readerClosedListeners.add(listener); + } - /** - * Constants describing field properties, for example used for - * {@link IndexReader#getFieldNames(FieldOption)}. - */ - public static final class FieldOption { - private String option; - private FieldOption() { } - private FieldOption(String option) { - this.option = option; + /** Expert: remove a previously added {@link ReaderClosedListener}. + * + * @lucene.experimental */ + public final void removeReaderClosedListener(ReaderClosedListener listener) { + ensureOpen(); + readerClosedListeners.remove(listener); + } + + /** Expert: This method is called by {@code IndexReader}s which wrap other readers + * (e.g. {@link CompositeReader} or {@link FilterAtomicReader}) to register the parent + * at the child (this reader) on construction of the parent. When this reader is closed, + * it will mark all registered parents as closed, too. The references to parent readers + * are weak only, so they can be GCed once they are no longer in use. + * @lucene.experimental */ + public final void registerParentReader(IndexReader reader) { + ensureOpen(); + parentReaders.add(reader); + } + + private void notifyReaderClosedListeners(Throwable th) { + synchronized(readerClosedListeners) { + for(ReaderClosedListener listener : readerClosedListeners) { + try { + listener.onClose(this); + } catch (Throwable t) { + if (th == null) { + th = t; + } else { + th.addSuppressed(t); + } + } + } + IOUtils.reThrowUnchecked(th); } - public String toString() { - return this.option; + } + + private void reportCloseToParentReaders() { + synchronized(parentReaders) { + for(IndexReader parent : parentReaders) { + parent.closedByChild = true; + // cross memory barrier by a fake write: + parent.refCount.addAndGet(0); + // recurse: + parent.reportCloseToParentReaders(); + } } - /** All fields */ - public static final FieldOption ALL = new FieldOption ("ALL"); - /** All indexed fields */ - public static final FieldOption INDEXED = new FieldOption ("INDEXED"); - /** All fields that store payloads */ - public static final FieldOption STORES_PAYLOADS = new FieldOption ("STORES_PAYLOADS"); - /** All fields that omit tf */ - public static final FieldOption OMIT_TF = new FieldOption ("OMIT_TF"); - /** All fields which are not indexed */ - public static final FieldOption UNINDEXED = new FieldOption ("UNINDEXED"); - /** All fields which are indexed with termvectors enabled */ - public static final FieldOption INDEXED_WITH_TERMVECTOR = new FieldOption ("INDEXED_WITH_TERMVECTOR"); - /** All fields which are indexed but don't have termvectors enabled */ - public static final FieldOption INDEXED_NO_TERMVECTOR = new FieldOption ("INDEXED_NO_TERMVECTOR"); - /** All fields with termvectors enabled. 
Please note that only standard termvector fields are returned */ - public static final FieldOption TERMVECTOR = new FieldOption ("TERMVECTOR"); - /** All fields with termvectors with position values enabled */ - public static final FieldOption TERMVECTOR_WITH_POSITION = new FieldOption ("TERMVECTOR_WITH_POSITION"); - /** All fields with termvectors with offset values enabled */ - public static final FieldOption TERMVECTOR_WITH_OFFSET = new FieldOption ("TERMVECTOR_WITH_OFFSET"); - /** All fields with termvectors with offset values and position values enabled */ - public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET"); } - private boolean closed; - protected boolean hasChanges; - - private volatile int refCount; - - // for testing - synchronized int getRefCount() { - return refCount; + /** Expert: returns the current refCount for this reader */ + public final int getRefCount() { + // NOTE: don't ensureOpen, so that callers can see + // refCount is 0 (reader is closed) + return refCount.get(); } /** @@ -132,487 +189,230 @@ * references. * * @see #decRef + * @see #tryIncRef */ - public synchronized void incRef() { - assert refCount > 0; - ensureOpen(); - refCount++; + public final void incRef() { + if (!tryIncRef()) { + ensureOpen(); + } } - + /** - * Expert: decreases the refCount of this IndexReader - * instance. If the refCount drops to 0, then pending - * changes (if any) are committed to the index and this - * reader is closed. - * - * @throws IOException in case an IOException occurs in commit() or doClose() + * Expert: increments the refCount of this IndexReader + * instance only if the IndexReader has not been closed yet + * and returns true iff the refCount was + * successfully incremented, otherwise false. + * If this method returns false the reader is either + * already closed or is currently being closed. Either way this + * reader instance shouldn't be used by an application unless + * true is returned. + *

    + * RefCounts are used to determine when a + * reader can be closed safely, i.e. as soon as there are + * no more references. Be sure to always call a + * corresponding {@link #decRef}, in a finally clause; + * otherwise the reader may never be closed. Note that + * {@link #close} simply calls decRef(), which means that + * the IndexReader will not really be closed until {@link + * #decRef} has been called for all outstanding + * references. * + * @see #decRef * @see #incRef */ - public synchronized void decRef() throws IOException { - assert refCount > 0; - ensureOpen(); - if (refCount == 1) { - commit(); - doClose(); + public final boolean tryIncRef() { + int count; + while ((count = refCount.get()) > 0) { + if (refCount.compareAndSet(count, count+1)) { + return true; + } } - refCount--; + return false; } - - /** - * @deprecated will be deleted when IndexReader(Directory) is deleted - * @see #directory() - */ - private Directory directory; /** - * Legacy Constructor for backwards compatibility. + * Expert: decreases the refCount of this IndexReader + * instance. If the refCount drops to 0, then this + * reader is closed. If an exception is hit, the refCount + * is unchanged. * - *

    - * This Constructor should not be used, it exists for backwards - * compatibility only to support legacy subclasses that did not "own" - * a specific directory, but needed to specify something to be returned - * by the directory() method. Future subclasses should delegate to the - * no arg constructor and implement the directory() method as appropriate. - * - * @param directory Directory to be returned by the directory() method - * @see #directory() - * @deprecated - use IndexReader() + * @throws IOException in case an IOException occurs in doClose() + * + * @see #incRef */ - protected IndexReader(Directory directory) { - this(); - this.directory = directory; + public final void decRef() throws IOException { + // only check refcount here (don't call ensureOpen()), so we can + // still close the reader if it was made invalid by a child: + if (refCount.get() <= 0) { + throw new AlreadyClosedException("this IndexReader is closed"); + } + + final int rc = refCount.decrementAndGet(); + if (rc == 0) { + closed = true; + Throwable throwable = null; + try { + doClose(); + } catch (Throwable th) { + throwable = th; + } finally { + try { + reportCloseToParentReaders(); + } finally { + notifyReaderClosedListeners(throwable); + } + } + } else if (rc < 0) { + throw new IllegalStateException("too many decRef calls: refCount is " + rc + " after decrement"); + } } - protected IndexReader() { - refCount = 1; - } - /** - * @throws AlreadyClosedException if this IndexReader is closed + * Throws AlreadyClosedException if this IndexReader or any + * of its child readers is closed, otherwise returns. */ protected final void ensureOpen() throws AlreadyClosedException { - if (refCount <= 0) { + if (refCount.get() <= 0) { throw new AlreadyClosedException("this IndexReader is closed"); } + // the happens before rule on reading the refCount, which must be after the fake write, + // ensures that we see the value: + if (closedByChild) { + throw new AlreadyClosedException("this IndexReader cannot be used anymore as one of its child readers was closed"); + } } - - /** Returns a read/write IndexReader reading the index in an FSDirectory in the named - path. NOTE: starting in 3.0 this will return a readOnly IndexReader. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * @param path the path to the index directory */ - public static IndexReader open(String path) throws CorruptIndexException, IOException { - return open(FSDirectory.getDirectory(path), true, null, null, READ_ONLY_DEFAULT); - } - - /** Returns a read/write IndexReader reading the index in an FSDirectory in the named - * path. NOTE: starting in 3.0 this will return a readOnly IndexReader. - * @param path the path to the index directory - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + + /** {@inheritDoc} + *

    For caching purposes, {@code IndexReader} subclasses are not allowed + * to implement equals/hashCode, so methods are declared final. + * To lookup instances from caches use {@link #getCoreCacheKey} and + * {@link #getCombinedCoreAndDeletesKey}. */ - public static IndexReader open(File path) throws CorruptIndexException, IOException { - return open(FSDirectory.getDirectory(path), true, null, null, READ_ONLY_DEFAULT); + @Override + public final boolean equals(Object obj) { + return (this == obj); } - - /** Returns a read/write IndexReader reading the index in - * the given Directory. NOTE: starting in 3.0 this - * will return a readOnly IndexReader. - * @param directory the index directory - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + + /** {@inheritDoc} + *

    For caching purposes, {@code IndexReader} subclasses are not allowed + * to implement equals/hashCode, so methods are declared final. + * To lookup instances from caches use {@link #getCoreCacheKey} and + * {@link #getCombinedCoreAndDeletesKey}. */ - public static IndexReader open(final Directory directory) throws CorruptIndexException, IOException { - return open(directory, false, null, null, READ_ONLY_DEFAULT); + @Override + public final int hashCode() { + return System.identityHashCode(this); } - - /** Returns a read/write or read only IndexReader reading the index in the given Directory. + + /** Returns a IndexReader reading the index in the given + * Directory * @param directory the index directory - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error + * @deprecated Use {@link DirectoryReader#open(Directory)} */ - public static IndexReader open(final Directory directory, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, false, null, null, readOnly); + @Deprecated + public static DirectoryReader open(final Directory directory) throws IOException { + return DirectoryReader.open(directory); } - - /** Expert: returns a read/write IndexReader reading the index in the given - * {@link IndexCommit}. NOTE: starting in 3.0 this - * will return a readOnly IndexReader. - * @param commit the commit point to open - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final IndexCommit commit) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), false, null, commit, READ_ONLY_DEFAULT); - } - - /** Expert: returns a read/write IndexReader reading the index in the given - * Directory, with a custom {@link IndexDeletionPolicy}. - * NOTE: starting in 3.0 this will return a - * readOnly IndexReader. + + /** Expert: Returns a IndexReader reading the index in the given + * Directory with the given termInfosIndexDivisor. * @param directory the index directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. - * @throws CorruptIndexException if the index is corrupt + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriterConfig#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. 
* @throws IOException if there is a low-level IO error + * @deprecated Use {@link DirectoryReader#open(Directory,int)} */ - public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(directory, false, deletionPolicy, null, READ_ONLY_DEFAULT); + @Deprecated + public static DirectoryReader open(final Directory directory, int termInfosIndexDivisor) throws IOException { + return DirectoryReader.open(directory, termInfosIndexDivisor); } - - /** Expert: returns a read/write or read only IndexReader reading the index in the given - * Directory, with a custom {@link IndexDeletionPolicy}. - * NOTE: starting in 3.0 this will return a - * readOnly IndexReader. - * @param directory the index directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. - * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final Directory directory, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(directory, false, deletionPolicy, null, readOnly); - } - - /** Expert: returns a read/write IndexReader reading the index in the given - * Directory, using a specific commit and with a custom - * {@link IndexDeletionPolicy}. NOTE: starting in - * 3.0 this will return a readOnly IndexReader. - * @param commit the specific {@link IndexCommit} to open; - * see {@link IndexReader#listCommits} to list all commits - * in a directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), false, deletionPolicy, commit, READ_ONLY_DEFAULT); - } - - /** Expert: returns a read/write or read only IndexReader reading the index in the given - * Directory, using a specific commit and with a custom {@link IndexDeletionPolicy}. - * @param commit the specific {@link IndexCommit} to open; - * see {@link IndexReader#listCommits} to list all commits - * in a directory - * @param deletionPolicy a custom deletion policy (only used - * if you use this reader to perform deletes or to set - * norms); see {@link IndexWriter} for details. 
- * @param readOnly true if no changes (deletions, norms) will be made with this IndexReader - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static IndexReader open(final IndexCommit commit, IndexDeletionPolicy deletionPolicy, boolean readOnly) throws CorruptIndexException, IOException { - return open(commit.getDirectory(), false, deletionPolicy, commit, readOnly); - } - - private static IndexReader open(final Directory directory, final boolean closeDirectory, final IndexDeletionPolicy deletionPolicy, final IndexCommit commit, final boolean readOnly) throws CorruptIndexException, IOException { - return DirectoryIndexReader.open(directory, closeDirectory, deletionPolicy, commit, readOnly); - } - + /** - * Refreshes an IndexReader if the index has changed since this instance - * was (re)opened. - *

    - * Opening an IndexReader is an expensive operation. This method can be used - * to refresh an existing IndexReader to reduce these costs. This method - * tries to only load segments that have changed or were created after the - * IndexReader was (re)opened. - *

    - * If the index has not changed since this instance was (re)opened, then this - * call is a NOOP and returns this instance. Otherwise, a new instance is - * returned. The old instance is not closed and remains usable.
    - * Note: The re-opened reader instance and the old instance might share - * the same resources. For this reason no index modification operations - * (e. g. {@link #deleteDocument(int)}, {@link #setNorm(int, String, byte)}) - * should be performed using one of the readers until the old reader instance - * is closed. Otherwise, the behavior of the readers is undefined. - *

    - * You can determine whether a reader was actually reopened by comparing the - * old instance with the instance returned by this method: - *

    -   * IndexReader reader = ... 
    -   * ...
    -   * IndexReader new = r.reopen();
    -   * if (new != reader) {
    -   *   ...     // reader was reopened
    -   *   reader.close(); 
    -   * }
    -   * reader = new;
    -   * ...
    -   * 
    - * - * @throws CorruptIndexException if the index is corrupt + * Open a near real time IndexReader from the {@link org.apache.lucene.index.IndexWriter}. + * + * @param writer The IndexWriter to open from + * @param applyAllDeletes If true, all buffered deletes will + * be applied (made visible) in the returned reader. If + * false, the deletes are not applied but remain buffered + * (in IndexWriter) so that they will be applied in the + * future. Applying deletes can be costly, so if your app + * can tolerate deleted documents being returned you might + * gain some performance by passing false. + * @return The new IndexReader * @throws IOException if there is a low-level IO error - */ - public synchronized IndexReader reopen() throws CorruptIndexException, IOException { - throw new UnsupportedOperationException("This reader does not support reopen()."); - } - - /** - * Returns the directory associated with this index. The Default - * implementation returns the directory specified by subclasses when - * delegating to the IndexReader(Directory) constructor, or throws an - * UnsupportedOperationException if one was not specified. - * @throws UnsupportedOperationException if no directory + * + * @see DirectoryReader#openIfChanged(DirectoryReader,IndexWriter,boolean) + * + * @lucene.experimental + * @deprecated Use {@link DirectoryReader#open(IndexWriter,boolean)} */ - public Directory directory() { - ensureOpen(); - if (null != directory) { - return directory; - } else { - throw new UnsupportedOperationException("This reader does not support this method."); - } + @Deprecated + public static DirectoryReader open(final IndexWriter writer, boolean applyAllDeletes) throws IOException { + return DirectoryReader.open(writer, applyAllDeletes); } - /** - * Returns the time the index in the named directory was last modified. - * Do not use this to check whether the reader is still up-to-date, use - * {@link #isCurrent()} instead. - * @throws CorruptIndexException if the index is corrupt + /** Expert: returns an IndexReader reading the index in the given + * {@link IndexCommit}. + * @param commit the commit point to open * @throws IOException if there is a low-level IO error + * @deprecated Use {@link DirectoryReader#open(IndexCommit)} */ - public static long lastModified(String directory) throws CorruptIndexException, IOException { - return lastModified(new File(directory)); + @Deprecated + public static DirectoryReader open(final IndexCommit commit) throws IOException { + return DirectoryReader.open(commit); } - /** - * Returns the time the index in the named directory was last modified. - * Do not use this to check whether the reader is still up-to-date, use - * {@link #isCurrent()} instead. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static long lastModified(File fileDirectory) throws CorruptIndexException, IOException { - return ((Long) new SegmentInfos.FindSegmentsFile(fileDirectory) { - public Object doBody(String segmentFileName) { - return new Long(FSDirectory.fileModified(fileDirectory, segmentFileName)); - } - }.run()).longValue(); - } - /** - * Returns the time the index in the named directory was last modified. - * Do not use this to check whether the reader is still up-to-date, use - * {@link #isCurrent()} instead. - * @throws CorruptIndexException if the index is corrupt + /** Expert: returns an IndexReader reading the index in the given + * {@link IndexCommit} and termInfosIndexDivisor. 
+ * @param commit the commit point to open + * @param termInfosIndexDivisor Subsamples which indexed + * terms are loaded into RAM. This has the same effect as {@link + * IndexWriterConfig#setTermIndexInterval} except that setting + * must be done at indexing time while this setting can be + * set per reader. When set to N, then one in every + * N*termIndexInterval terms in the index is loaded into + * memory. By setting this to a value > 1 you can reduce + * memory usage, at the expense of higher latency when + * loading a TermInfo. The default value is 1. Set this + * to -1 to skip loading the terms index entirely. * @throws IOException if there is a low-level IO error + * @deprecated Use {@link DirectoryReader#open(IndexCommit,int)} */ - public static long lastModified(final Directory directory2) throws CorruptIndexException, IOException { - return ((Long) new SegmentInfos.FindSegmentsFile(directory2) { - public Object doBody(String segmentFileName) throws IOException { - return new Long(directory2.fileModified(segmentFileName)); - } - }.run()).longValue(); + @Deprecated + public static DirectoryReader open(final IndexCommit commit, int termInfosIndexDivisor) throws IOException { + return DirectoryReader.open(commit, termInfosIndexDivisor); } - /** - * Reads version number from segments files. The version number is - * initialized with a timestamp and then increased by one for each change of - * the index. - * - * @param directory where the index resides. - * @return version number. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static long getCurrentVersion(String directory) throws CorruptIndexException, IOException { - return getCurrentVersion(new File(directory)); - } - - /** - * Reads version number from segments files. The version number is - * initialized with a timestamp and then increased by one for each change of - * the index. - * - * @param directory where the index resides. - * @return version number. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static long getCurrentVersion(File directory) throws CorruptIndexException, IOException { - Directory dir = FSDirectory.getDirectory(directory); - long version = getCurrentVersion(dir); - dir.close(); - return version; - } - - /** - * Reads version number from segments files. The version number is - * initialized with a timestamp and then increased by one for each change of - * the index. - * - * @param directory where the index resides. - * @return version number. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static long getCurrentVersion(Directory directory) throws CorruptIndexException, IOException { - return SegmentInfos.readCurrentVersion(directory); - } - - /** - * Version number when this IndexReader was opened. Not implemented in the IndexReader base class. - * @throws UnsupportedOperationException unless overridden in subclass - */ - public long getVersion() { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /**

    For IndexReader implementations that use - * TermInfosReader to read terms, this sets the - * indexDivisor to subsample the number of indexed terms - * loaded into memory. This has the same effect as {@link - * IndexWriter#setTermIndexInterval} except that setting - * must be done at indexing time while this setting can be - * set per reader. When set to N, then one in every - * N*termIndexInterval terms in the index is loaded into - * memory. By setting this to a value > 1 you can reduce - * memory usage, at the expense of higher latency when - * loading a TermInfo. The default value is 1.
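    A rough sketch of the trade-off described above, using the DirectoryReader.open(IndexCommit, int) replacement named in the deprecation note (dir is assumed to be an existing Directory):

        IndexCommit commit = DirectoryReader.listCommits(dir).get(0);  // some existing commit point
        // Load only one in every 4 * termIndexInterval indexed terms into RAM:
        DirectoryReader reader = DirectoryReader.open(commit, 4);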

    - * - * NOTE: you must call this before the term - * index is loaded. If the index is already loaded, - * an IllegalStateException is thrown. - * @throws IllegalStateException if the term index has already been loaded into memory - */ - public void setTermInfosIndexDivisor(int indexDivisor) throws IllegalStateException { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /**

    For IndexReader implementations that use - * TermInfosReader to read terms, this returns the - * current indexDivisor. - * @see #setTermInfosIndexDivisor */ - public int getTermInfosIndexDivisor() { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Check whether this IndexReader is still using the - * current (i.e., most recently committed) version of the - * index. If a writer has committed any changes to the - * index since this reader was opened, this will return - * false, in which case you must open a new - * IndexReader in order to see the changes. See the - * description of the autoCommit - * flag which controls when the {@link IndexWriter} - * actually commits changes to the index. - * - *
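    In the replacement API this check-and-reopen cycle is usually expressed with openIfChanged rather than isCurrent(); a minimal sketch, assuming reader is an open DirectoryReader:

        DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
        if (newReader != null) {     // null means the index has not changed
          reader.close();
          reader = newReader;
        }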

    - * Not implemented in the IndexReader base class. - *

    - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * @throws UnsupportedOperationException unless overridden in subclass - */ - public boolean isCurrent() throws CorruptIndexException, IOException { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Checks is the index is optimized (if it has a single segment and - * no deletions). Not implemented in the IndexReader base class. - * @return true if the index is optimized; false otherwise - * @throws UnsupportedOperationException unless overridden in subclass - */ - public boolean isOptimized() { - throw new UnsupportedOperationException("This reader does not support this method."); - } - - /** - * Return an array of term frequency vectors for the specified document. - * The array contains a vector for each vectorized field in the document. - * Each vector contains terms and frequencies for all terms in a given vectorized field. - * If no such fields existed, the method returns null. The term vectors that are - * returned my either be of type TermFreqVector or of type TermPositionsVector if - * positions or offsets have been stored. - * - * @param docNumber document for which term frequency vectors are returned - * @return array of term frequency vectors. May be null if no term vectors have been - * stored for the specified document. - * @throws IOException if index cannot be accessed - * @see org.apache.lucene.document.Field.TermVector - */ - abstract public TermFreqVector[] getTermFreqVectors(int docNumber) + /** Retrieve term vectors for this document, or null if + * term vectors were not indexed. The returned Fields + * instance acts like a single-document inverted index + * (the docID will be 0). */ + public abstract Fields getTermVectors(int docID) throws IOException; - - /** - * Return a term frequency vector for the specified document and field. The - * returned vector contains terms and frequencies for the terms in - * the specified field of this document, if the field had the storeTermVector - * flag set. If termvectors had been stored with positions or offsets, a - * TermPositionsVector is returned. - * - * @param docNumber document for which the term frequency vector is returned - * @param field field for which the term frequency vector is returned. - * @return term frequency vector May be null if field does not exist in the specified - * document or term vector was not stored. - * @throws IOException if index cannot be accessed - * @see org.apache.lucene.document.Field.TermVector - */ - abstract public TermFreqVector getTermFreqVector(int docNumber, String field) - throws IOException; - - /** - * Load the Term Vector into a user-defined data structure instead of relying on the parallel arrays of - * the {@link TermFreqVector}. - * @param docNumber The number of the document to load the vector for - * @param field The name of the field to load - * @param mapper The {@link TermVectorMapper} to process the vector. Must not be null - * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. - * - */ - abstract public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException; - - /** - * Map all the term vectors for all fields in a Document - * @param docNumber The number of the document to load the vector for - * @param mapper The {@link TermVectorMapper} to process the vector. 
Must not be null - * @throws IOException if term vectors cannot be accessed or if they do not exist on the field and doc. specified. - */ - abstract public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException; - - /** - * Returns true if an index exists at the specified directory. - * If the directory does not exist or if there is no index in it. - * false is returned. - * @param directory the directory to check for an index - * @return true if an index exists; false otherwise - */ - public static boolean indexExists(String directory) { - return indexExists(new File(directory)); + /** Retrieve term vector for this document and field, or + * null if term vectors were not indexed. The returned + * Fields instance acts like a single-document inverted + * index (the docID will be 0). */ + public final Terms getTermVector(int docID, String field) + throws IOException { + Fields vectors = getTermVectors(docID); + if (vectors == null) { + return null; + } + return vectors.terms(field); } - /** - * Returns true if an index exists at the specified directory. - * If the directory does not exist or if there is no index in it. - * @param directory the directory to check for an index - * @return true if an index exists; false otherwise - */ - - public static boolean indexExists(File directory) { - return SegmentInfos.getCurrentSegmentGeneration(directory.list()) != -1; - } - - /** - * Returns true if an index exists at the specified directory. - * If the directory does not exist or if there is no index in it. - * @param directory the directory to check for an index - * @return true if an index exists; false otherwise - * @throws IOException if there is a problem with accessing the index - */ - public static boolean indexExists(Directory directory) throws IOException { - return SegmentInfos.getCurrentSegmentGeneration(directory) != -1; - } - /** Returns the number of documents in this index. */ public abstract int numDocs(); @@ -623,322 +423,69 @@ public abstract int maxDoc(); /** Returns the number of deleted documents. */ - public int numDeletedDocs() { + public final int numDeletedDocs() { return maxDoc() - numDocs(); } - /** Returns the stored fields of the nth - Document in this index. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public Document document(int n) throws CorruptIndexException, IOException { - ensureOpen(); - return document(n, null); - } - - /** - * Get the {@link org.apache.lucene.document.Document} at the nth position. The {@link org.apache.lucene.document.FieldSelector} - * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded. - * - * NOTE: If this Reader (more specifically, the underlying FieldsReader) is closed before the lazy {@link org.apache.lucene.document.Field} is - * loaded an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must - * explicitly load it or fetch the Document again with a new loader. - * - * - * @param n Get the document at the nth position - * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded. 
- * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * - * @see org.apache.lucene.document.Fieldable - * @see org.apache.lucene.document.FieldSelector - * @see org.apache.lucene.document.SetBasedFieldSelector - * @see org.apache.lucene.document.LoadFirstFieldSelector - */ - //When we convert to JDK 1.5 make this Set - public abstract Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, IOException; + /** Expert: visits the fields of a stored document, for + * custom processing/loading of each field. If you + * simply want to load all fields, use {@link + * #document(int)}. If you want to load a subset, use + * {@link DocumentStoredFieldVisitor}. */ + public abstract void document(int docID, StoredFieldVisitor visitor) throws IOException; - - - /** Returns true if document n has been deleted */ - public abstract boolean isDeleted(int n); - - /** Returns true if any documents have been deleted */ - public abstract boolean hasDeletions(); - - /** Returns true if there are norms stored for this field. */ - public boolean hasNorms(String field) throws IOException { - // backward compatible implementation. - // SegmentReader has an efficient implementation. - ensureOpen(); - return norms(field) != null; - } - - /** Returns the byte-encoded normalization factor for the named field of - * every document. This is used by the search code to score documents. + /** + * Returns the stored fields of the nth + * Document in this index. This is just + * sugar for using {@link DocumentStoredFieldVisitor}. + *
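    A minimal sketch of loading stored fields through this sugar, assuming reader is an open IndexReader, docID is a valid document number, and the field names are illustrative:

        Document doc = reader.document(docID);                        // all stored fields
        Set<String> fields = new HashSet<>(Arrays.asList("title", "body"));
        Document partial = reader.document(docID, fields);            // only the listed stored fields
        String title = partial.get("title");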

    + * NOTE: for performance reasons, this method does not check if the + * requested document is deleted, and therefore asking for a deleted document + * may yield unspecified results. Usually this is not required, however you + * can test if the doc is deleted by checking the {@link + * Bits} returned from {@link MultiFields#getLiveDocs}. * - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract byte[] norms(String field) throws IOException; - - /** Reads the byte-encoded normalization factor for the named field of every - * document. This is used by the search code to score documents. - * - * @see org.apache.lucene.document.Field#setBoost(float) - */ - public abstract void norms(String field, byte[] bytes, int offset) - throws IOException; - - /** Expert: Resets the normalization factor for the named field of the named - * document. The norm represents the product of the field's {@link - * org.apache.lucene.document.Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String, - * int) length normalization}. Thus, to preserve the length normalization - * values when resetting this, one should base the new value upon the old. - * - * @see #norms(String) - * @see Similarity#decodeNorm(byte) - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public synchronized void setNorm(int doc, String field, byte value) - throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - acquireWriteLock(); - hasChanges = true; - doSetNorm(doc, field, value); - } - - /** Implements setNorm in subclass.*/ - protected abstract void doSetNorm(int doc, String field, byte value) - throws CorruptIndexException, IOException; - - /** Expert: Resets the normalization factor for the named field of the named - * document. - * - * @see #norms(String) - * @see Similarity#decodeNorm(byte) + * NOTE: only the content of a field is returned, + * if that field was stored during indexing. Metadata + * like boost, omitNorm, IndexOptions, tokenized, etc., + * are not preserved. * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) * @throws IOException if there is a low-level IO error */ - public void setNorm(int doc, String field, float value) - throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - setNorm(doc, field, Similarity.encodeNorm(value)); + // TODO: we need a separate StoredField, so that the + // Document returned here contains that class not + // IndexableField + public final Document document(int docID) throws IOException { + final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(); + document(docID, visitor); + return visitor.getDocument(); } - /** Returns an enumeration of all the terms in the index. The - * enumeration is ordered by Term.compareTo(). Each term is greater - * than all that precede it in the enumeration. 
Note that after - * calling terms(), {@link TermEnum#next()} must be called - * on the resulting enumeration before calling other methods such as - * {@link TermEnum#term()}. - * @throws IOException if there is a low-level IO error - */ - public abstract TermEnum terms() throws IOException; - - /** Returns an enumeration of all terms starting at a given term. If - * the given term does not exist, the enumeration is positioned at the - * first term greater than the supplied term. The enumeration is - * ordered by Term.compareTo(). Each term is greater than all that - * precede it in the enumeration. - * @throws IOException if there is a low-level IO error - */ - public abstract TermEnum terms(Term t) throws IOException; - - /** Returns the number of documents containing the term t. - * @throws IOException if there is a low-level IO error - */ - public abstract int docFreq(Term t) throws IOException; - - /** Returns an enumeration of all the documents which contain - * term. For each document, the document number, the frequency of - * the term in that document is also provided, for use in search scoring. - * Thus, this method implements the mapping: - *

      - * Term    =>    <docNum, freq>* - *
    - *

    The enumeration is ordered by document number. Each document number - * is greater than all that precede it in the enumeration. - * @throws IOException if there is a low-level IO error - */ - public TermDocs termDocs(Term term) throws IOException { - ensureOpen(); - TermDocs termDocs = termDocs(); - termDocs.seek(term); - return termDocs; - } - - /** Returns an unpositioned {@link TermDocs} enumerator. - * @throws IOException if there is a low-level IO error - */ - public abstract TermDocs termDocs() throws IOException; - - /** Returns an enumeration of all the documents which contain - * term. For each document, in addition to the document number - * and frequency of the term in that document, a list of all of the ordinal - * positions of the term in the document is available. Thus, this method - * implements the mapping: - * - *

      - * Term    =>    <docNum, freq, - * <pos1, pos2, ... - * posfreq-1> - * >* - *
    - *

    This positional information facilitates phrase and proximity searching. - *

    The enumeration is ordered by document number. Each document number is - * greater than all that precede it in the enumeration. - * @throws IOException if there is a low-level IO error - */ - public TermPositions termPositions(Term term) throws IOException { - ensureOpen(); - TermPositions termPositions = termPositions(); - termPositions.seek(term); - return termPositions; - } - - /** Returns an unpositioned {@link TermPositions} enumerator. - * @throws IOException if there is a low-level IO error - */ - public abstract TermPositions termPositions() throws IOException; - - - - /** Deletes the document numbered docNum. Once a document is - * deleted it will not appear in TermDocs or TermPostitions enumerations. - * Attempts to read its field with the {@link #document} - * method will result in an error. The presence of this document may still be - * reflected in the {@link #docFreq} statistic, though - * this will be corrected eventually as the index is further modified. - * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public synchronized void deleteDocument(int docNum) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - acquireWriteLock(); - hasChanges = true; - doDelete(docNum); - } - - - /** Implements deletion of the document numbered docNum. - * Applications should call {@link #deleteDocument(int)} or {@link #deleteDocuments(Term)}. - */ - protected abstract void doDelete(int docNum) throws CorruptIndexException, IOException; - - - /** Deletes all documents that have a given term indexed. - * This is useful if one uses a document field to hold a unique ID string for - * the document. Then to delete such a document, one merely constructs a - * term with the appropriate field and the unique ID string as its text and - * passes it to this method. - * See {@link #deleteDocument(int)} for information about when this deletion will - * become effective. - * - * @return the number of documents deleted - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if there is a low-level IO error - */ - public int deleteDocuments(Term term) throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - TermDocs docs = termDocs(term); - if (docs == null) return 0; - int n = 0; - try { - while (docs.next()) { - deleteDocument(docs.doc()); - n++; - } - } finally { - docs.close(); - } - return n; - } - - /** Undeletes all documents currently marked as deleted in this index. 
- * - * @throws StaleReaderException if the index has changed - * since this reader was opened - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public synchronized void undeleteAll() throws StaleReaderException, CorruptIndexException, LockObtainFailedException, IOException { - ensureOpen(); - acquireWriteLock(); - hasChanges = true; - doUndeleteAll(); - } - - /** Implements actual undeleteAll() in subclass. */ - protected abstract void doUndeleteAll() throws CorruptIndexException, IOException; - - /** Does nothing by default. Subclasses that require a write lock for - * index modifications must implement this method. */ - protected synchronized void acquireWriteLock() throws IOException { - /* NOOP */ - } - /** - * - * @throws IOException + * Like {@link #document(int)} but only loads the specified + * fields. Note that this is simply sugar for {@link + * DocumentStoredFieldVisitor#DocumentStoredFieldVisitor(Set)}. */ - public final synchronized void flush() throws IOException { - ensureOpen(); - commit(); + public final Document document(int docID, Set fieldsToLoad) throws IOException { + final DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(fieldsToLoad); + document(docID, visitor); + return visitor.getDocument(); } - /** - * Commit changes resulting from delete, undeleteAll, or - * setNorm operations - * - * If an exception is hit, then either no changes or all - * changes will have been committed to the index - * (transactional semantics). - * @throws IOException if there is a low-level IO error - */ - protected final synchronized void commit() throws IOException { - if(hasChanges){ - doCommit(); - } - hasChanges = false; + /** Returns true if any documents have been deleted. Implementers should + * consider overriding this method if {@link #maxDoc()} or {@link #numDocs()} + * are not constant-time operations. */ + public boolean hasDeletions() { + return numDeletedDocs() > 0; } - /** Implements commit. */ - protected abstract void doCommit() throws IOException; - /** * Closes files associated with this index. * Also saves any new deletions to disk. * No other methods should be called after this has been called. * @throws IOException if there is a low-level IO error */ + @Override public final synchronized void close() throws IOException { if (!closed) { decRef(); @@ -949,158 +496,97 @@ /** Implements close. */ protected abstract void doClose() throws IOException; - /** - * Get a list of unique field names that exist in this index and have the specified - * field option information. - * @param fldOption specifies which field option should be available for the returned fields - * @return Collection of Strings indicating the names of the fields. - * @see IndexReader.FieldOption + * Expert: Returns the root {@link IndexReaderContext} for this + * {@link IndexReader}'s sub-reader tree. + *

    + * Iff this reader is composed of sub + readers, i.e. if this reader is a composite reader, this method returns a + * {@link CompositeReaderContext} holding the reader's direct children as well as a + * view of the reader tree's atomic leaf contexts. All sub- + * {@link IndexReaderContext} instances referenced from this reader's top-level + * context are private to this reader and are not shared with another context + * tree. For example, IndexSearcher uses this API to drive searching by one + * atomic leaf reader at a time. If this reader is not composed of child + * readers, this method returns an {@link AtomicReaderContext}. + *
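    For example, per-segment code typically walks the leaf contexts like this (a sketch, assuming reader is an open composite reader such as a DirectoryReader and "body" is an indexed field):

        for (AtomicReaderContext leaf : reader.leaves()) {
          AtomicReader segmentReader = leaf.reader();
          Terms terms = segmentReader.terms("body");   // per-segment view of one field
          // per-segment work here
        }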

    + * Note: Any of the sub-{@link CompositeReaderContext} instances referenced + * from this top-level context do not support {@link CompositeReaderContext#leaves()}. + * Only the top-level context maintains the convenience leaf-view + * for performance reasons. */ - public abstract Collection getFieldNames(FieldOption fldOption); - + public abstract IndexReaderContext getContext(); + /** - * Returns true iff the index in the named directory is - * currently locked. - * @param directory the directory to check for a lock - * @throws IOException if there is a low-level IO error - * @deprecated Please use {@link IndexWriter#isLocked(Directory)} instead + * Returns the reader's leaves, or itself if this reader is atomic. + * This is a convenience method calling {@code this.getContext().leaves()}. + * @see IndexReaderContext#leaves() */ - public static boolean isLocked(Directory directory) throws IOException { - return - directory.makeLock(IndexWriter.WRITE_LOCK_NAME).isLocked(); + public final List leaves() { + return getContext().leaves(); } - /** - * Returns true iff the index in the named directory is - * currently locked. - * @param directory the directory to check for a lock - * @throws IOException if there is a low-level IO error - * @deprecated Please use {@link IndexWriter#isLocked(String)} instead - */ - public static boolean isLocked(String directory) throws IOException { - Directory dir = FSDirectory.getDirectory(directory); - boolean result = isLocked(dir); - dir.close(); - return result; + /** Expert: Returns a key for this IndexReader, so FieldCache/CachingWrapperFilter can find + * it again. + * This key must not have equals()/hashCode() methods, so "equals" means "identical". */ + public Object getCoreCacheKey() { + // Don't call ensureOpen since FC calls this (to evict) + // on close + return this; } + /** Expert: Returns a key for this IndexReader that also includes deletions, + * so FieldCache/CachingWrapperFilter can find it again. + * This key must not have equals()/hashCode() methods, so "equals" means "identical". */ + public Object getCombinedCoreAndDeletesKey() { + // Don't call ensureOpen since FC calls this (to evict) + // on close + return this; + } + + /** Returns the number of documents containing the + * term. This method returns 0 if the term or + * field does not exists. This method does not take into + * account deleted documents that have not yet been merged + * away. + * @see TermsEnum#docFreq() + */ + public abstract int docFreq(Term term) throws IOException; + /** - * Forcibly unlocks the index in the named directory. - *

    - * Caution: this should only be used by failure recovery code, - * when it is known that no other process nor thread is in fact - * currently accessing this index. - * @deprecated Please use {@link IndexWriter#unlock(Directory)} instead + * Returns the total number of occurrences of {@code term} across all + * documents (the sum of the freq() for each doc that has this term). This + * will be -1 if the codec doesn't support this measure. Note that, like other + * term measures, this measure does not take deleted documents into account. */ - public static void unlock(Directory directory) throws IOException { - directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); - } - + public abstract long totalTermFreq(Term term) throws IOException; + /** - * Expert: return the IndexCommit that this reader has - * opened. This method is only implemented by those - * readers that correspond to a Directory with its own - * segments_N file. - * - *

    WARNING: this API is new and experimental and - * may suddenly change.

    + * Returns the sum of {@link TermsEnum#docFreq()} for all terms in this field, + * or -1 if this measure isn't stored by the codec. Note that, just like other + * term measures, this measure does not take deleted documents into account. + * + * @see Terms#getSumDocFreq() */ - public IndexCommit getIndexCommit() throws IOException { - throw new UnsupportedOperationException("This reader does not support this method."); - } + public abstract long getSumDocFreq(String field) throws IOException; /** - * Prints the filename and size of each file within a given compound file. - * Add the -extract flag to extract files to the current working directory. - * In order to make the extracted version of the index work, you have to copy - * the segments file from the compound index into the directory where the extracted files are stored. - * @param args Usage: org.apache.lucene.index.IndexReader [-extract] <cfsfile> + * Returns the number of documents that have at least one term for this field, + * or -1 if this measure isn't stored by the codec. Note that, just like other + * term measures, this measure does not take deleted documents into account. + * + * @see Terms#getDocCount() */ - public static void main(String [] args) { - String filename = null; - boolean extract = false; + public abstract int getDocCount(String field) throws IOException; - for (int i = 0; i < args.length; ++i) { - if (args[i].equals("-extract")) { - extract = true; - } else if (filename == null) { - filename = args[i]; - } - } + /** + * Returns the sum of {@link TermsEnum#totalTermFreq} for all terms in this + * field, or -1 if this measure isn't stored by the codec (or if this fields + * omits term freq and positions). Note that, just like other term measures, + * this measure does not take deleted documents into account. + * + * @see Terms#getSumTotalTermFreq() + */ + public abstract long getSumTotalTermFreq(String field) throws IOException; - if (filename == null) { - System.out.println("Usage: org.apache.lucene.index.IndexReader [-extract] "); - return; - } - - Directory dir = null; - CompoundFileReader cfr = null; - - try { - File file = new File(filename); - String dirname = file.getAbsoluteFile().getParent(); - filename = file.getName(); - dir = FSDirectory.getDirectory(dirname); - cfr = new CompoundFileReader(dir, filename); - - String [] files = cfr.list(); - Arrays.sort(files); // sort the array of filename so that the output is more readable - - for (int i = 0; i < files.length; ++i) { - long len = cfr.fileLength(files[i]); - - if (extract) { - System.out.println("extract " + files[i] + " with " + len + " bytes to local directory..."); - IndexInput ii = cfr.openInput(files[i]); - - FileOutputStream f = new FileOutputStream(files[i]); - - // read and write with a small buffer, which is more effectiv than reading byte by byte - byte[] buffer = new byte[1024]; - int chunk = buffer.length; - while(len > 0) { - final int bufLen = (int) Math.min(chunk, len); - ii.readBytes(buffer, 0, bufLen); - f.write(buffer, 0, bufLen); - len -= bufLen; - } - - f.close(); - ii.close(); - } - else - System.out.println(files[i] + ": " + len + " bytes"); - } - } catch (IOException ioe) { - ioe.printStackTrace(); - } - finally { - try { - if (dir != null) - dir.close(); - if (cfr != null) - cfr.close(); - } - catch (IOException ioe) { - ioe.printStackTrace(); - } - } - } - - /** Returns all commit points that exist in the Directory. 
- * Normally, because the default is {@link - * KeepOnlyLastCommitDeletionPolicy}, there would be only - * one commit point. But if you're using a custom {@link - * IndexDeletionPolicy} then there could be many commits. - * Once you have a given commit, you can open a reader on - * it by calling {@link IndexReader#open(IndexCommit)} - * There must be at least one commit in - * the Directory, else this method throws {@link - * java.io.IOException}. Note that if a commit is in - * progress while this method is running, that commit - * may or may not be returned array. */ - public static Collection listCommits(Directory dir) throws IOException { - return DirectoryIndexReader.listCommits(dir); - } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IndexReaderContext.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IndexUpgrader.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/IndexWriter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/IndexWriter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/IndexWriter.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/IndexWriter.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,68 +17,94 @@ * limitations under the License. 
*/ +import java.io.Closeable; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.NoSuchFileException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedList; +import java.util.List; +import java.util.Locale; +import java.util.Map.Entry; +import java.util.Map; +import java.util.Queue; +import java.util.Set; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.concurrent.atomic.AtomicLong; + import org.apache.lucene.analysis.Analyzer; -import org.apache.lucene.document.Document; -import org.apache.lucene.search.Similarity; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; +import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; +import org.apache.lucene.document.Field; +import org.apache.lucene.index.DocValuesUpdate.BinaryDocValuesUpdate; +import org.apache.lucene.index.DocValuesUpdate.NumericDocValuesUpdate; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.index.FieldInfos.FieldNumbers; +import org.apache.lucene.index.IndexWriterConfig.OpenMode; +import org.apache.lucene.index.MergeState.CheckAbort; import org.apache.lucene.search.Query; +import org.apache.lucene.store.AlreadyClosedException; +import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.FSDirectory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.Lock; import org.apache.lucene.store.LockObtainFailedException; -import org.apache.lucene.store.AlreadyClosedException; -import org.apache.lucene.util.BitVector; +import org.apache.lucene.store.MergeInfo; +import org.apache.lucene.store.TrackingDirectoryWrapper; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; import org.apache.lucene.util.Constants; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.InfoStream; +import org.apache.lucene.util.ThreadInterruptedException; +import org.apache.lucene.util.Version; -import java.io.File; -import java.io.IOException; -import java.io.PrintStream; -import java.util.List; -import java.util.Collection; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.Set; -import java.util.HashSet; -import java.util.LinkedList; -import java.util.Iterator; - /** An IndexWriter creates and maintains an index. -

    The create argument to the - constructor - determines whether a new index is created, or whether an existing index is - opened. Note that you - can open an index with create=true even while readers are - using the index. The old readers will continue to search - the "point in time" snapshot they had opened, and won't - see the newly created index until they re-open. There are - also constructors - with no create argument which - will create a new index if there is not already an index at the - provided path and otherwise open the existing index.

    +

    The {@link OpenMode} option on + {@link IndexWriterConfig#setOpenMode(OpenMode)} determines + whether a new index is created, or whether an existing index is + opened. Note that you can open an index with {@link OpenMode#CREATE} + even while readers are using the index. The old readers will + continue to search the "point in time" snapshot they had opened, + and won't see the newly created index until they re-open. If + {@link OpenMode#CREATE_OR_APPEND} is used IndexWriter will create a + new index if there is not already an index at the provided path + and otherwise open the existing index.
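    A minimal construction sketch against the 4.x API shown in this diff (the analyzer, Version constant and index path are placeholders):

        Directory dir = FSDirectory.open(new File("/path/to/index"));
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT,
                                                      new StandardAnalyzer(Version.LUCENE_CURRENT));
        iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = new IndexWriter(dir, iwc);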

    -

    In either case, documents are added with addDocument - and removed with deleteDocuments(Term) - or deleteDocuments(Query). - A document can be updated with updateDocument - (which just deletes and then adds the entire document). - When finished adding, deleting and updating documents, close should be called.

    +

    In either case, documents are added with {@link #addDocument(Iterable) + addDocument} and removed with {@link #deleteDocuments(Term...)} or {@link + #deleteDocuments(Query...)}. A document can be updated with {@link + #updateDocument(Term, Iterable) updateDocument} (which just deletes + and then adds the entire document). When finished adding, deleting + and updating documents, {@link #close() close} should be called.
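    For instance, a typical add/update/delete cycle keyed on an application-level "id" field might look like the following sketch (field names and values are illustrative):

        Document doc = new Document();
        doc.add(new StringField("id", "42", Field.Store.YES));
        doc.add(new TextField("body", "some searchable text", Field.Store.NO));
        writer.addDocument(doc);                          // add
        writer.updateDocument(new Term("id", "42"), doc); // delete-then-add under the same key
        writer.deleteDocuments(new Term("id", "17"));     // delete by term
        writer.close();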

    These changes are buffered in memory and periodically flushed to the {@link Directory} (during the above method - calls). A flush is triggered when there are enough - buffered deletes (see {@link #setMaxBufferedDeleteTerms}) - or enough added documents since the last flush, whichever - is sooner. For the added documents, flushing is triggered - either by RAM usage of the documents (see {@link - #setRAMBufferSizeMB}) or the number of added documents. - The default is to flush when RAM usage hits 16 MB. For + calls). A flush is triggered when there are enough added documents + since the last flush. Flushing is triggered either by RAM usage of the + documents (see {@link IndexWriterConfig#setRAMBufferSizeMB}) or the + number of added documents (see {@link IndexWriterConfig#setMaxBufferedDocs(int)}). + The default is to flush when RAM usage hits + {@link IndexWriterConfig#DEFAULT_RAM_BUFFER_SIZE_MB} MB. For best indexing speed you should flush by RAM usage with a - large RAM buffer. Note that flushing just moves the + large RAM buffer. Additionally, if IndexWriter reaches the configured number of + buffered deletes (see {@link IndexWriterConfig#setMaxBufferedDeleteTerms}) + the deleted terms and queries are flushed and applied to existing segments. + In contrast to the other flush options {@link IndexWriterConfig#setRAMBufferSizeMB} and + {@link IndexWriterConfig#setMaxBufferedDocs(int)}, deleted terms + won't trigger a segment flush. Note that flushing just moves the internal buffered state in IndexWriter into the index, but these changes are not visible to IndexReader until either {@link #commit()} or {@link #close} is called. A flush may @@ -87,63 +113,6 @@ addDocument calls (see below for changing the {@link MergeScheduler}).
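    As a sketch, flushing purely by RAM usage (the 64 MB figure is only an example) is configured on the IndexWriterConfig before the writer is created, assuming iwc is the config used for the writer:

        iwc.setRAMBufferSizeMB(64.0);                                  // flush when buffered docs reach ~64 MB
        iwc.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH);  // do not also flush by document count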

    - -

    The optional autoCommit argument to the constructors - controls visibility of the changes to {@link IndexReader} - instances reading the same index. When this is - false, changes are not visible until {@link - #close()} or {@link #commit()} is called. Note that changes will still be - flushed to the {@link org.apache.lucene.store.Directory} - as new files, but are not committed (no new - segments_N file is written referencing the - new files, nor are the files sync'd to stable storage) - until {@link #close()} or {@link #commit()} is called. If something - goes terribly wrong (for example the JVM crashes), then - the index will reflect none of the changes made since the - last commit, or the starting state if commit was not called. - You can also call {@link #rollback}, which closes the writer - without committing any changes, and removes any index - files that had been flushed but are now unreferenced. - This mode is useful for preventing readers from refreshing - at a bad time (for example after you've done all your - deletes but before you've done your adds). It can also be - used to implement simple single-writer transactional - semantics ("all or none"). You can do a two-phase commit - by calling {@link #prepareCommit()} - followed by {@link #commit()}. This is necessary when - Lucene is working with an external resource (for example, - a database) and both must either commit or rollback the - transaction.
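    The two-phase pattern described above might be sketched as follows; externalResource is a hypothetical stand-in for the non-Lucene participant (for example a database transaction) the application coordinates with:

        try {
          writer.prepareCommit();
          externalResource.prepareCommit();   // hypothetical second participant
          writer.commit();
          externalResource.commit();
        } catch (Exception e) {
          writer.rollback();                  // discards changes since the last commit and closes the writer
          externalResource.rollback();
        }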

    - -

    When autoCommit is true then - the writer will periodically commit on its own. [Deprecated: Note that in 3.0, IndexWriter will - no longer accept autoCommit=true (it will be hardwired to - false). You can always call {@link #commit()} yourself - when needed]. There is - no guarantee when exactly an auto commit will occur (it - used to be after every flush, but it is now after every - completed merge, as of 2.4). If you want to force a - commit, call {@link #commit()}, or, close the writer. Once - a commit has finished, newly opened {@link IndexReader} instances will - see the changes to the index as of that commit. When - running in this mode, be careful not to refresh your - readers while optimize or segment merges are taking place - as this can tie up substantial disk space.

    - -

    Regardless of autoCommit, an {@link - IndexReader} or {@link org.apache.lucene.search.IndexSearcher} will only see the - index as of the "point in time" that it was opened. Any - changes committed to the index after the reader was opened - are not visible until the reader is re-opened.

    - -

    If an index will not have more documents added for a while and optimal search - performance is desired, then either the full optimize - method or partial {@link #optimize(int)} method should be - called before the index is closed.

    -

    Opening an IndexWriter creates a lock file for the directory in use. Trying to open another IndexWriter on the same directory will lead to a {@link LockObtainFailedException}. The {@link LockObtainFailedException} is also thrown if an IndexReader on the same directory is used to delete documents from the index.

    + +

    NOTE: if you hit an + OutOfMemoryError, or disaster strikes during a checkpoint + then IndexWriter will close itself. This is a + defensive measure in case any internal state (buffered + documents, deletions, reference counts) were corrupted. + Any subsequent calls will throw an AlreadyClosedException.

    + +

    NOTE: {@link + IndexWriter} instances are completely thread + safe, meaning multiple threads can call any of its + methods, concurrently. If your application requires + external synchronization, you should not + synchronize on the IndexWriter instance as + this may cause deadlock; use your own (non-Lucene) objects + instead.

    + +

    NOTE: If you call + Thread.interrupt() on a thread that's within + IndexWriter, IndexWriter will try to catch this (eg, if + it's in a wait() or Thread.sleep()), and will then throw + the unchecked exception {@link ThreadInterruptedException} + and clear the interrupt status on the thread.

    */ /* * Clarification: Check Points (and commits) - * Being able to set autoCommit=false allows IndexWriter to flush and - * write new index files to the directory without writing a new segments_N - * file which references these new files. It also means that the state of + * IndexWriter writes new index files to the directory without writing a new segments_N + * file which references these new files. It also means that the state of * the in memory SegmentInfos object is different than the most recent * segments_N file written to the directory. - * - * Each time the SegmentInfos is changed, and matches the (possibly - * modified) directory files, we have a new "check point". - * If the modified/new SegmentInfos is written to disk - as a new - * (generation of) segments_N file - this check point is also an + * + * Each time the SegmentInfos is changed, and matches the (possibly + * modified) directory files, we have a new "check point". + * If the modified/new SegmentInfos is written to disk - as a new + * (generation of) segments_N file - this check point is also an * IndexCommit. - * - * With autoCommit=true, every checkPoint is also a CommitPoint. - * With autoCommit=false, some checkPoints may not be commits. - * - * A new checkpoint always replaces the previous checkpoint and - * becomes the new "front" of the index. This allows the IndexFileDeleter + * + * A new checkpoint always replaces the previous checkpoint and + * becomes the new "front" of the index. This allows the IndexFileDeleter * to delete files that are referenced only by stale checkpoints. * (files that were created since the last commit, but are no longer - * referenced by the "front" of the index). For this, IndexFileDeleter + * referenced by the "front" of the index). For this, IndexFileDeleter * keeps track of the last non commit checkpoint. */ -public class IndexWriter { +public class IndexWriter implements Closeable, TwoPhaseCommit, Accountable { - /** - * Default value for the write lock timeout (1,000). - * @see #setDefaultWriteLockTimeout - */ - public static long WRITE_LOCK_TIMEOUT = 1000; + /** Hard limit on maximum number of documents that may be added to the + * index. If you try to add more than this you'll hit {@code IllegalStateException}. */ + // We defensively subtract 128 to be well below the lowest + // ArrayUtil.MAX_ARRAY_LENGTH on "typical" JVMs. We don't just use + // ArrayUtil.MAX_ARRAY_LENGTH here because this can vary across JVMs: + public static final int MAX_DOCS = Integer.MAX_VALUE - 128; - private long writeLockTimeout = WRITE_LOCK_TIMEOUT; + // Use package-private instance var to enforce the limit so testing + // can use less electricity: + private static int actualMaxDocs = MAX_DOCS; + /** Used only for testing. */ + static void setMaxDocs(int maxDocs) { + if (maxDocs > MAX_DOCS) { + // Cannot go higher than the hard max: + throw new IllegalArgumentException("maxDocs must be <= IndexWriter.MAX_DOCS=" + MAX_DOCS + "; got: " + maxDocs); + } + IndexWriter.actualMaxDocs = maxDocs; + } + + static int getActualMaxDocs() { + return IndexWriter.actualMaxDocs; + } + + private static final int UNBOUNDED_MAX_MERGE_SEGMENTS = -1; + /** * Name of the write lock in the index. */ public static final String WRITE_LOCK_NAME = "write.lock"; - /** - * @deprecated - * @see LogMergePolicy#DEFAULT_MERGE_FACTOR - */ - public final static int DEFAULT_MERGE_FACTOR = LogMergePolicy.DEFAULT_MERGE_FACTOR; + /** Key for the source of a segment in the {@link SegmentInfo#getDiagnostics() diagnostics}. 
*/ + public static final String SOURCE = "source"; + /** Source of a segment which results from a merge of other segments. */ + public static final String SOURCE_MERGE = "merge"; + /** Source of a segment which results from a flush. */ + public static final String SOURCE_FLUSH = "flush"; + /** Source of a segment which results from a call to {@link #addIndexes(IndexReader...)}. */ + public static final String SOURCE_ADDINDEXES_READERS = "addIndexes(IndexReader...)"; /** - * Value to denote a flush trigger is disabled + * Absolute hard maximum length for a term, in bytes once + * encoded as UTF8. If a term arrives from the analyzer + * longer than this length, an + * IllegalArgumentException is thrown + * and a message is printed to infoStream, if set (see {@link + * IndexWriterConfig#setInfoStream(InfoStream)}). */ - public final static int DISABLE_AUTO_FLUSH = -1; + public final static int MAX_TERM_LENGTH = DocumentsWriterPerThread.MAX_TERM_LENGTH_UTF8; + // when unrecoverable disaster strikes, we populate this with the reason that we had to close IndexWriter + volatile Throwable tragedy; - /** - * Disabled by default (because IndexWriter flushes by RAM usage - * by default). Change using {@link #setMaxBufferedDocs(int)}. - */ - public final static int DEFAULT_MAX_BUFFERED_DOCS = DISABLE_AUTO_FLUSH; + private final Directory directory; // where this index resides + private final Analyzer analyzer; // how to analyze text - /** - * Default value is 16 MB (which means flush when buffered - * docs consume 16 MB RAM). Change using {@link #setRAMBufferSizeMB}. - */ - public final static double DEFAULT_RAM_BUFFER_SIZE_MB = 16.0; - - /** - * Disabled by default (because IndexWriter flushes by RAM usage - * by default). Change using {@link #setMaxBufferedDeleteTerms(int)}. - */ - public final static int DEFAULT_MAX_BUFFERED_DELETE_TERMS = DISABLE_AUTO_FLUSH; - - /** - * @deprecated - * @see LogDocMergePolicy#DEFAULT_MAX_MERGE_DOCS - */ - public final static int DEFAULT_MAX_MERGE_DOCS = LogDocMergePolicy.DEFAULT_MAX_MERGE_DOCS; - - /** - * Default value is 10,000. Change using {@link #setMaxFieldLength(int)}. - */ - public final static int DEFAULT_MAX_FIELD_LENGTH = 10000; - - /** - * Default value is 128. Change using {@link #setTermIndexInterval(int)}. - */ - public final static int DEFAULT_TERM_INDEX_INTERVAL = 128; - - /** - * Absolute hard maximum length for a term. If a term - * arrives from the analyzer longer than this length, it - * is skipped and a message is printed to infoStream, if - * set (see {@link #setInfoStream}). - */ - public final static int MAX_TERM_LENGTH = DocumentsWriter.MAX_TERM_LENGTH; - - /** - * Default for {@link #getMaxSyncPauseSeconds}. On - * Windows this defaults to 10.0 seconds; elsewhere it's - * 0. - */ - public final static double DEFAULT_MAX_SYNC_PAUSE_SECONDS; - static { - if (Constants.WINDOWS) - DEFAULT_MAX_SYNC_PAUSE_SECONDS = 10.0; - else - DEFAULT_MAX_SYNC_PAUSE_SECONDS = 0.0; - } - - // The normal read buffer size defaults to 1024, but - // increasing this during merging seems to yield - // performance gains. However we don't want to increase - // it too much because there are quite a few - // BufferedIndexInputs created during merging. See - // LUCENE-888 for details. 
- private final static int MERGE_READ_BUFFER_SIZE = 4096; - - // Used for printing messages - private static Object MESSAGE_ID_LOCK = new Object(); - private static int MESSAGE_ID = 0; - private int messageID = -1; - volatile private boolean hitOOM; - - private Directory directory; // where this index resides - private Analyzer analyzer; // how to analyze text - - private Similarity similarity = Similarity.getDefault(); // how to normalize - private volatile long changeCount; // increments every time a change is completed - private long lastCommitChangeCount; // last changeCount that was committed + private volatile long lastCommitChangeCount; // last changeCount that was committed - private SegmentInfos rollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails - private HashMap rollbackSegments; + private List rollbackSegments; // list of segmentInfo we will fallback to if the commit fails volatile SegmentInfos pendingCommit; // set when a commit is pending (after prepareCommit() & before commit()) volatile long pendingCommitChangeCount; - private SegmentInfos localRollbackSegmentInfos; // segmentInfos we will fallback to if the commit fails - private boolean localAutoCommit; // saved autoCommit during local transaction - private int localFlushedDocCount; // saved docWriter.getFlushedDocCount during local transaction - private boolean autoCommit = true; // false if we should commit only on close + private Collection filesToCommit; - private SegmentInfos segmentInfos = new SegmentInfos(); // the segments + final SegmentInfos segmentInfos; // the segments + final FieldNumbers globalFieldNumberMap; - private DocumentsWriter docWriter; - private IndexFileDeleter deleter; + private final DocumentsWriter docWriter; + private final Queue eventQueue; + final IndexFileDeleter deleter; - private Set segmentsToOptimize = new HashSet(); // used by optimize to note those needing optimization + // used by forceMerge to note those needing merging + private Map segmentsToMerge = new HashMap<>(); + private int mergeMaxNumSegments; private Lock writeLock; - private int termIndexInterval = DEFAULT_TERM_INDEX_INTERVAL; + private volatile boolean closed; + private volatile boolean closing; - private boolean closeDir; - private boolean closed; - private boolean closing; - // Holds all SegmentInfo instances currently involved in // merges - private HashSet mergingSegments = new HashSet(); + private HashSet mergingSegments = new HashSet<>(); - private MergePolicy mergePolicy = new LogByteSizeMergePolicy(); - private MergeScheduler mergeScheduler = new ConcurrentMergeScheduler(); - private LinkedList pendingMerges = new LinkedList(); - private Set runningMerges = new HashSet(); - private List mergeExceptions = new ArrayList(); + private final MergeScheduler mergeScheduler; + private LinkedList pendingMerges = new LinkedList<>(); + private Set runningMerges = new HashSet<>(); + private List mergeExceptions = new ArrayList<>(); private long mergeGen; private boolean stopMerges; + private boolean didMessageState; - private int flushCount; - private int flushDeletesCount; - private double maxSyncPauseSeconds = DEFAULT_MAX_SYNC_PAUSE_SECONDS; + final AtomicInteger flushCount = new AtomicInteger(); + final AtomicInteger flushDeletesCount = new AtomicInteger(); - // Used to only allow one addIndexes to proceed at once - // TODO: use ReadWriteLock once we are on 5.0 - private int readCount; // count of how many threads are holding read lock - private Thread writeThread; // non-null if any thread holds 
write lock - - synchronized void acquireWrite() { - while(writeThread != null || readCount > 0) - doWait(); + final ReaderPool readerPool = new ReaderPool(); + final BufferedUpdatesStream bufferedUpdatesStream; - // We could have been closed while we were waiting: - ensureOpen(); + // This is a "write once" variable (like the organic dye + // on a DVD-R that may or may not be heated by a laser and + // then cooled to permanently record the event): it's + // false, until getReader() is called for the first time, + // at which point it's switched to true and never changes + // back to false. Once this is true, we hold open and + // reuse SegmentReader instances internally for applying + // deletes, doing merges, and reopening near real-time + // readers. + private volatile boolean poolReaders; - writeThread = Thread.currentThread(); - } + // The instance that was passed to the constructor. It is saved only in order + // to allow users to query an IndexWriter settings. + private final LiveIndexWriterConfig config; - synchronized void releaseWrite() { - assert Thread.currentThread() == writeThread; - writeThread = null; - notifyAll(); - } + /** System.nanoTime() when commit started; used to write + * an infoStream message about how long commit took. */ + private long startCommitTime; - synchronized void acquireRead() { - final Thread current = Thread.currentThread(); - while(writeThread != null && writeThread != current) - doWait(); + /** How many documents are in the index, or are in the process of being + * added (reserved). E.g., operations like addIndexes will first reserve + * the right to add N docs, before they actually change the index, + * much like how hotels place an "authorization hold" on your credit + * card to make sure they can later charge you when you check out. */ + final AtomicLong pendingNumDocs = new AtomicLong(); - readCount++; + DirectoryReader getReader() throws IOException { + return getReader(true); } - synchronized void releaseRead() { - readCount--; - assert readCount >= 0; - if (0 == readCount) - notifyAll(); - } - /** - * Used internally to throw an {@link - * AlreadyClosedException} if this IndexWriter has been - * closed. - * @throws AlreadyClosedException if this IndexWriter is + * Expert: returns a readonly reader, covering all + * committed as well as un-committed changes to the index. + * This provides "near real-time" searching, in that + * changes made during an IndexWriter session can be + * quickly made available for searching without closing + * the writer nor calling {@link #commit}. + * + *

    Note that this is functionally equivalent to calling + * {@link #flush} and then opening a new reader. But the turnaround time of this + * method should be faster since it avoids the potentially + * costly {@link #commit}.

    + * + *

    You must close the {@link IndexReader} returned by + * this method once you are done using it.

    + * + *

    It's near real-time because there is no hard + * guarantee on how quickly you can get a new reader after + * making changes with IndexWriter. You'll have to + * experiment in your situation to determine if it's + * fast enough. As this is a new and experimental + * feature, please report back on your findings so we can + * learn, improve and iterate.

    + * + *

    The resulting reader supports {@link + * DirectoryReader#openIfChanged}, but that call will simply forward + * back to this method (though this may change in the + * future).

    + * + *

    The very first time this method is called, this + * writer instance will make every effort to pool the + * readers that it opens for doing merges, applying + * deletes, etc. This means additional resources (RAM, + * file descriptors, CPU time) will be consumed.

    + * + *

    For lower latency on reopening a reader, you should + * call {@link IndexWriterConfig#setMergedSegmentWarmer} to + * pre-warm a newly merged segment before it's committed + * to the index. This is important for minimizing + * index-to-search delay after a large merge.

    + * + *

    If an addIndexes* call is running in another thread, + * then this reader will only search those segments from + * the foreign index that have been successfully copied + * over, so far.

    + * + *

    NOTE: Once the writer is closed, any + * outstanding readers may continue to be used. However, + * if you attempt to reopen any of those readers, you'll + * hit an {@link AlreadyClosedException}.
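A minimal usage sketch of the near real-time reader described above (the usual public entry point is DirectoryReader.open(IndexWriter, boolean); writer is an already-open IndexWriter and is assumed, not part of this patch):

    DirectoryReader reader = DirectoryReader.open(writer, true); // applyAllDeletes
    IndexSearcher searcher = new IndexSearcher(reader);
    // searches here see all changes made by the writer so far

    // after further updates, refresh instead of opening from scratch:
    DirectoryReader newReader = DirectoryReader.openIfChanged(reader, writer, true);
    if (newReader != null) {
      reader.close();
      reader = newReader;
    }
    reader.close(); // you must close the last reader yourself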

    + * + * @lucene.experimental + * + * @return IndexReader that covers entire index plus all + * changes made so far by this IndexWriter instance + * + * @throws IOException If there is a low-level I/O error */ - protected synchronized final void ensureOpen(boolean includePendingClose) throws AlreadyClosedException { - if (closed || (includePendingClose && closing)) { - throw new AlreadyClosedException("this IndexWriter is closed"); + DirectoryReader getReader(boolean applyAllDeletes) throws IOException { + ensureOpen(); + + final long tStart = System.currentTimeMillis(); + + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "flush at getReader"); } + // Do this up front before flushing so that the readers + // obtained during this flush are pooled, the first time + // this method is called: + poolReaders = true; + DirectoryReader r = null; + doBeforeFlush(); + boolean anySegmentFlushed = false; + /* + * for releasing a NRT reader we must ensure that + * DW doesn't add any segments or deletes until we are + * done with creating the NRT DirectoryReader. + * We release the two stage full flush after we are done opening the + * directory reader! + */ + boolean success2 = false; + try { + synchronized (fullFlushLock) { + boolean success = false; + try { + anySegmentFlushed = docWriter.flushAllThreads(this); + if (!anySegmentFlushed) { + // prevent double increment since docWriter#doFlush increments the flushcount + // if we flushed anything. + flushCount.incrementAndGet(); + } + success = true; + // Prevent segmentInfos from changing while opening the + // reader; in theory we could instead do similar retry logic, + // just like we do when loading segments_N + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + r = StandardDirectoryReader.open(this, segmentInfos, applyAllDeletes); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "return reader version=" + r.getVersion() + " reader=" + r); + } + } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "getReader"); + // never reached but javac disagrees: + return null; + } finally { + if (!success) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception during NRT reader"); + } + } + if (tragedy == null) { + // Done: finish the full flush! (unless we hit OOM or something) + docWriter.finishFullFlush(success); + processEvents(false, true); + doAfterFlush(); + } + } + } + if (anySegmentFlushed) { + maybeMerge(config.getMergePolicy(), MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); + } + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "getReader took " + (System.currentTimeMillis() - tStart) + " msec"); + } + success2 = true; + } finally { + if (!success2) { + IOUtils.closeWhileHandlingException(r); + } + } + return r; } - protected synchronized final void ensureOpen() throws AlreadyClosedException { - ensureOpen(true); + @Override + public final long ramBytesUsed() { + ensureOpen(); + return docWriter.ramBytesUsed(); } - /** - * Prints a message to the infoStream (if non-null), - * prefixed with the identifying information for this - * writer and the thread that's calling it. - */ - public void message(String message) { - if (infoStream != null) - infoStream.println("IW " + messageID + " [" + Thread.currentThread().getName() + "]: " + message); - } + /** Holds shared SegmentReader instances. IndexWriter uses + * SegmentReaders for 1) applying deletes, 2) doing + * merges, 3) handing out a real-time reader. 
This pool + * reuses instances of the SegmentReaders in all these + * places if it is in "near real-time mode" (getReader() + * has been called on this instance). */ - private synchronized void setMessageID(PrintStream infoStream) { - if (infoStream != null && messageID == -1) { - synchronized(MESSAGE_ID_LOCK) { - messageID = MESSAGE_ID++; + class ReaderPool implements Closeable { + + private final Map readerMap = new HashMap<>(); + + // used only by asserts + public synchronized boolean infoIsLive(SegmentCommitInfo info) { + int idx = segmentInfos.indexOf(info); + assert idx != -1: "info=" + info + " isn't live"; + assert segmentInfos.info(idx) == info: "info=" + info + " doesn't match live info in segmentInfos"; + return true; + } + + public synchronized void drop(SegmentCommitInfo info) throws IOException { + final ReadersAndUpdates rld = readerMap.get(info); + if (rld != null) { + assert info == rld.info; +// System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.drop: " + info); + readerMap.remove(info); + rld.dropReaders(); } } - this.infoStream = infoStream; - } - /** - * Casts current mergePolicy to LogMergePolicy, and throws - * an exception if the mergePolicy is not a LogMergePolicy. - */ - private LogMergePolicy getLogMergePolicy() { - if (mergePolicy instanceof LogMergePolicy) - return (LogMergePolicy) mergePolicy; - else - throw new IllegalArgumentException("this method can only be called when the merge policy is the default LogMergePolicy"); - } + public synchronized boolean anyPendingDeletes() { + for(ReadersAndUpdates rld : readerMap.values()) { + if (rld.getPendingDeleteCount() != 0) { + return true; + } + } - /**

    Get the current setting of whether newly flushed - * segments will use the compound file format. Note that - * this just returns the value previously set with - * setUseCompoundFile(boolean), or the default value - * (true). You cannot use this to query the status of - * previously flushed segments.

    - * - *

    Note that this method is a convenience method: it - * just calls mergePolicy.getUseCompoundFile as long as - * mergePolicy is an instance of {@link LogMergePolicy}. - * Otherwise an IllegalArgumentException is thrown.

    - * - * @see #setUseCompoundFile(boolean) - */ - public boolean getUseCompoundFile() { - return getLogMergePolicy().getUseCompoundFile(); - } + return false; + } - /**

    Setting to turn on usage of a compound file. When on, - * multiple files for each segment are merged into a - * single file when a new segment is flushed.

    - * - *

    Note that this method is a convenience method: it - * just calls mergePolicy.setUseCompoundFile as long as - * mergePolicy is an instance of {@link LogMergePolicy}. - * Otherwise an IllegalArgumentException is thrown.
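For comparison, a sketch of the equivalent setup against the configuration API this patch moves to (exact setter names assumed; analyzer and directory are placeholders):

    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
    conf.setUseCompoundFile(true);            // newly flushed segments -> .cfs
    conf.getMergePolicy().setNoCFSRatio(1.0); // merged segments -> .cfs as well
    IndexWriter writer = new IndexWriter(directory, conf);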

    - */ - public void setUseCompoundFile(boolean value) { - getLogMergePolicy().setUseCompoundFile(value); - getLogMergePolicy().setUseCompoundDocStore(value); - } + public synchronized void release(ReadersAndUpdates rld) throws IOException { + release(rld, true); + } - /** Expert: Set the Similarity implementation used by this IndexWriter. - * - * @see Similarity#setDefault(Similarity) - */ - public void setSimilarity(Similarity similarity) { - ensureOpen(); - this.similarity = similarity; - docWriter.setSimilarity(similarity); - } + public synchronized void release(ReadersAndUpdates rld, boolean assertInfoLive) throws IOException { - /** Expert: Return the Similarity implementation used by this IndexWriter. - * - *

    This defaults to the current value of {@link Similarity#getDefault()}. - */ - public Similarity getSimilarity() { - ensureOpen(); - return this.similarity; - } + // Matches incRef in get: + rld.decRef(); - /** Expert: Set the interval between indexed terms. Large values cause less - * memory to be used by IndexReader, but slow random-access to terms. Small - * values cause more memory to be used by an IndexReader, and speed - * random-access to terms. - * - * This parameter determines the amount of computation required per query - * term, regardless of the number of documents that contain that term. In - * particular, it is the maximum number of other terms that must be - * scanned before a term is located and its frequency and position information - * may be processed. In a large index with user-entered query terms, query - * processing time is likely to be dominated not by term lookup but rather - * by the processing of frequency and positional data. In a small index - * or when many uncommon query terms are generated (e.g., by wildcard - * queries) term lookup may become a dominant cost. - * - * In particular, numUniqueTerms/interval terms are read into - * memory by an IndexReader, and, on average, interval/2 terms - * must be scanned for each random term access. - * - * @see #DEFAULT_TERM_INDEX_INTERVAL - */ - public void setTermIndexInterval(int interval) { - ensureOpen(); - this.termIndexInterval = interval; - } + // Pool still holds a ref: + assert rld.refCount() >= 1; - /** Expert: Return the interval between indexed terms. - * - * @see #setTermIndexInterval(int) - */ - public int getTermIndexInterval() { - // We pass false because this method is called by SegmentMerger while we are in the process of closing - ensureOpen(false); - return termIndexInterval; - } + if (!poolReaders && rld.refCount() == 1) { + // This is the last ref to this RLD, and we're not + // pooling, so remove it: +// System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: " + rld.info); + if (rld.writeLiveDocs(directory)) { + // Make sure we only write del docs for a live segment: + assert assertInfoLive == false || infoIsLive(rld.info); + // Must checkpoint because we just + // created new _X_N.del and field updates files; + // don't call IW.checkpoint because that also + // increments SIS.version, which we do not want to + // do here: it was done previously (after we + // invoked BDS.applyDeletes), whereas here all we + // did was move the state to disk: + checkpointNoSIS(); + } + //System.out.println("IW: done writeLiveDocs for info=" + rld.info); - /** - * Constructs an IndexWriter for the index in path. - * Text will be analyzed with a. If create - * is true, then a new, empty index will be created in - * path, replacing the index already there, - * if any. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified - * via the MaxFieldLength constructor. - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - */ - public IndexWriter(String path, Analyzer a, boolean create, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, create, true, null, false, mfl.getLimit()); - } +// System.out.println("[" + Thread.currentThread().getName() + "] ReaderPool.release: drop readers " + rld.info); + rld.dropReaders(); + readerMap.remove(rld.info); + } + } + + @Override + public void close() throws IOException { + dropAll(false); + } - /** - * Constructs an IndexWriter for the index in path. - * Text will be analyzed with a. If create - * is true, then a new, empty index will be created in - * path, replacing the index already there, if any. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link - * #IndexWriter(String,Analyzer,boolean,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. - */ - public IndexWriter(String path, Analyzer a, boolean create) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, create, true, null, true, DEFAULT_MAX_FIELD_LENGTH); - } + /** Remove all our references to readers, and commits + * any pending changes. */ + synchronized void dropAll(boolean doSave) throws IOException { + Throwable priorE = null; + final Iterator> it = readerMap.entrySet().iterator(); + while(it.hasNext()) { + final ReadersAndUpdates rld = it.next().getValue(); - /** - * Constructs an IndexWriter for the index in path. - * Text will be analyzed with a. If create - * is true, then a new, empty index will be created in - * path, replacing the index already there, if any. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified - * via the MaxFieldLength constructor. - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - */ - public IndexWriter(File path, Analyzer a, boolean create, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, create, true, null, false, mfl.getLimit()); - } + try { + if (doSave && rld.writeLiveDocs(directory)) { + // Make sure we only write del docs and field updates for a live segment: + assert infoIsLive(rld.info); + // Must checkpoint because we just + // created new _X_N.del and field updates files; + // don't call IW.checkpoint because that also + // increments SIS.version, which we do not want to + // do here: it was done previously (after we + // invoked BDS.applyDeletes), whereas here all we + // did was move the state to disk: + checkpointNoSIS(); + } + } catch (Throwable t) { + if (doSave) { + IOUtils.reThrow(t); + } else if (priorE == null) { + priorE = t; + } + } - /** - * Constructs an IndexWriter for the index in path. - * Text will be analyzed with a. If create - * is true, then a new, empty index will be created in - * path, replacing the index already there, if any. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link - * #IndexWriter(File,Analyzer,boolean,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. - */ - public IndexWriter(File path, Analyzer a, boolean create) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, create, true, null, true, DEFAULT_MAX_FIELD_LENGTH); - } + // Important to remove as-we-go, not with .clear() + // in the end, in case we hit an exception; + // otherwise we could over-decref if close() is + // called again: + it.remove(); - /** - * Constructs an IndexWriter for the index in d. - * Text will be analyzed with a. If create - * is true, then a new, empty index will be created in - * d, replacing the index already there, if any. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param d the index directory - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified - * via the MaxFieldLength constructor. - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - */ - public IndexWriter(Directory d, Analyzer a, boolean create, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false, null, false, mfl.getLimit()); - } + // NOTE: it is allowed that these decRefs do not + // actually close the SRs; this happens when a + // near real-time reader is kept open after the + // IndexWriter instance is closed: + try { + rld.dropReaders(); + } catch (Throwable t) { + if (doSave) { + IOUtils.reThrow(t); + } else if (priorE == null) { + priorE = t; + } + } + } + assert readerMap.size() == 0; + IOUtils.reThrow(priorE); + } - /** - * Constructs an IndexWriter for the index in d. - * Text will be analyzed with a. If create - * is true, then a new, empty index will be created in - * d, replacing the index already there, if any. - * - * @param d the index directory - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 - * release, and call {@link #commit()} when needed. - * Use {@link #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)} instead. - */ - public IndexWriter(Directory d, Analyzer a, boolean create) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false, null, true, DEFAULT_MAX_FIELD_LENGTH); - } + /** + * Commit live docs changes for the segment readers for + * the provided infos. + * + * @throws IOException If there is a low-level I/O error + */ + public synchronized void commit(SegmentInfos infos) throws IOException { + for (SegmentCommitInfo info : infos) { + final ReadersAndUpdates rld = readerMap.get(info); + if (rld != null) { + assert rld.info == info; + if (rld.writeLiveDocs(directory)) { + // Make sure we only write del docs for a live segment: + assert infoIsLive(info); + // Must checkpoint because we just + // created new _X_N.del and field updates files; + // don't call IW.checkpoint because that also + // increments SIS.version, which we do not want to + // do here: it was done previously (after we + // invoked BDS.applyDeletes), whereas here all we + // did was move the state to disk: + checkpointNoSIS(); + } + } + } + } - /** - * Constructs an IndexWriter for the index in - * path, first creating it if it does not - * already exist. Text will be analyzed with - * a. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified - * via the MaxFieldLength constructor. - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - */ - public IndexWriter(String path, Analyzer a, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, true, null, false, mfl.getLimit()); - } + /** + * Obtain a ReadersAndLiveDocs instance from the + * readerPool. If create is true, you must later call + * {@link #release(ReadersAndUpdates)}. + */ + public synchronized ReadersAndUpdates get(SegmentCommitInfo info, boolean create) { - /** - * Constructs an IndexWriter for the index in - * path, first creating it if it does not - * already exist. Text will be analyzed with - * a. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 - * release, and call {@link #commit()} when needed. - * Use {@link #IndexWriter(String,Analyzer,MaxFieldLength)} instead. - */ - public IndexWriter(String path, Analyzer a) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, true, null, true, DEFAULT_MAX_FIELD_LENGTH); - } + assert info.info.dir == directory: "info.dir=" + info.info.dir + " vs " + directory; - /** - * Constructs an IndexWriter for the index in - * path, first creating it if it does not - * already exist. Text will be analyzed with - * a. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified - * via the MaxFieldLength constructor. - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - */ - public IndexWriter(File path, Analyzer a, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, true, null, false, mfl.getLimit()); - } + ReadersAndUpdates rld = readerMap.get(info); + if (rld == null) { + if (!create) { + return null; + } + rld = new ReadersAndUpdates(IndexWriter.this, info); + // Steal initial reference: + readerMap.put(info, rld); + } else { + assert rld.info == info: "rld.info=" + rld.info + " info=" + info + " isLive?=" + infoIsLive(rld.info) + " vs " + infoIsLive(info); + } - /** - * Constructs an IndexWriter for the index in - * path, first creating it if it does not - * already exist. Text will be analyzed with - * a. - * - * @param path the path to the index directory - * @param a the analyzer to use - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link #IndexWriter(File,Analyzer,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. - */ - public IndexWriter(File path, Analyzer a) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(FSDirectory.getDirectory(path), a, true, null, true, DEFAULT_MAX_FIELD_LENGTH); - } + if (create) { + // Return ref to caller: + rld.incRef(); + } - /** - * Constructs an IndexWriter for the index in - * d, first creating it if it does not - * already exist. Text will be analyzed with - * a. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param d the index directory - * @param a the analyzer to use - * @param mfl Maximum field length: LIMITED, UNLIMITED, or user-specified - * via the MaxFieldLength constructor. - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - */ - public IndexWriter(Directory d, Analyzer a, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, false, null, false, mfl.getLimit()); - } + assert noDups(); - /** - * Constructs an IndexWriter for the index in - * d, first creating it if it does not - * already exist. Text will be analyzed with - * a. - * - * @param d the index directory - * @param a the analyzer to use - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link - * #IndexWriter(Directory,Analyzer,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. - */ - public IndexWriter(Directory d, Analyzer a) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, false, null, true, DEFAULT_MAX_FIELD_LENGTH); - } + return rld; + } - /** - * Constructs an IndexWriter for the index in - * d, first creating it if it does not - * already exist. Text will be analyzed with - * a. - * - * @param d the index directory - * @param autoCommit see above - * @param a the analyzer to use - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link - * #IndexWriter(Directory,Analyzer,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. - */ - public IndexWriter(Directory d, boolean autoCommit, Analyzer a) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, false, null, autoCommit, DEFAULT_MAX_FIELD_LENGTH); + // Make sure that every segment appears only once in the + // pool: + private boolean noDups() { + Set seen = new HashSet<>(); + for(SegmentCommitInfo info : readerMap.keySet()) { + assert !seen.contains(info.info.name); + seen.add(info.info.name); + } + return true; + } } /** - * Constructs an IndexWriter for the index in d. - * Text will be analyzed with a. If create - * is true, then a new, empty index will be created in - * d, replacing the index already there, if any. 
- * - * @param d the index directory - * @param autoCommit see above - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link - * #IndexWriter(Directory,Analyzer,boolean,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. + * Obtain the number of deleted docs for a pooled reader. + * If the reader isn't being pooled, the segmentInfo's + * delCount is returned. */ - public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false, null, autoCommit, DEFAULT_MAX_FIELD_LENGTH); - } + public int numDeletedDocs(SegmentCommitInfo info) { + ensureOpen(false); + int delCount = info.getDelCount(); - /** - * Expert: constructs an IndexWriter with a custom {@link - * IndexDeletionPolicy}, for the index in d, - * first creating it if it does not already exist. Text - * will be analyzed with a. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param d the index directory - * @param a the analyzer to use - * @param deletionPolicy see above - * @param mfl whether or not to limit field lengths - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - */ - public IndexWriter(Directory d, Analyzer a, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, false, deletionPolicy, false, mfl.getLimit()); + final ReadersAndUpdates rld = readerPool.get(info, false); + if (rld != null) { + delCount += rld.getPendingDeleteCount(); + } + return delCount; } /** - * Expert: constructs an IndexWriter with a custom {@link - * IndexDeletionPolicy}, for the index in d, - * first creating it if it does not already exist. Text - * will be analyzed with a. - * - * @param d the index directory - * @param autoCommit see above - * @param a the analyzer to use - * @param deletionPolicy see above - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be - * read/written to or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link - * #IndexWriter(Directory,Analyzer,IndexDeletionPolicy,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. + * Used internally to throw an {@link AlreadyClosedException} if this + * IndexWriter has been closed or is in the process of closing. + * + * @param failIfClosing + * if true, also fail when {@code IndexWriter} is in the process of + * closing ({@code closing=true}) but not yet done closing ( + * {@code closed=false}) + * @throws AlreadyClosedException + * if this IndexWriter is closed or in the process of closing */ - public IndexWriter(Directory d, boolean autoCommit, Analyzer a, IndexDeletionPolicy deletionPolicy) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH); + protected final void ensureOpen(boolean failIfClosing) throws AlreadyClosedException { + if (closed || (failIfClosing && closing)) { + throw new AlreadyClosedException("this IndexWriter is closed", tragedy); + } } - - /** - * Expert: constructs an IndexWriter with a custom {@link - * IndexDeletionPolicy}, for the index in d. - * Text will be analyzed with a. If - * create is true, then a new, empty index - * will be created in d, replacing the index - * already there, if any. - * - *

    NOTE: autoCommit (see above) is set to false with this - * constructor. - * - * @param d the index directory - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @param deletionPolicy see above - * @param mfl whether or not to limit field lengths - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - */ - public IndexWriter(Directory d, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy, MaxFieldLength mfl) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false, deletionPolicy, false, mfl.getLimit()); - } /** - * Expert: constructs an IndexWriter with a custom {@link - * IndexDeletionPolicy}, for the index in d. - * Text will be analyzed with a. If - * create is true, then a new, empty index - * will be created in d, replacing the index - * already there, if any. - * - * @param d the index directory - * @param autoCommit see above - * @param a the analyzer to use - * @param create true to create the index or overwrite - * the existing one; false to append to the existing - * index - * @param deletionPolicy see above - * @throws CorruptIndexException if the index is corrupt - * @throws LockObtainFailedException if another writer - * has this index open (write.lock could not - * be obtained) - * @throws IOException if the directory cannot be read/written to, or - * if it does not exist and create is - * false or if there is any other low-level - * IO error - * @deprecated This constructor will be removed in the 3.0 release. - * Use {@link - * #IndexWriter(Directory,Analyzer,boolean,IndexDeletionPolicy,MaxFieldLength)} - * instead, and call {@link #commit()} when needed. + * Used internally to throw an {@link + * AlreadyClosedException} if this IndexWriter has been + * closed ({@code closed=true}) or is in the process of + * closing ({@code closing=true}). + *

    + * Calls {@link #ensureOpen(boolean) ensureOpen(true)}. + * @throws AlreadyClosedException if this IndexWriter is closed */ - public IndexWriter(Directory d, boolean autoCommit, Analyzer a, boolean create, IndexDeletionPolicy deletionPolicy) - throws CorruptIndexException, LockObtainFailedException, IOException { - init(d, a, create, false, deletionPolicy, autoCommit, DEFAULT_MAX_FIELD_LENGTH); + protected final void ensureOpen() throws AlreadyClosedException { + ensureOpen(true); } - private void init(Directory d, Analyzer a, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength) - throws CorruptIndexException, LockObtainFailedException, IOException { - if (IndexReader.indexExists(d)) { - init(d, a, false, closeDir, deletionPolicy, autoCommit, maxFieldLength); - } else { - init(d, a, true, closeDir, deletionPolicy, autoCommit, maxFieldLength); - } - } + final Codec codec; // for writing new segments - private void init(Directory d, Analyzer a, final boolean create, boolean closeDir, IndexDeletionPolicy deletionPolicy, boolean autoCommit, int maxFieldLength) - throws CorruptIndexException, LockObtainFailedException, IOException { - this.closeDir = closeDir; + /** + * Constructs a new IndexWriter per the settings given in conf. + * If you want to make "live" changes to this writer instance, use + * {@link #getConfig()}. + * + *

    + * NOTE: after ths writer is created, the given configuration instance + * cannot be passed to another writer. If you intend to do so, you should + * {@link IndexWriterConfig#clone() clone} it beforehand. + * + * @param d + * the index directory. The index is either created or appended + * according conf.getOpenMode(). + * @param conf + * the configuration settings according to which IndexWriter should + * be initialized. + * @throws IOException + * if the directory cannot be read/written to, or if it does not + * exist and conf.getOpenMode() is + * OpenMode.APPEND or if there is any other low-level + * IO error + */ + public IndexWriter(Directory d, IndexWriterConfig conf) throws IOException { + conf.setIndexWriter(this); // prevent reuse by other instances + config = conf; directory = d; - analyzer = a; - setMessageID(defaultInfoStream); - this.maxFieldLength = maxFieldLength; + analyzer = config.getAnalyzer(); + infoStream = config.getInfoStream(); + mergeScheduler = config.getMergeScheduler(); + codec = config.getCodec(); - if (create) { - // Clear the write lock in case it's leftover: - directory.clearLock(WRITE_LOCK_NAME); - } + bufferedUpdatesStream = new BufferedUpdatesStream(infoStream); + poolReaders = config.getReaderPooling(); - Lock writeLock = directory.makeLock(WRITE_LOCK_NAME); - if (!writeLock.obtain(writeLockTimeout)) // obtain write lock + writeLock = directory.makeLock(WRITE_LOCK_NAME); + + if (!writeLock.obtain(config.getWriteLockTimeout())) // obtain write lock throw new LockObtainFailedException("Index locked for write: " + writeLock); - this.writeLock = writeLock; // save it + boolean success = false; try { + OpenMode mode = config.getOpenMode(); + boolean create; + if (mode == OpenMode.CREATE) { + create = true; + } else if (mode == OpenMode.APPEND) { + create = false; + } else { + // CREATE_OR_APPEND - create only if an index does not exist + create = !DirectoryReader.indexExists(directory); + } + + // If index is too old, reading the segments will throw + // IndexFormatTooOldException. + segmentInfos = new SegmentInfos(); + + boolean initialIndexExists = true; + if (create) { // Try to read first. This is to allow create // against an index that's currently open for @@ -1066,526 +782,216 @@ segmentInfos.clear(); } catch (IOException e) { // Likely this means it's a fresh directory + initialIndexExists = false; } - segmentInfos.commit(directory); + + // Record that we have a change (zero out all + // segments) pending: + changed(); } else { segmentInfos.read(directory); - // We assume that this segments_N was previously - // properly sync'd: - for(int i=0;iDetermines the largest segment (measured by - * document count) that may be merged with other segments. - * Small values (e.g., less than 10,000) are best for - * interactive indexing, as this limits the length of - * pauses while indexing to a few seconds. Larger values - * are best for batched indexing and speedier - * searches.

    - * - *

    The default value is {@link Integer#MAX_VALUE}.

    - * - *

    Note that this method is a convenience method: it - * just calls mergePolicy.setMaxMergeDocs as long as - * mergePolicy is an instance of {@link LogMergePolicy}. - * Otherwise an IllegalArgumentException is thrown.

    - * - *

    The default merge policy ({@link - * LogByteSizeMergePolicy}) also allows you to set this - * limit by net size (in MB) of the segment, using {@link - * LogByteSizeMergePolicy#setMaxMergeMB}.
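A sketch of the same limits expressed on the merge policy instance handed to the configuration (values are arbitrary; conf is a hypothetical IndexWriterConfig):

    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMaxMergeDocs(100000); // never merge a segment holding more docs than this
    mp.setMaxMergeMB(512.0);    // or cap merged segments by net size instead
    conf.setMergePolicy(mp);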

    - */ - public void setMaxMergeDocs(int maxMergeDocs) { - getLogMergePolicy().setMaxMergeDocs(maxMergeDocs); - } - + /** - *

    Returns the largest segment (measured by document - * count) that may be merged with other segments.

    - * - *

    Note that this method is a convenience method: it - * just calls mergePolicy.getMaxMergeDocs as long as - * mergePolicy is an instance of {@link LogMergePolicy}. - * Otherwise an IllegalArgumentException is thrown.

    - * - * @see #setMaxMergeDocs + * Returns a {@link LiveIndexWriterConfig}, which can be used to query the IndexWriter + * current settings, as well as modify "live" ones. */ - public int getMaxMergeDocs() { - return getLogMergePolicy().getMaxMergeDocs(); + public LiveIndexWriterConfig getConfig() { + ensureOpen(false); + return config; } - /** - * The maximum number of terms that will be indexed for a single field in a - * document. This limits the amount of memory required for indexing, so that - * collections with very large files will not crash the indexing process by - * running out of memory. This setting refers to the number of running terms, - * not to the number of different terms.

    - * Note: this silently truncates large documents, excluding from the - * index all terms that occur further in the document. If you know your source - * documents are large, be sure to set this value high enough to accomodate - * the expected size. If you set it to Integer.MAX_VALUE, then the only limit - * is your memory, but you should anticipate an OutOfMemoryError.
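This per-writer limit is removed by this patch; the usual replacement is to wrap the analyzer, roughly as follows (sketch; delegate is a placeholder, LimitTokenCountAnalyzer lives in the analyzers-common module):

    Analyzer delegate = new StandardAnalyzer(Version.LATEST);
    // index at most 10000 tokens per field, silently discarding the rest:
    Analyzer limited = new LimitTokenCountAnalyzer(delegate, 10000);
    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, limited);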

    - * By default, no more than {@link #DEFAULT_MAX_FIELD_LENGTH} terms - * will be indexed for a field. - */ - public void setMaxFieldLength(int maxFieldLength) { - ensureOpen(); - this.maxFieldLength = maxFieldLength; - docWriter.setMaxFieldLength(maxFieldLength); - if (infoStream != null) - message("setMaxFieldLength " + maxFieldLength); + private void messageState() { + if (infoStream.isEnabled("IW") && didMessageState == false) { + didMessageState = true; + infoStream.message("IW", "\ndir=" + directory + "\n" + + "index=" + segString() + "\n" + + "version=" + Version.LATEST.toString() + "\n" + + config.toString()); + } } /** - * Returns the maximum number of terms that will be - * indexed for a single field in a document. - * @see #setMaxFieldLength + * Gracefully closes (commits, waits for merges), but calls rollback + * if there's an exc so the IndexWriter is always closed. */ - public int getMaxFieldLength() { - ensureOpen(); - return maxFieldLength; - } - - /** Determines the minimal number of documents required - * before the buffered in-memory documents are flushed as - * a new Segment. Large values generally gives faster - * indexing. - * - *

    When this is set, the writer will flush every - * maxBufferedDocs added documents. Pass in {@link - * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due - * to number of buffered documents. Note that if flushing - * by RAM usage is also enabled, then the flush will be - * triggered by whichever comes first.

    - * - *

    Disabled by default (writer flushes by RAM usage).
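In the version this patch moves to, both flush triggers live on IndexWriterConfig; a short sketch (threshold values are arbitrary; analyzer is a placeholder):

    IndexWriterConfig conf = new IndexWriterConfig(Version.LATEST, analyzer);
    conf.setMaxBufferedDocs(IndexWriterConfig.DISABLE_AUTO_FLUSH); // flush by RAM only
    conf.setRAMBufferSizeMB(64.0);                                 // flush near 64 MB of buffered docs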

    - * - * @throws IllegalArgumentException if maxBufferedDocs is - * enabled but smaller than 2, or it disables maxBufferedDocs - * when ramBufferSize is already disabled - * @see #setRAMBufferSizeMB - */ - public void setMaxBufferedDocs(int maxBufferedDocs) { - ensureOpen(); - if (maxBufferedDocs != DISABLE_AUTO_FLUSH && maxBufferedDocs < 2) - throw new IllegalArgumentException( - "maxBufferedDocs must at least be 2 when enabled"); - if (maxBufferedDocs == DISABLE_AUTO_FLUSH - && getRAMBufferSizeMB() == DISABLE_AUTO_FLUSH) - throw new IllegalArgumentException( - "at least one of ramBufferSize and maxBufferedDocs must be enabled"); - docWriter.setMaxBufferedDocs(maxBufferedDocs); - pushMaxBufferedDocs(); - if (infoStream != null) - message("setMaxBufferedDocs " + maxBufferedDocs); - } - - /** - * If we are flushing by doc count (not by RAM usage), and - * using LogDocMergePolicy then push maxBufferedDocs down - * as its minMergeDocs, to keep backwards compatibility. - */ - private void pushMaxBufferedDocs() { - if (docWriter.getMaxBufferedDocs() != DISABLE_AUTO_FLUSH) { - final MergePolicy mp = mergePolicy; - if (mp instanceof LogDocMergePolicy) { - LogDocMergePolicy lmp = (LogDocMergePolicy) mp; - final int maxBufferedDocs = docWriter.getMaxBufferedDocs(); - if (lmp.getMinMergeDocs() != maxBufferedDocs) { - if (infoStream != null) - message("now push maxBufferedDocs " + maxBufferedDocs + " to LogDocMergePolicy"); - lmp.setMinMergeDocs(maxBufferedDocs); + private void shutdown(boolean waitForMerges) throws IOException { + if (pendingCommit != null) { + throw new IllegalStateException("cannot close: prepareCommit was already called with no corresponding call to commit"); + } + // Ensure that only one thread actually gets to do the + // closing + if (shouldClose()) { + boolean success = false; + try { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "now flush at close"); } + flush(true, true); + if (waitForMerges) { + waitForMerges(); + } else { + abortMerges(); + } + commitInternal(config.getMergePolicy()); + rollbackInternal(); // ie close, since we just committed + success = true; + } finally { + if (success == false) { + // Be certain to close the index on any exception + try { + rollbackInternal(); + } catch (Throwable t) { + // Suppress so we keep throwing original exception + } + } } } } /** - * Returns the number of buffered added documents that will - * trigger a flush if enabled. - * @see #setMaxBufferedDocs - */ - public int getMaxBufferedDocs() { - ensureOpen(); - return docWriter.getMaxBufferedDocs(); - } - - /** Determines the amount of RAM that may be used for - * buffering added documents before they are flushed as a - * new Segment. Generally for faster indexing performance - * it's best to flush by RAM usage instead of document - * count and use as large a RAM buffer as you can. + * Commits all changes to an index, waits for pending merges + * to complete, closes all associated files and releases the + * write lock. * - *

    When this is set, the writer will flush whenever - * buffered documents use this much RAM. Pass in {@link - * #DISABLE_AUTO_FLUSH} to prevent triggering a flush due - * to RAM usage. Note that if flushing by document count - * is also enabled, then the flush will be triggered by - * whichever comes first.

    + *

    Note that: + *

      + *
    • If you called prepareCommit but failed to call commit, this + * method will throw {@code IllegalStateException} and the {@code IndexWriter} + * will not be closed.
    • If this method throws any other exception, the {@code IndexWriter} + * will be closed, but changes may have been lost.
    * - *

    The default value is {@link #DEFAULT_RAM_BUFFER_SIZE_MB}.

    - * - * @throws IllegalArgumentException if ramBufferSize is - * enabled but non-positive, or it disables ramBufferSize - * when maxBufferedDocs is already disabled - */ - public void setRAMBufferSizeMB(double mb) { - if (mb != DISABLE_AUTO_FLUSH && mb <= 0.0) - throw new IllegalArgumentException( - "ramBufferSize should be > 0.0 MB when enabled"); - if (mb == DISABLE_AUTO_FLUSH && getMaxBufferedDocs() == DISABLE_AUTO_FLUSH) - throw new IllegalArgumentException( - "at least one of ramBufferSize and maxBufferedDocs must be enabled"); - docWriter.setRAMBufferSizeMB(mb); - if (infoStream != null) - message("setRAMBufferSizeMB " + mb); - } - - /** - * Returns the value set by {@link #setRAMBufferSizeMB} if enabled. - */ - public double getRAMBufferSizeMB() { - return docWriter.getRAMBufferSizeMB(); - } - - /** - *

    Determines the minimal number of delete terms required before the buffered - * in-memory delete terms are applied and flushed. If there are documents - * buffered in memory at the time, they are merged and a new segment is - * created.

    - - *

    Disabled by default (writer flushes by RAM usage).

    - * - * @throws IllegalArgumentException if maxBufferedDeleteTerms - * is enabled but smaller than 1 - * @see #setRAMBufferSizeMB - */ - public void setMaxBufferedDeleteTerms(int maxBufferedDeleteTerms) { - ensureOpen(); - if (maxBufferedDeleteTerms != DISABLE_AUTO_FLUSH - && maxBufferedDeleteTerms < 1) - throw new IllegalArgumentException( - "maxBufferedDeleteTerms must at least be 1 when enabled"); - docWriter.setMaxBufferedDeleteTerms(maxBufferedDeleteTerms); - if (infoStream != null) - message("setMaxBufferedDeleteTerms " + maxBufferedDeleteTerms); - } - - /** - * Returns the number of buffered deleted terms that will - * trigger a flush if enabled. - * @see #setMaxBufferedDeleteTerms - */ - public int getMaxBufferedDeleteTerms() { - ensureOpen(); - return docWriter.getMaxBufferedDeleteTerms(); - } - - /** Determines how often segment indices are merged by addDocument(). With - * smaller values, less RAM is used while indexing, and searches on - * unoptimized indices are faster, but indexing speed is slower. With larger - * values, more RAM is used during indexing, and while searches on unoptimized - * indices are slower, indexing is faster. Thus larger values (> 10) are best - * for batch index creation, and smaller values (< 10) for indices that are - * interactively maintained. - * - *

    Note that this method is a convenience method: it - * just calls mergePolicy.setMergeFactor as long as - * mergePolicy is an instance of {@link LogMergePolicy}. - * Otherwise an IllegalArgumentException is thrown.

    - * - *

    This must never be less than 2. The default value is 10. - */ - public void setMergeFactor(int mergeFactor) { - getLogMergePolicy().setMergeFactor(mergeFactor); - } - - /** - *

    Returns the number of segments that are merged at - * once and also controls the total number of segments - * allowed to accumulate in the index.

    - * - *

    Note that this method is a convenience method: it - * just calls mergePolicy.getMergeFactor as long as - * mergePolicy is an instance of {@link LogMergePolicy}. - * Otherwise an IllegalArgumentException is thrown.

    - * - * @see #setMergeFactor - */ - public int getMergeFactor() { - return getLogMergePolicy().getMergeFactor(); - } - - /** - * Expert: returns max delay inserted before syncing a - * commit point. On Windows, at least, pausing before - * syncing can increase net indexing throughput. The - * delay is variable based on size of the segment's files, - * and is only inserted when using - * ConcurrentMergeScheduler for merges. - * @deprecated This will be removed in 3.0, when - * autoCommit=true is removed from IndexWriter. - */ - public double getMaxSyncPauseSeconds() { - return maxSyncPauseSeconds; - } - - /** - * Expert: sets the max delay before syncing a commit - * point. - * @see #getMaxSyncPauseSeconds - * @deprecated This will be removed in 3.0, when - * autoCommit=true is removed from IndexWriter. - */ - public void setMaxSyncPauseSeconds(double seconds) { - maxSyncPauseSeconds = seconds; - } - - /** If non-null, this will be the default infoStream used - * by a newly instantiated IndexWriter. - * @see #setInfoStream - */ - public static void setDefaultInfoStream(PrintStream infoStream) { - IndexWriter.defaultInfoStream = infoStream; - } - - /** - * Returns the current default infoStream for newly - * instantiated IndexWriters. - * @see #setDefaultInfoStream - */ - public static PrintStream getDefaultInfoStream() { - return IndexWriter.defaultInfoStream; - } - - /** If non-null, information about merges, deletes and a - * message when maxFieldLength is reached will be printed - * to this. - */ - public void setInfoStream(PrintStream infoStream) { - ensureOpen(); - setMessageID(infoStream); - docWriter.setInfoStream(infoStream); - deleter.setInfoStream(infoStream); - if (infoStream != null) - messageState(); - } - - private void messageState() { - message("setInfoStream: dir=" + directory + - " autoCommit=" + autoCommit + - " mergePolicy=" + mergePolicy + - " mergeScheduler=" + mergeScheduler + - " ramBufferSizeMB=" + docWriter.getRAMBufferSizeMB() + - " maxBufferedDocs=" + docWriter.getMaxBufferedDocs() + - " maxBuffereDeleteTerms=" + docWriter.getMaxBufferedDeleteTerms() + - " maxFieldLength=" + maxFieldLength + - " index=" + segString()); - } - - /** - * Returns the current infoStream in use by this writer. - * @see #setInfoStream - */ - public PrintStream getInfoStream() { - ensureOpen(); - return infoStream; - } - - /** - * Sets the maximum time to wait for a write lock (in milliseconds) for this instance of IndexWriter. @see - * @see #setDefaultWriteLockTimeout to change the default value for all instances of IndexWriter. - */ - public void setWriteLockTimeout(long writeLockTimeout) { - ensureOpen(); - this.writeLockTimeout = writeLockTimeout; - } - - /** - * Returns allowed timeout when acquiring the write lock. - * @see #setWriteLockTimeout - */ - public long getWriteLockTimeout() { - ensureOpen(); - return writeLockTimeout; - } - - /** - * Sets the default (for any instance of IndexWriter) maximum time to wait for a write lock (in - * milliseconds). - */ - public static void setDefaultWriteLockTimeout(long writeLockTimeout) { - IndexWriter.WRITE_LOCK_TIMEOUT = writeLockTimeout; - } - - /** - * Returns default write lock timeout for newly - * instantiated IndexWriters. - * @see #setDefaultWriteLockTimeout - */ - public static long getDefaultWriteLockTimeout() { - return IndexWriter.WRITE_LOCK_TIMEOUT; - } - - /** - * Commits all changes to an index and closes all - * associated files. Note that this may be a costly + *

    + * Note that this may be a costly * operation, so, try to re-use a single writer instead of * closing and opening a new one. See {@link #commit()} for * caveats about write caching done by some IO devices. * - *

    If an Exception is hit during close, eg due to disk - * full or some other reason, then both the on-disk index - * and the internal state of the IndexWriter instance will - * be consistent. However, the close will not be complete - * even though part of it (flushing buffered documents) - * may have succeeded, so the write lock will still be - * held.

    - * - *

    If you can correct the underlying cause (eg free up - * some disk space) then you can call close() again. - * Failing that, if you want to force the write lock to be - * released (dangerous, because you may then lose buffered - * docs in the IndexWriter instance) then you can do - * something like this:

    - * - *
    -   * try {
    -   *   writer.close();
    -   * } finally {
    -   *   if (IndexWriter.isLocked(directory)) {
    -   *     IndexWriter.unlock(directory);
    -   *   }
    -   * }
    -   * 
    - * - * after which, you must be certain not to use the writer - * instance anymore.

    - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + *

    NOTE: You must ensure no other threads are still making + * changes at the same time that this method is invoked.
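In code, the normal shutdown described here is just (sketch; writer is assumed):

    writer.commit(); // optional - close() also commits pending changes
    writer.close();  // waits for running merges, then releases the write lock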

    */ - public void close() throws CorruptIndexException, IOException { + @Override + public void close() throws IOException { close(true); } /** * Closes the index with or without waiting for currently * running merges to finish. This is only meaningful when * using a MergeScheduler that runs merges in background - * threads. + * threads. See {@link #close()} for details on behavior + * when exceptions are thrown. + * + *

    NOTE: it is dangerous to always call + * close(false), especially when IndexWriter is not open + * for very long, because this can result in "merge + * starvation" whereby long merges will never have a + * chance to finish. This will cause too many segments in + * your index over time, which leads to all sorts of + * problems like slow searches, too much RAM and too + * many file descriptors used by readers, etc.
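Per the deprecation note further down in this patch, the replacement for close(false) is to commit and then rollback; as a sketch (writer is assumed):

    writer.commit();   // keep everything indexed so far
    writer.rollback(); // abort any running merges and close, releasing the write lock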

    + * * @param waitForMerges if true, this call will block * until all merges complete; else, it will ask all * running merges to abort, wait until those merges have * finished (which should be at most a few seconds), and * then return. + * + * @deprecated To abort merges and then close, call + * {@link #commit} and then {@link #rollback} instead. */ - public void close(boolean waitForMerges) throws CorruptIndexException, IOException { + @Deprecated + public void close(boolean waitForMerges) throws IOException { + shutdown(waitForMerges); + } - // If any methods have hit OutOfMemoryError, then abort - // on close, in case the internal state of IndexWriter - // or DocumentsWriter is corrupt - if (hitOOM) { - rollback(); - return; + private boolean assertEventQueueAfterClose() { + if (eventQueue.isEmpty()) { + return true; } - - // Ensure that only one thread actually gets to do the closing: - if (shouldClose()) - closeInternal(waitForMerges); + for (Event e : eventQueue) { + assert e instanceof DocumentsWriter.MergePendingEvent : e; + } + return true; } // Returns true if this thread should attempt to close, or @@ -1603,158 +1009,127 @@ // successfully) or another (fails to close) doWait(); } - } else + } else { return false; + } } } - private void closeInternal(boolean waitForMerges) throws CorruptIndexException, IOException { + private void closeInternal(boolean waitForMerges, boolean doFlush) throws IOException { + boolean interrupted = false; + try { - docWriter.pauseAllThreads(); + if (pendingCommit != null) { + throw new IllegalStateException("cannot close: prepareCommit was already called with no corresponding call to commit"); + } - try { - if (infoStream != null) - message("now flush at close"); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "now flush at close waitForMerges=" + waitForMerges); + } docWriter.close(); - // Only allow a new merge to be triggered if we are - // going to wait for merges: - flush(waitForMerges, true, true); + try { + // Only allow a new merge to be triggered if we are + // going to wait for merges: + if (doFlush) { + flush(waitForMerges, true); + } else { + docWriter.abort(this); // already closed -- never sync on IW + } + + } finally { + try { + // clean up merge scheduler in all cases, although flushing may have failed: + interrupted = Thread.interrupted(); + + if (waitForMerges) { + try { + // Give merge scheduler last chance to run, in case + // any pending merges are waiting: + mergeScheduler.merge(this, MergeTrigger.CLOSING, false); + } catch (ThreadInterruptedException tie) { + // ignore any interruption, does not matter + interrupted = true; + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "interrupted while waiting for final merges"); + } + } + } + + synchronized(this) { + for (;;) { + try { + if (waitForMerges && !interrupted) { + waitForMerges(); + } else { + abortMerges(); + } + break; + } catch (ThreadInterruptedException tie) { + // by setting the interrupted status, the next iteration will abort merges + interrupted = true; + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "interrupted while waiting for merges to finish"); + } + } + } + stopMerges = true; + } + + } finally { + // shutdown policy, scheduler and all threads (this call is not interruptible): + IOUtils.closeWhileHandlingException(mergeScheduler); + } + } - if (waitForMerges) - // Give merge scheduler last chance to run, in case - // any pending merges are waiting: - mergeScheduler.merge(this); + if (infoStream.isEnabled("IW")) { + 
infoStream.message("IW", "now call final commit()"); + } - mergePolicy.close(); - - finishMerges(waitForMerges); - - mergeScheduler.close(); - - if (infoStream != null) - message("now call final commit()"); - - commit(0); - - if (infoStream != null) - message("at close: " + segString()); - + if (doFlush) { + commitInternal(config.getMergePolicy()); + } + processEvents(false, true); synchronized(this) { - docWriter = null; + // commitInternal calls ReaderPool.commit, which + // writes any pending liveDocs from ReaderPool, so + // it's safe to drop all readers now: + readerPool.dropAll(true); deleter.close(); } - - if (closeDir) - directory.close(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "at close: " + segString()); + } + if (writeLock != null) { - writeLock.release(); // release write lock + writeLock.close(); // release write lock writeLock = null; } synchronized(this) { closed = true; } + assert docWriter.perThreadPool.numDeactivatedThreadStates() == docWriter.perThreadPool.getMaxThreadStates() : "" + docWriter.perThreadPool.numDeactivatedThreadStates() + " " + docWriter.perThreadPool.getMaxThreadStates(); } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + tragicEvent(oom, "closeInternal"); } finally { synchronized(this) { closing = false; notifyAll(); if (!closed) { - if (docWriter != null) - docWriter.resumeAllThreads(); - if (infoStream != null) - message("hit exception while closing"); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception while closing"); + } } } + // finally, restore interrupt status: + if (interrupted) Thread.currentThread().interrupt(); } } - /** Tells the docWriter to close its currently open shared - * doc stores (stored fields & vectors files). - * Return value specifices whether new doc store files are compound or not. - */ - private synchronized boolean flushDocStores() throws IOException { - - boolean useCompoundDocStore = false; - - String docStoreSegment; - - boolean success = false; - try { - docStoreSegment = docWriter.closeDocStore(); - success = true; - } finally { - if (!success) { - if (infoStream != null) - message("hit exception closing doc store segment"); - } - } - - useCompoundDocStore = mergePolicy.useCompoundDocStore(segmentInfos); - - if (useCompoundDocStore && docStoreSegment != null && docWriter.closedFiles().size() != 0) { - // Now build compound doc store file - - success = false; - - final int numSegments = segmentInfos.size(); - final String compoundFileName = docStoreSegment + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION; - - try { - CompoundFileWriter cfsWriter = new CompoundFileWriter(directory, compoundFileName); - final Iterator it = docWriter.closedFiles().iterator(); - while(it.hasNext()) - cfsWriter.addFile((String) it.next()); - - // Perform the merge - cfsWriter.close(); - success = true; - - } finally { - if (!success) { - if (infoStream != null) - message("hit exception building compound file doc store for segment " + docStoreSegment); - deleter.deleteFile(compoundFileName); - } - } - - for(int i=0;i - * Note that this effectively truncates large documents, excluding from the - * index terms that occur further in the document. If you know your source - * documents are large, be sure to set this value high enough to accomodate - * the expected size. If you set it to Integer.MAX_VALUE, then the only limit - * is your memory, but you should anticipate an OutOfMemoryError.

    - * By default, no more than 10,000 terms will be indexed for a field. + * Adds a document to this index. * - * @see MaxFieldLength - */ - private int maxFieldLength; - - /** - * Adds a document to this index. If the document contains more than - * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are - * discarded. - * *

    Note that if an Exception is hit (for example disk full) * then the index will be consistent, but this document * may not have been added. Furthermore, it's possible @@ -1859,12 +1210,12 @@ * readers/searchers are open against the index, and up to * 2X the size of all segments being merged when * readers/searchers are open against the index (see - * {@link #optimize()} for details). The sequence of + * {@link #forceMerge(int)} for details). The sequence of * primitive merge operations performed is governed by the * merge policy. * *

    Note that each term in the document can be no longer - * than 16383 characters, otherwise an + * than {@link #MAX_TERM_LENGTH} in bytes, otherwise an * IllegalArgumentException will be thrown.
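A minimal sketch of the Iterable-based addDocument call that this hunk introduces; the field setup and the already-configured IndexWriter named "writer" are assumed for illustration, not part of this change:

    // Illustrative only -- "writer" is an assumed, already-configured IndexWriter.
    Document doc = new Document();
    doc.add(new StringField("id", "42", Field.Store.YES));
    doc.add(new TextField("body", "some analyzed text", Field.Store.NO));
    writer.addDocument(doc);   // Document implements Iterable<IndexableField>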

    * *

    Note that it's possible to create an invalid Unicode @@ -1876,120 +1227,247 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc) throws CorruptIndexException, IOException { + public void addDocument(Iterable doc) throws IOException { addDocument(doc, analyzer); } /** * Adds a document to this index, using the provided analyzer instead of the - * value of {@link #getAnalyzer()}. If the document contains more than - * {@link #setMaxFieldLength(int)} terms for a given field, the remainder are - * discarded. + * value of {@link #getAnalyzer()}. * - *

    See {@link #addDocument(Document)} for details on + *

    See {@link #addDocument(Iterable)} for details on * index and IndexWriter state after an Exception, and * flushing/merging temporary free space requirements.

    * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void addDocument(Document doc, Analyzer analyzer) throws CorruptIndexException, IOException { + public void addDocument(Iterable doc, Analyzer analyzer) throws IOException { + updateDocument(null, doc, analyzer); + } + + /** + * Atomically adds a block of documents with sequentially + * assigned document IDs, such that an external reader + * will see all or none of the documents. + * + *

    WARNING: the index does not currently record + * which documents were added as a block. Today this is + * fine, because merging will preserve a block. The order of + * documents within a segment will be preserved, even when child + * documents within a block are deleted. Most search features + * (like result grouping and block joining) require you to + * mark documents; when these documents are deleted these + * search features will not work as expected. Obviously adding + * documents to an existing block will require you to reindex + * the entire block. + * + *

    However it's possible that in the future Lucene may + * more aggressively re-order documents during merging (for example, + * perhaps to obtain better index compression), in which case + * you may need to fully re-index your documents at that time. + * + *

    See {@link #addDocument(Iterable)} for details on + * index and IndexWriter state after an Exception, and + * flushing/merging temporary free space requirements.
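A hedged usage sketch of the atomic block add (addDocuments) described above; the parent/child fields, the "skus" collection and the "writer" variable are assumed for illustration. Children are added before the parent, the order block-join search expects:

    // Illustrative only: index a child/parent block atomically.
    List<Document> block = new ArrayList<>();
    for (String sku : skus) {                 // "skus" is assumed example data
      Document child = new Document();
      child.add(new StringField("sku", sku, Field.Store.YES));
      block.add(child);
    }
    Document parent = new Document();
    parent.add(new StringField("type", "product", Field.Store.YES));
    block.add(parent);                        // parent last, by convention
    writer.addDocuments(block);               // readers see all docs or none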

    + * + *

    NOTE: tools that do offline splitting of an index + * (for example, IndexSplitter in contrib) or + * re-sorting of documents (for example, IndexSorter in + * contrib) are not aware of these atomically added documents + * and will likely break them up. Use such tools at your + * own risk! + * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * + * @lucene.experimental + */ + public void addDocuments(Iterable> docs) throws IOException { + addDocuments(docs, analyzer); + } + + /** + * Atomically adds a block of documents, analyzed using the + * provided analyzer, with sequentially assigned document + * IDs, such that an external reader will see all or none + * of the documents. + * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * + * @lucene.experimental + */ + public void addDocuments(Iterable> docs, Analyzer analyzer) throws IOException { + updateDocuments(null, docs, analyzer); + } + + /** + * Atomically deletes documents matching the provided + * delTerm and adds a block of documents with sequentially + * assigned document IDs, such that an external reader + * will see all or none of the documents. + * + * See {@link #addDocuments(Iterable)}. + * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * + * @lucene.experimental + */ + public void updateDocuments(Term delTerm, Iterable> docs) throws IOException { + updateDocuments(delTerm, docs, analyzer); + } + + /** + * Atomically deletes documents matching the provided + * delTerm and adds a block of documents, analyzed using + * the provided analyzer, with sequentially + * assigned document IDs, such that an external reader + * will see all or none of the documents. + * + * See {@link #addDocuments(Iterable)}. + * + * @throws CorruptIndexException if the index is corrupt + * @throws IOException if there is a low-level IO error + * + * @lucene.experimental + */ + public void updateDocuments(Term delTerm, Iterable> docs, Analyzer analyzer) throws IOException { ensureOpen(); - boolean doFlush = false; - boolean success = false; try { + boolean success = false; try { - doFlush = docWriter.addDocument(doc, analyzer); + if (docWriter.updateDocuments(docs, analyzer, delTerm)) { + processEvents(true, false); + } success = true; } finally { if (!success) { - - if (infoStream != null) - message("hit exception adding document"); - - synchronized (this) { - // If docWriter has some aborted files that were - // never incref'd, then we clean them up here - if (docWriter != null) { - final Collection files = docWriter.abortedFiles(); - if (files != null) - deleter.deleteNewFiles(files); - } + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception updating document"); } } } - if (doFlush) - flush(true, false, false); } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + tragicEvent(oom, "updateDocuments"); } } - /** - * Deletes the document(s) containing term. 
- * @param term the term to identify the documents to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public void deleteDocuments(Term term) throws CorruptIndexException, IOException { - ensureOpen(); - try { - boolean doFlush = docWriter.bufferDeleteTerm(term); - if (doFlush) - flush(true, false, false); - } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + /** Expert: attempts to delete by document ID, as long as + * the provided reader is a near-real-time reader (from {@link + * DirectoryReader#open(IndexWriter,boolean)}). If the + * provided reader is an NRT reader obtained from this + * writer, and its segment has not been merged away, then + * the delete succeeds and this method returns true; else, it + * returns false the caller must then separately delete by + * Term or Query. + * + * NOTE: this method can only delete documents + * visible to the currently open NRT reader. If you need + * to delete documents indexed after opening the NRT + * reader you must use {@link #deleteDocuments(Term...)}). */ + public synchronized boolean tryDeleteDocument(IndexReader readerIn, int docID) throws IOException { + + final AtomicReader reader; + if (readerIn instanceof AtomicReader) { + // Reader is already atomic: use the incoming docID: + reader = (AtomicReader) readerIn; + } else { + // Composite reader: lookup sub-reader and re-base docID: + List leaves = readerIn.leaves(); + int subIndex = ReaderUtil.subIndex(docID, leaves); + reader = leaves.get(subIndex).reader(); + docID -= leaves.get(subIndex).docBase; + assert docID >= 0; + assert docID < reader.maxDoc(); } + + if (!(reader instanceof SegmentReader)) { + throw new IllegalArgumentException("the reader must be a SegmentReader or composite reader containing only SegmentReaders"); + } + + final SegmentCommitInfo info = ((SegmentReader) reader).getSegmentInfo(); + + // TODO: this is a slow linear search, but, number of + // segments should be contained unless something is + // seriously wrong w/ the index, so it should be a minor + // cost: + + if (segmentInfos.indexOf(info) != -1) { + ReadersAndUpdates rld = readerPool.get(info, false); + if (rld != null) { + synchronized(bufferedUpdatesStream) { + rld.initWritableLiveDocs(); + if (rld.delete(docID)) { + final int fullDelCount = rld.info.getDelCount() + rld.getPendingDeleteCount(); + if (fullDelCount == rld.info.info.getDocCount()) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(rld.info)) { + segmentInfos.remove(rld.info); + readerPool.drop(rld.info); + checkpoint(); + } + } + + // Must bump changeCount so if no other changes + // happened, we still commit this change: + changed(); + } + //System.out.println(" yes " + info.info.name + " " + docID); + return true; + } + } else { + //System.out.println(" no rld " + info.info.name + " " + docID); + } + } else { + //System.out.println(" no seg " + info.info.name + " " + docID); + } + return false; } /** * Deletes the document(s) containing any of the - * terms. All deletes are flushed at the same time. + * terms. All given deletes are applied and flushed atomically + * at the same time. 
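A small sketch of how the expert tryDeleteDocument path above is typically paired with a Term delete (the "id" field and the nrtReader/docId variables are assumed): the by-docID delete only succeeds while the NRT segment is still live, so the Term delete is the fallback.

    // Illustrative only: cheap by-docID delete first, Term delete as fallback.
    if (!writer.tryDeleteDocument(nrtReader, docId)) {
      writer.deleteDocuments(new Term("id", idValue));
    }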
+ * * @param terms array of terms to identify the documents * to be deleted * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void deleteDocuments(Term[] terms) throws CorruptIndexException, IOException { + public void deleteDocuments(Term... terms) throws IOException { ensureOpen(); try { - boolean doFlush = docWriter.bufferDeleteTerms(terms); - if (doFlush) - flush(true, false, false); + if (docWriter.deleteTerms(terms)) { + processEvents(true, false); + } } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + tragicEvent(oom, "deleteDocuments(Term..)"); } } /** - * Deletes the document(s) matching the provided query. - * @param query the query to identify the documents to be deleted - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public void deleteDocuments(Query query) throws CorruptIndexException, IOException { - ensureOpen(); - boolean doFlush = docWriter.bufferDeleteQuery(query); - if (doFlush) - flush(true, false, false); - } - - /** * Deletes the document(s) matching any of the provided queries. - * All deletes are flushed at the same time. + * All given deletes are applied and flushed atomically at the same time. + * * @param queries array of queries to identify the documents * to be deleted * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void deleteDocuments(Query[] queries) throws CorruptIndexException, IOException { + public void deleteDocuments(Query... queries) throws IOException { ensureOpen(); - boolean doFlush = docWriter.bufferDeleteQueries(queries); - if (doFlush) - flush(true, false, false); + try { + if (docWriter.deleteQueries(queries)) { + processEvents(true, false); + } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "deleteDocuments(Query..)"); + } } /** @@ -1998,15 +1476,16 @@ * document. The delete and then add are atomic as seen * by a reader on the same index (flush may happen only after * the add). + * * @param term the term to identify the document(s) to be * deleted * @param doc the document to be added * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void updateDocument(Term term, Document doc) throws CorruptIndexException, IOException { + public void updateDocument(Term term, Iterable doc) throws IOException { ensureOpen(); - updateDocument(term, doc, getAnalyzer()); + updateDocument(term, doc, analyzer); } /** @@ -2015,72 +1494,180 @@ * document. The delete and then add are atomic as seen * by a reader on the same index (flush may happen only after * the add). 
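The delete-then-add semantics of updateDocument described above, as a minimal sketch keyed on an assumed unique "id" field:

    // Illustrative only: atomically replace the document whose id is "42".
    Document doc = new Document();
    doc.add(new StringField("id", "42", Field.Store.YES));
    doc.add(new TextField("body", "updated text", Field.Store.NO));
    writer.updateDocument(new Term("id", "42"), doc);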
+ * * @param term the term to identify the document(s) to be * deleted * @param doc the document to be added * @param analyzer the analyzer to use when analyzing the document * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public void updateDocument(Term term, Document doc, Analyzer analyzer) - throws CorruptIndexException, IOException { + public void updateDocument(Term term, Iterable doc, Analyzer analyzer) + throws IOException { ensureOpen(); try { - boolean doFlush = false; boolean success = false; try { - doFlush = docWriter.updateDocument(term, doc, analyzer); + if (docWriter.updateDocument(doc, analyzer, term)) { + processEvents(true, false); + } success = true; } finally { if (!success) { - - if (infoStream != null) - message("hit exception updating document"); - - synchronized (this) { - // If docWriter has some aborted files that were - // never incref'd, then we clean them up here - final Collection files = docWriter.abortedFiles(); - if (files != null) - deleter.deleteNewFiles(files); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception updating document"); } } } - if (doFlush) - flush(true, false, false); } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + tragicEvent(oom, "updateDocument"); } } + /** + * Updates a document's {@link NumericDocValues} for field to the + * given value. You can only update fields that already exist in + * the index, not add new fields through this method. + * + * @param term + * the term to identify the document(s) to be updated + * @param field + * field name of the {@link NumericDocValues} field + * @param value + * new value for the field + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public void updateNumericDocValue(Term term, String field, long value) throws IOException { + ensureOpen(); + if (!globalFieldNumberMap.contains(field, DocValuesType.NUMERIC)) { + throw new IllegalArgumentException("can only update existing numeric-docvalues fields!"); + } + try { + if (docWriter.updateDocValues(new NumericDocValuesUpdate(term, field, value))) { + processEvents(true, false); + } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "updateNumericDocValue"); + } + } + + /** + * Updates a document's {@link BinaryDocValues} for field to the + * given value. You can only update fields that already exist in + * the index, not add new fields through this method. + * + *
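A sketch of the in-place doc-values updates introduced here (updateNumericDocValue / updateBinaryDocValue); the "popularity" and "payload" fields are assumed examples and must already exist in the index with matching doc-values types:

    // Illustrative only: update doc values without re-indexing the document.
    writer.updateNumericDocValue(new Term("id", "42"), "popularity", 17L);
    writer.updateBinaryDocValue(new Term("id", "42"), "payload", new BytesRef("new-bytes"));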

    + * NOTE: this method currently replaces the existing value of all + * affected documents with the new value. + * + * @param term + * the term to identify the document(s) to be updated + * @param field + * field name of the {@link BinaryDocValues} field + * @param value + * new value for the field + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public void updateBinaryDocValue(Term term, String field, BytesRef value) throws IOException { + ensureOpen(); + if (value == null) { + throw new IllegalArgumentException("cannot update a field to a null value: " + field); + } + if (!globalFieldNumberMap.contains(field, DocValuesType.BINARY)) { + throw new IllegalArgumentException("can only update existing binary-docvalues fields!"); + } + try { + if (docWriter.updateDocValues(new BinaryDocValuesUpdate(term, field, value))) { + processEvents(true, false); + } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "updateBinaryDocValue"); + } + } + + /** + * Updates documents' DocValues fields to the given values. Each field update + * is applied to the set of documents that are associated with the + * {@link Term} to the same value. All updates are atomically applied and + * flushed together. + * + * @param updates + * the updates to apply + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public void updateDocValues(Term term, Field... updates) throws IOException { + ensureOpen(); + DocValuesUpdate[] dvUpdates = new DocValuesUpdate[updates.length]; + for (int i = 0; i < updates.length; i++) { + final Field f = updates[i]; + final DocValuesType dvType = f.fieldType().docValueType(); + if (dvType == null) { + throw new IllegalArgumentException("can only update NUMERIC or BINARY fields! field=" + f.name()); + } + if (!globalFieldNumberMap.contains(f.name(), dvType)) { + throw new IllegalArgumentException("can only update existing docvalues fields! 
field=" + f.name() + ", type=" + dvType); + } + switch (dvType) { + case NUMERIC: + dvUpdates[i] = new NumericDocValuesUpdate(term, f.name(), (Long) f.numericValue()); + break; + case BINARY: + dvUpdates[i] = new BinaryDocValuesUpdate(term, f.name(), f.binaryValue()); + break; + default: + throw new IllegalArgumentException("can only update NUMERIC or BINARY fields: field=" + f.name() + ", type=" + dvType); + } + } + try { + if (docWriter.updateDocValues(dvUpdates)) { + processEvents(true, false); + } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "updateDocValues"); + } + } + // for test purpose final synchronized int getSegmentCount(){ return segmentInfos.size(); } // for test purpose final synchronized int getNumBufferedDocuments(){ - return docWriter.getNumDocsInRAM(); + return docWriter.getNumDocs(); } // for test purpose + final synchronized Collection getIndexFileNames() throws IOException { + return segmentInfos.files(directory, true); + } + + // for test purpose final synchronized int getDocCount(int i) { if (i >= 0 && i < segmentInfos.size()) { - return segmentInfos.info(i).docCount; + return segmentInfos.info(i).info.getDocCount(); } else { return -1; } } // for test purpose - final synchronized int getFlushCount() { - return flushCount; + final int getFlushCount() { + return flushCount.get(); } // for test purpose - final synchronized int getFlushDeletesCount() { - return flushDeletesCount; + final int getFlushDeletesCount() { + return flushDeletesCount.get(); } final String newSegmentName() { @@ -2093,178 +1680,143 @@ // name that was previously returned which can cause // problems at least with ConcurrentMergeScheduler. changeCount++; + segmentInfos.changed(); return "_" + Integer.toString(segmentInfos.counter++, Character.MAX_RADIX); } } /** If non-null, information about merges will be printed to this. */ - private PrintStream infoStream = null; - private static PrintStream defaultInfoStream = null; + final InfoStream infoStream; /** - * Requests an "optimize" operation on an index, priming the index - * for the fastest available search. Traditionally this has meant - * merging all segments into a single segment as is done in the - * default merge policy, but individaul merge policies may implement - * optimize in different ways. + * Forces merge policy to merge segments until there are <= + * maxNumSegments. The actual merges to be + * executed are determined by the {@link MergePolicy}. * - * @see LogMergePolicy#findMergesForOptimize + *

    This is a horribly costly operation, especially when + * you pass a small {@code maxNumSegments}; usually you + * should only call this if the index is static (will no + * longer be changed).
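The usage pattern implied by the paragraph above, as a sketch: forceMerge is usually reserved for an index that will no longer change, and a commit afterwards lets the writer free the pre-merge files.

    // Illustrative only: merge a static index down to one segment, then commit.
    writer.forceMerge(1);
    writer.commit();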

    * - *

    It is recommended that this method be called upon completion of indexing. In - * environments with frequent updates, optimize is best done during low volume times, if at all. - * - *

    - *

    See http://www.gossamer-threads.com/lists/lucene/java-dev/47895 for more discussion.

    + *

    Note that this requires up to 2X the index size free + * space in your Directory (3X if you're using compound + * file format). For example, if your index size is 10 MB + * then you need up to 20 MB free for this to complete (30 + * MB if you're using compound file format). Also, + * it's best to call {@link #commit()} afterwards, + * to allow IndexWriter to free up disk space.

    * - *

    Note that this can require substantial temporary free - * space in the Directory (see LUCENE-764 - * for details):

    + *

    If some but not all readers re-open while merging + * is underway, this will cause > 2X temporary + * space to be consumed as those new readers will then + * hold open the temporary segments at that time. It is + * best not to re-open readers while merging is running.

    * - *
      - *
    • - * - *

      If no readers/searchers are open against the index, - * then free space required is up to 1X the total size of - * the starting index. For example, if the starting - * index is 10 GB, then you must have up to 10 GB of free - * space before calling optimize.

      - * - *
    • - * - *

      If readers/searchers are using the index, then free - * space required is up to 2X the size of the starting - * index. This is because in addition to the 1X used by - * optimize, the original 1X of the starting index is - * still consuming space in the Directory as the readers - * are holding the segments files open. Even on Unix, - * where it will appear as if the files are gone ("ls" - * won't list them), they still consume storage due to - * "delete on last close" semantics.

      - * - *

      Furthermore, if some but not all readers re-open - * while the optimize is underway, this will cause > 2X - * temporary space to be consumed as those new readers - * will then hold open the partially optimized segments at - * that time. It is best not to re-open readers while - * optimize is running.

      - * - *
    - * *

    The actual temporary usage could be much less than * these figures (it depends on many factors).

    * - *

    In general, once the optimize completes, the total size of the + *

    In general, once this completes, the total size of the * index will be less than the size of the starting index. * It could be quite a bit smaller (if there were many * pending deletes) or just slightly smaller.

    * - *

    If an Exception is hit during optimize(), for example - * due to disk full, the index will not be corrupt and no - * documents will have been lost. However, it may have - * been partially optimized (some segments were merged but + *

    If an Exception is hit, for example + * due to disk full, the index will not be corrupted and no + * documents will be lost. However, it may have + * been partially merged (some segments were merged but * not all), and it's possible that one of the segments in * the index will be in non-compound format even when * using compound file format. This will occur when the * Exception is hit during conversion of the segment into * compound format.

    * - *

    This call will optimize those segments present in + *

    This call will merge those segments present in * the index when the call started. If other threads are * still adding documents and flushing segments, those - * newly created segments will not be optimized unless you - * call optimize again.

    + * newly created segments will not be merged unless you + * call forceMerge again.

    * + *

    NOTE: if you call {@link #close(boolean)} + * with false, which aborts all running merges, + * then any thread still running this method might hit a + * {@link MergePolicy.MergeAbortedException}. + * + * @param maxNumSegments maximum number of segments left + * in the index after merging finishes + * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error + * @see MergePolicy#findMerges + * */ - public void optimize() throws CorruptIndexException, IOException { - optimize(true); + public void forceMerge(int maxNumSegments) throws IOException { + forceMerge(maxNumSegments, true); } - /** - * Optimize the index down to <= maxNumSegments. If - * maxNumSegments==1 then this is the same as {@link - * #optimize()}. - * @param maxNumSegments maximum number of segments left - * in the index after optimization finishes - */ - public void optimize(int maxNumSegments) throws CorruptIndexException, IOException { - optimize(maxNumSegments, true); - } - - /** Just like {@link #optimize()}, except you can specify - * whether the call should block until the optimize - * completes. This is only meaningful with a + /** Just like {@link #forceMerge(int)}, except you can + * specify whether the call should block until + * all merging completes. This is only meaningful with a * {@link MergeScheduler} that is able to run merges in - * background threads. */ - public void optimize(boolean doWait) throws CorruptIndexException, IOException { - optimize(1, doWait); - } - - /** Just like {@link #optimize(int)}, except you can - * specify whether the call should block until the - * optimize completes. This is only meaningful with a - * {@link MergeScheduler} that is able to run merges in - * background threads. */ - public void optimize(int maxNumSegments, boolean doWait) throws CorruptIndexException, IOException { + * background threads. + */ + public void forceMerge(int maxNumSegments, boolean doWait) throws IOException { ensureOpen(); if (maxNumSegments < 1) throw new IllegalArgumentException("maxNumSegments must be >= 1; got " + maxNumSegments); - if (infoStream != null) - message("optimize: index now " + segString()); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "forceMerge: index now " + segString()); + infoStream.message("IW", "now flush at forceMerge"); + } - flush(true, false, true); + flush(true, true); synchronized(this) { resetMergeExceptions(); - segmentsToOptimize = new HashSet(); - final int numSegments = segmentInfos.size(); - for(int i=0;i 0) { // Forward any exceptions in background merge // threads to the current thread: final int size = mergeExceptions.size(); for(int i=0;iNOTE: if you call {@link #close(boolean)} + * with false, which aborts all running merges, + * then any thread still running this method might hit a + * {@link MergePolicy.MergeAbortedException}. 
+ */ + public void forceMergeDeletes(boolean doWait) + throws IOException { ensureOpen(); - if (infoStream != null) - message("expungeDeletes: index now " + segString()); + flush(true, true); - MergePolicy.MergeSpecification spec; + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "forceMergeDeletes: index now " + segString()); + } + final MergePolicy mergePolicy = config.getMergePolicy(); + MergePolicy.MergeSpecification spec; + boolean newMergesFound = false; synchronized(this) { - spec = mergePolicy.findMergesToExpungeDeletes(segmentInfos, this); - if (spec != null) { + spec = mergePolicy.findForcedDeletesMerges(segmentInfos, this); + newMergesFound = spec != null; + if (newMergesFound) { final int numMerges = spec.merges.size(); for(int i=0;iThis is often a horribly costly operation; rarely + * is it warranted.

    + * + *

    To see how * many deletions you have pending in your index, call - * {@link IndexReader#numDeletedDocs} - * This saves disk space and memory usage while - * searching. expungeDeletes should be somewhat faster - * than optimize since it does not insist on reducing the - * index to a single segment (though, this depends on the - * {@link MergePolicy}; see {@link - * MergePolicy#findMergesToExpungeDeletes}.). Note that - * this call does not first commit any buffered - * documents, so you must do so yourself if necessary. - * See also {@link #expungeDeletes(boolean)} */ - public void expungeDeletes() throws CorruptIndexException, IOException { - expungeDeletes(true); + * {@link IndexReader#numDeletedDocs}.
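A sketch of the check the text above suggests before paying for forceMergeDeletes, using an NRT reader; the 10% threshold is an assumed example, not a rule from this change:

    // Illustrative only: reclaim space only when deletions have piled up.
    DirectoryReader reader = DirectoryReader.open(writer, true);
    try {
      if (reader.numDeletedDocs() > reader.maxDoc() / 10) {
        writer.forceMergeDeletes();
      }
    } finally {
      reader.close();
    }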

    + * + *

    NOTE: this method first flushes a new + * segment (if there are indexed documents), and applies + * all buffered deletes. + */ + public void forceMergeDeletes() throws IOException { + forceMergeDeletes(true); } /** @@ -2388,517 +1956,475 @@ * Explicit calls to maybeMerge() are usually not * necessary. The most common case is when merge policy * parameters have changed. + * + * This method will call the {@link MergePolicy} with + * {@link MergeTrigger#EXPLICIT}. */ - public final void maybeMerge() throws CorruptIndexException, IOException { - maybeMerge(false); + public final void maybeMerge() throws IOException { + maybeMerge(config.getMergePolicy(), MergeTrigger.EXPLICIT, UNBOUNDED_MAX_MERGE_SEGMENTS); } - private final void maybeMerge(boolean optimize) throws CorruptIndexException, IOException { - maybeMerge(1, optimize); + private final void maybeMerge(MergePolicy mergePolicy, MergeTrigger trigger, int maxNumSegments) throws IOException { + ensureOpen(false); + boolean newMergesFound = updatePendingMerges(mergePolicy, trigger, maxNumSegments); + mergeScheduler.merge(this, trigger, newMergesFound); } - private final void maybeMerge(int maxNumSegmentsOptimize, boolean optimize) throws CorruptIndexException, IOException { - updatePendingMerges(maxNumSegmentsOptimize, optimize); - mergeScheduler.merge(this); - } + private synchronized boolean updatePendingMerges(MergePolicy mergePolicy, MergeTrigger trigger, int maxNumSegments) + throws IOException { - private synchronized void updatePendingMerges(int maxNumSegmentsOptimize, boolean optimize) - throws CorruptIndexException, IOException { - assert !optimize || maxNumSegmentsOptimize > 0; + // In case infoStream was disabled on init, but then enabled at some + // point, try again to log the config here: + messageState(); - if (stopMerges) - return; + assert maxNumSegments == -1 || maxNumSegments > 0; + assert trigger != null; + if (stopMerges) { + return false; + } + // Do not start new merges if disaster struck + if (tragedy != null) { + return false; + } + boolean newMergesFound = false; final MergePolicy.MergeSpecification spec; - if (optimize) { - spec = mergePolicy.findMergesForOptimize(segmentInfos, this, maxNumSegmentsOptimize, segmentsToOptimize); - - if (spec != null) { + if (maxNumSegments != UNBOUNDED_MAX_MERGE_SEGMENTS) { + assert trigger == MergeTrigger.EXPLICIT || trigger == MergeTrigger.MERGE_FINISHED : + "Expected EXPLICT or MERGE_FINISHED as trigger even with maxNumSegments set but was: " + trigger.name(); + spec = mergePolicy.findForcedMerges(segmentInfos, maxNumSegments, Collections.unmodifiableMap(segmentsToMerge), this); + newMergesFound = spec != null; + if (newMergesFound) { final int numMerges = spec.merges.size(); for(int i=0;iDo not alter the returned collection! */ + public synchronized Collection getMergingSegments() { + return mergingSegments; + } + + /** + * Expert: the {@link MergeScheduler} calls this method to retrieve the next + * merge requested by the MergePolicy + * + * @lucene.experimental + */ + public synchronized MergePolicy.OneMerge getNextMerge() { + if (pendingMerges.size() == 0) { return null; - else { + } else { // Advance the merge from pending to running - MergePolicy.OneMerge merge = (MergePolicy.OneMerge) pendingMerges.removeFirst(); + MergePolicy.OneMerge merge = pendingMerges.removeFirst(); runningMerges.add(merge); return merge; } } - /** Like getNextMerge() except only returns a merge if it's - * external. 
*/ - private synchronized MergePolicy.OneMerge getNextExternalMerge() { - if (pendingMerges.size() == 0) - return null; - else { - Iterator it = pendingMerges.iterator(); - while(it.hasNext()) { - MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next(); - if (merge.isExternal) { - // Advance the merge from pending to running - it.remove(); - runningMerges.add(merge); - return merge; - } - } - - // All existing merges do not involve external segments - return null; - } - } - - /* - * Begin a transaction. During a transaction, any segment - * merges that happen (or ram segments flushed) will not - * write a new segments file and will not remove any files - * that were present at the start of the transaction. You - * must make a matched (try/finally) call to - * commitTransaction() or rollbackTransaction() to finish - * the transaction. - * - * Note that buffered documents and delete terms are not handled - * within the transactions, so they must be flushed before the - * transaction is started. - */ - private synchronized void startTransaction(boolean haveWriteLock) throws IOException { - - boolean success = false; - try { - if (infoStream != null) - message("now start transaction"); - - assert docWriter.getNumBufferedDeleteTerms() == 0 : - "calling startTransaction with buffered delete terms not supported: numBufferedDeleteTerms=" + docWriter.getNumBufferedDeleteTerms(); - assert docWriter.getNumDocsInRAM() == 0 : - "calling startTransaction with buffered documents not supported: numDocsInRAM=" + docWriter.getNumDocsInRAM(); - - ensureOpen(); - - // If a transaction is trying to roll back (because - // addIndexes hit an exception) then wait here until - // that's done: - synchronized(this) { - while(stopMerges) - doWait(); - } - success = true; - } finally { - // Release the write lock if our caller held it, on - // hitting an exception - if (!success && haveWriteLock) - releaseWrite(); - } - - if (!haveWriteLock) - acquireWrite(); - - success = false; - try { - localRollbackSegmentInfos = (SegmentInfos) segmentInfos.clone(); - - assert !hasExternalSegments(segmentInfos); - - localAutoCommit = autoCommit; - localFlushedDocCount = docWriter.getFlushedDocCount(); - - if (localAutoCommit) { - - if (infoStream != null) - message("flush at startTransaction"); - - flush(true, false, false); - - // Turn off auto-commit during our local transaction: - autoCommit = false; - } else - // We must "protect" our files at this point from - // deletion in case we need to rollback: - deleter.incRef(segmentInfos, false); - - success = true; - } finally { - if (!success) - finishAddIndexes(); - } - } - - /* - * Rolls back the transaction and restores state to where - * we were at the start. - */ - private synchronized void rollbackTransaction() throws IOException { - - if (infoStream != null) - message("now rollback transaction"); - - // First restore autoCommit in case we hit an exception below: - autoCommit = localAutoCommit; - docWriter.setFlushedDocCount(localFlushedDocCount); - - // Must finish merges before rolling back segmentInfos - // so merges don't hit exceptions on trying to commit - // themselves, don't get files deleted out from under - // them, etc: - finishMerges(false); - - // Keep the same segmentInfos instance but replace all - // of its SegmentInfo instances. This is so the next - // attempt to commit using this instance of IndexWriter - // will always write to a new generation ("write once"). 
- segmentInfos.clear(); - segmentInfos.addAll(localRollbackSegmentInfos); - localRollbackSegmentInfos = null; - - // This must come after we rollback segmentInfos, so - // that if a commit() kicks off it does not see the - // segmentInfos with external segments - finishAddIndexes(); - - // Ask deleter to locate unreferenced files we had - // created & remove them: - deleter.checkpoint(segmentInfos, false); - - if (!autoCommit) - // Remove the incRef we did in startTransaction: - deleter.decRef(segmentInfos); - - // Also ask deleter to remove any newly created files - // that were never incref'd; this "garbage" is created - // when a merge kicks off but aborts part way through - // before it had a chance to incRef the files it had - // partially created - deleter.refresh(); - - notifyAll(); - - assert !hasExternalSegments(); - } - - /* - * Commits the transaction. This will write the new - * segments file and remove and pending deletions we have - * accumulated during the transaction - */ - private synchronized void commitTransaction() throws IOException { - - if (infoStream != null) - message("now commit transaction"); - - // First restore autoCommit in case we hit an exception below: - autoCommit = localAutoCommit; - - // Give deleter a chance to remove files now: - checkpoint(); - - if (autoCommit) { - boolean success = false; - try { - commit(0); - success = true; - } finally { - if (!success) { - if (infoStream != null) - message("hit exception committing transaction"); - rollbackTransaction(); - } - } - } else - // Remove the incRef we did in startTransaction. - deleter.decRef(localRollbackSegmentInfos); - - localRollbackSegmentInfos = null; - - assert !hasExternalSegments(); - - finishAddIndexes(); - } - /** - * @deprecated Please use {@link #rollback} instead. + * Expert: returns true if there are merges waiting to be scheduled. + * + * @lucene.experimental */ - public void abort() throws IOException { - rollback(); + public synchronized boolean hasPendingMerges() { + return pendingMerges.size() != 0; } /** * Close the IndexWriter without committing - * any of the changes that have occurred since it was - * opened. This removes any temporary files that had been - * created, after which the state of the index will be the - * same as it was when this writer was first opened. This - * can only be called when this IndexWriter was opened - * with autoCommit=false. This also clears a - * previous call to {@link #prepareCommit}. - * @throws IllegalStateException if this is called when - * the writer was opened with autoCommit=true. + * any changes that have occurred since the last commit + * (or since it was opened, if commit hasn't been called). + * This removes any temporary files that had been created, + * after which the state of the index will be the same as + * it was when commit() was last called or when this + * writer was first opened. This also clears a previous + * call to {@link #prepareCommit}. * @throws IOException if there is a low-level IO error */ + @Override public void rollback() throws IOException { - ensureOpen(); - if (autoCommit) - throw new IllegalStateException("rollback() can only be called when IndexWriter was opened with autoCommit=false"); - - // Ensure that only one thread actually gets to do the closing: - if (shouldClose()) - rollbackInternal(); + // don't call ensureOpen here: this acts like "close()" in closeable. 
+ + // Ensure that only one thread actually gets to do the + // closing, and make sure no commit is also in progress: + synchronized(commitLock) { + if (shouldClose()) { + rollbackInternal(); + } + } } private void rollbackInternal() throws IOException { boolean success = false; - docWriter.pauseAllThreads(); - + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "rollback"); + } + try { - finishMerges(false); + synchronized(this) { + abortMerges(); + stopMerges = true; + } - // Must pre-close these two, in case they increment - // changeCount so that we can then set it to false - // before calling closeInternal - mergePolicy.close(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "rollback: done finish merges"); + } + + // Must pre-close in case it increments changeCount so that we can then + // set it to false before calling closeInternal mergeScheduler.close(); + bufferedUpdatesStream.clear(); + docWriter.close(); // mark it as closed first to prevent subsequent indexing actions/flushes + docWriter.abort(this); // don't sync on IW here synchronized(this) { if (pendingCommit != null) { pendingCommit.rollbackCommit(directory); - deleter.decRef(pendingCommit); - pendingCommit = null; - notifyAll(); + try { + deleter.decRef(pendingCommit); + } finally { + pendingCommit = null; + notifyAll(); + } } + // Don't bother saving any changes in our segmentInfos + readerPool.dropAll(false); + // Keep the same segmentInfos instance but replace all // of its SegmentInfo instances. This is so the next // attempt to commit using this instance of IndexWriter // will always write to a new generation ("write // once"). - segmentInfos.clear(); - segmentInfos.addAll(rollbackSegmentInfos); + segmentInfos.rollbackSegmentInfos(rollbackSegments); + if (infoStream.isEnabled("IW") ) { + infoStream.message("IW", "rollback: infos=" + segString(segmentInfos)); + } - assert !hasExternalSegments(); - - docWriter.abort(); + testPoint("rollback before checkpoint"); - assert testPoint("rollback before checkpoint"); - // Ask deleter to locate unreferenced files & remove // them: deleter.checkpoint(segmentInfos, false); deleter.refresh(); - } - lastCommitChangeCount = changeCount; + lastCommitChangeCount = changeCount; + + deleter.refresh(); + deleter.close(); + // Must set closed while inside same sync block where we call deleter.refresh, else concurrent threads may try to sneak a flush in, + // after we leave this sync block and before we enter the sync block in the finally clause below that sets closed: + closed = true; + + IOUtils.close(writeLock); // release write lock + writeLock = null; + + assert docWriter.perThreadPool.numDeactivatedThreadStates() == docWriter.perThreadPool.getMaxThreadStates() : "" + docWriter.perThreadPool.numDeactivatedThreadStates() + " " + docWriter.perThreadPool.getMaxThreadStates(); + } + success = true; } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + tragicEvent(oom, "rollbackInternal"); } finally { + if (!success) { + // Must not hold IW's lock while closing + // mergeScheduler: this can lead to deadlock, + // e.g. 
TestIW.testThreadInterruptDeadlock + IOUtils.closeWhileHandlingException(mergeScheduler); + } synchronized(this) { if (!success) { - docWriter.resumeAllThreads(); - closing = false; - notifyAll(); - if (infoStream != null) - message("hit exception during rollback"); + // we tried to be nice about it: do the minimum + + // don't leak a segments_N file if there is a pending commit + if (pendingCommit != null) { + try { + pendingCommit.rollbackCommit(directory); + deleter.decRef(pendingCommit); + } catch (Throwable t) { + } + pendingCommit = null; + } + + // close all the closeables we can (but important is readerPool and writeLock to prevent leaks) + IOUtils.closeWhileHandlingException(readerPool, deleter, writeLock); + writeLock = null; } + closed = true; + closing = false; } } + } - closeInternal(false); + /** + * Delete all documents in the index. + * + *

    + * This method will drop all buffered documents and will remove all segments + * from the index. This change will not be visible until a {@link #commit()} + * has been called. This method can be rolled back using {@link #rollback()}. + *
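The visibility rule stated above, sketched with an assumed writer variable: nothing is exposed to readers until commit, and rollback instead restores the last commit point (and closes the writer).

    // Illustrative only: drop everything, then either publish or back out.
    writer.deleteAll();
    writer.commit();        // make the now-empty index visible
    // ...or, instead of commit(): writer.rollback();  // restore last commit and close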

    + * + *

    + * NOTE: this method is much faster than using deleteDocuments( new + * MatchAllDocsQuery() ). Yet, this method also has different semantics + * compared to {@link #deleteDocuments(Query...)}: internal + * data-structures are cleared, all segment information is + * forcefully dropped, and index-wide settings (such as omitted norms or + * doc value types) are reset. Essentially, a call to {@link #deleteAll()} is + * equivalent to creating a new {@link IndexWriter} with + * {@link OpenMode#CREATE}, whereas a delete query only marks documents as + * deleted. + *

    + * + *

    + * NOTE: this method will forcefully abort all merges in progress. If other + * threads are running {@link #forceMerge}, {@link #addIndexes(IndexReader[])} + * or {@link #forceMergeDeletes} methods, they may receive + * {@link MergePolicy.MergeAbortedException}s. + */ + public void deleteAll() throws IOException { + ensureOpen(); + // Remove any buffered docs + boolean success = false; + /* hold the full flush lock to prevent concurrency commits / NRT reopens to + * get in our way and do unnecessary work. -- if we don't lock this here we might + * get in trouble if */ + synchronized (fullFlushLock) { + /* + * We first abort and trash everything we have in-memory + * and keep the thread-states locked, the lockAndAbortAll operation + * also guarantees "point in time semantics" ie. the checkpoint that we need in terms + * of logical happens-before relationship in the DW. So we do + * abort all in memory structures + * We also drop global field numbering before during abort to make + * sure it's just like a fresh index. + */ + try { + docWriter.lockAndAbortAll(this); + processEvents(false, true); + synchronized (this) { + try { + // Abort any running merges + abortMerges(); + // Remove all segments + segmentInfos.clear(); + // Ask deleter to locate unreferenced files & remove them: + deleter.checkpoint(segmentInfos, false); + /* don't refresh the deleter here since there might + * be concurrent indexing requests coming in opening + * files on the directory after we called DW#abort() + * if we do so these indexing requests might hit FNF exceptions. + * We will remove the files incrementally as we go... + */ + // Don't bother saving any changes in our segmentInfos + readerPool.dropAll(false); + // Mark that the index has changed + ++changeCount; + segmentInfos.changed(); + globalFieldNumberMap.clear(); + success = true; + } finally { + if (!success) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception during deleteAll"); + } + } + } + } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "deleteAll"); + } finally { + docWriter.unlockAllAfterAbortAll(this); + } + } } - private synchronized void finishMerges(boolean waitForMerges) throws IOException { - if (!waitForMerges) { + /** Aborts running merges. Be careful when using this + * method: when you abort a long-running merge, you lose + * a lot of work that must later be redone. 
*/ + public synchronized void abortMerges() { + stopMerges = true; - stopMerges = true; + // Abort all pending & running merges: + for (final MergePolicy.OneMerge merge : pendingMerges) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "now abort pending merge " + segString(merge.segments)); + } + merge.abort(); + mergeFinish(merge); + } + pendingMerges.clear(); - // Abort all pending & running merges: - Iterator it = pendingMerges.iterator(); - while(it.hasNext()) { - final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next(); - if (infoStream != null) - message("now abort pending merge " + merge.segString(directory)); - merge.abort(); - mergeFinish(merge); + for (final MergePolicy.OneMerge merge : runningMerges) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "now abort running merge " + segString(merge.segments)); } - pendingMerges.clear(); - - it = runningMerges.iterator(); - while(it.hasNext()) { - final MergePolicy.OneMerge merge = (MergePolicy.OneMerge) it.next(); - if (infoStream != null) - message("now abort running merge " + merge.segString(directory)); - merge.abort(); + merge.abort(); + } + + // These merges periodically check whether they have + // been aborted, and stop if so. We wait here to make + // sure they all stop. It should not take very long + // because the merge threads periodically check if + // they are aborted. + while(runningMerges.size() > 0) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "now wait for " + runningMerges.size() + " running merge/s to abort"); } + doWait(); + } - // Ensure any running addIndexes finishes. It's fine - // if a new one attempts to start because its merges - // will quickly see the stopMerges == true and abort. - acquireRead(); - releaseRead(); + stopMerges = false; + notifyAll(); - // These merges periodically check whether they have - // been aborted, and stop if so. We wait here to make - // sure they all stop. It should not take very long - // because the merge threads periodically check if - // they are aborted. - while(runningMerges.size() > 0) { - if (infoStream != null) - message("now wait for " + runningMerges.size() + " running merge to abort"); - doWait(); - } + assert 0 == mergingSegments.size(); - stopMerges = false; - notifyAll(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "all running merges have aborted"); + } + } - assert 0 == mergingSegments.size(); + /** + * Wait for any currently outstanding merges to finish. + * + *

    It is guaranteed that any merges started prior to calling this method + * will have completed once this method completes.
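A minimal sketch of the guarantee stated above (writer variable assumed): merges requested before the call are finished once it returns.

    // Illustrative only: request merges, then block until they complete.
    writer.maybeMerge();
    writer.waitForMerges();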

    + */ + public void waitForMerges() throws IOException { - if (infoStream != null) - message("all running merges have aborted"); + // Give merge scheduler last chance to run, in case + // any pending merges are waiting. We can't hold IW's lock + // when going into merge because it can lead to deadlock. + mergeScheduler.merge(this, MergeTrigger.CLOSING, false); - } else { - // Ensure any running addIndexes finishes. It's fine - // if a new one attempts to start because from our - // caller above the call will see that we are in the - // process of closing, and will throw an - // AlreadyClosedException. - acquireRead(); - releaseRead(); - while(pendingMerges.size() > 0 || runningMerges.size() > 0) + synchronized (this) { + ensureOpen(false); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "waitForMerges"); + } + + + while (pendingMerges.size() > 0 || runningMerges.size() > 0) { doWait(); + } + + // sanity check assert 0 == mergingSegments.size(); + + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "waitForMerges done"); + } } } - - /* + + /** * Called whenever the SegmentInfos has been updated and * the index files referenced exist (correctly) in the * index directory. */ - private synchronized void checkpoint() throws IOException { + synchronized void checkpoint() throws IOException { + changed(); + deleter.checkpoint(segmentInfos, false); + } + + /** Checkpoints with IndexFileDeleter, so it's aware of + * new files, and increments changeCount, so on + * close/commit we will write a new segments file, but + * does NOT bump segmentInfos.version. */ + synchronized void checkpointNoSIS() throws IOException { changeCount++; deleter.checkpoint(segmentInfos, false); } - private void finishAddIndexes() { - releaseWrite(); + /** Called internally if any index state has changed. */ + synchronized void changed() { + changeCount++; + segmentInfos.changed(); } - private void blockAddIndexes(boolean includePendingClose) { - - acquireRead(); - - boolean success = false; - try { - - // Make sure we are still open since we could have - // waited quite a while for last addIndexes to finish - ensureOpen(includePendingClose); - success = true; - } finally { - if (!success) - releaseRead(); + synchronized void publishFrozenUpdates(FrozenBufferedUpdates packet) { + assert packet != null && packet.any(); + synchronized (bufferedUpdatesStream) { + bufferedUpdatesStream.push(packet); } } - - private void resumeAddIndexes() { - releaseRead(); - } - - /** Merges all segments from an array of indexes into this index. - * @deprecated Use {@link #addIndexesNoOptimize} instead, - * then separately call {@link #optimize} afterwards if - * you need to. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + + /** + * Atomically adds the segment private delete packet and publishes the flushed + * segments SegmentInfo to the index writer. 
*/ - public void addIndexes(Directory[] dirs) - throws CorruptIndexException, IOException { - - ensureOpen(); - - noDupDirs(dirs); - - // Do not allow add docs or deletes while we are running: - docWriter.pauseAllThreads(); - + void publishFlushedSegment(SegmentCommitInfo newSegment, + FrozenBufferedUpdates packet, FrozenBufferedUpdates globalPacket) throws IOException { try { - - if (infoStream != null) - message("flush at addIndexes"); - flush(true, false, true); - - boolean success = false; - - startTransaction(false); - - try { - - int docCount = 0; - synchronized(this) { - ensureOpen(); - for (int i = 0; i < dirs.length; i++) { - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - final SegmentInfo info = sis.info(j); - docCount += info.docCount; - assert !segmentInfos.contains(info); - segmentInfos.add(info); // add each info - } + synchronized (this) { + // Lock order IW -> BDS + ensureOpen(false); + synchronized (bufferedUpdatesStream) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "publishFlushedSegment"); } + + if (globalPacket != null && globalPacket.any()) { + bufferedUpdatesStream.push(globalPacket); + } + // Publishing the segment must be synched on IW -> BDS to make the sure + // that no merge prunes away the seg. private delete packet + final long nextGen; + if (packet != null && packet.any()) { + nextGen = bufferedUpdatesStream.push(packet); + } else { + // Since we don't have a delete packet to apply we can get a new + // generation right away + nextGen = bufferedUpdatesStream.getNextGen(); + } + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "publish sets newSegment delGen=" + nextGen + " seg=" + segString(newSegment)); + } + newSegment.setBufferedDeletesGen(nextGen); + segmentInfos.add(newSegment); + checkpoint(); } - - // Notify DocumentsWriter that the flushed count just increased - docWriter.updateFlushedDocCount(docCount); - - optimize(); - - success = true; - } finally { - if (success) { - commitTransaction(); - } else { - rollbackTransaction(); - } } - } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; } finally { - docWriter.resumeAllThreads(); + flushCount.incrementAndGet(); + doAfterFlush(); } } private synchronized void resetMergeExceptions() { - mergeExceptions = new ArrayList(); + mergeExceptions = new ArrayList<>(); mergeGen++; } - private void noDupDirs(Directory[] dirs) { - HashSet dups = new HashSet(); + private void noDupDirs(Directory... dirs) { + HashSet dups = new HashSet<>(); for(int i=0;i acquireWriteLocks(Directory... dirs) throws IOException { + List locks = new ArrayList<>(); + for(int i=0;iThis may be used to parallelize batch indexing. A large document - * collection can be broken into sub-collections. Each sub-collection can be - * indexed in parallel, on a different thread, process or machine. The + *

    This may be used to parallelize batch indexing. A large document + * collection can be broken into sub-collections. Each sub-collection can be + * indexed in parallel, on a different thread, process or machine. The * complete index can then be created by merging sub-collection indexes * with this method. * - *

    NOTE: the index in each Directory must not be - * changed (opened by a writer) while this method is - * running. This method does not acquire a write lock in - * each input Directory, so it is up to the caller to - * enforce this. + *

    + * NOTE: this method acquires the write lock in + * each directory, to ensure that no {@code IndexWriter} + * is currently open or tries to open while this is + * running. * - *

    NOTE: while this is running, any attempts to - * add or delete documents (with another thread) will be - * paused until this method completes. - * *

    This method is transactional in how Exceptions are * handled: it does not commit a new segments_N file until * all indexes are added. This means if an Exception * occurs (for example disk full), then either no indexes - * will have been added or they all will have been.

    + * will have been added or they all will have been. * *

    Note that this requires temporary free space in the - * Directory up to 2X the sum of all input indexes - * (including the starting index). If readers/searchers + * {@link Directory} up to 2X the sum of all input indexes + * (including the starting index). If readers/searchers * are open against the starting index, then temporary * free space required will be higher by the size of the - * starting index (see {@link #optimize()} for details). - *

    + * starting index (see {@link #forceMerge(int)} for details). * - *
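As a rough worked example of the space figures above (hypothetical sizes, purely illustrative): merging 3 GB of input indexes into a 1 GB starting index can transiently require up to 2 × (3 GB + 1 GB) = 8 GB of free space in the Directory, and roughly 1 GB more than that while readers/searchers remain open against the starting index.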

    Once this completes, the final size of the index - * will be less than the sum of all input index sizes - * (including the starting index). It could be quite a - * bit smaller (if there were many pending deletes) or - * just slightly smaller.

    - * - *

    - * This requires this index not be among those to be added. + *
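A minimal usage sketch of the varargs addIndexes(Directory...) described above (illustrative only: it assumes an already-open IndexWriter named writer, and the sub-index paths and method name are hypothetical):

    import java.io.File;
    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    // Merge two sub-indexes, built elsewhere (e.g. by other processes or
    // machines), into the index owned by 'writer'. addIndexes acquires the
    // write lock in each input directory and is all-or-nothing on exception.
    static void mergeSubIndexes(IndexWriter writer) throws IOException {
      Directory part1 = FSDirectory.open(new File("/path/to/index-part1")); // hypothetical path
      Directory part2 = FSDirectory.open(new File("/path/to/index-part2")); // hypothetical path
      try {
        writer.addIndexes(part1, part2);
        writer.commit();
      } finally {
        part1.close();
        part2.close();
      }
    }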

    This requires this index not be among those to be added. * * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error + * @throws LockObtainFailedException if we were unable to + * acquire the write lock in at least one directory */ - public void addIndexesNoOptimize(Directory[] dirs) - throws CorruptIndexException, IOException { - + public void addIndexes(Directory... dirs) throws IOException { ensureOpen(); noDupDirs(dirs); - // Do not allow add docs or deletes while we are running: - docWriter.pauseAllThreads(); + List locks = acquireWriteLocks(dirs); + boolean successTop = false; + try { - if (infoStream != null) - message("flush at addIndexesNoOptimize"); - flush(true, false, true); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "flush at addIndexes(Directory...)"); + } - boolean success = false; + flush(false, true); - startTransaction(false); + List infos = new ArrayList<>(); - try { + int totalDocCount = 0; - int docCount = 0; - synchronized(this) { - ensureOpen(); - - for (int i = 0; i < dirs.length; i++) { - if (directory == dirs[i]) { - // cannot add this index: segments may be deleted in merge before added - throw new IllegalArgumentException("Cannot add this index to itself"); - } - - SegmentInfos sis = new SegmentInfos(); // read infos from dir - sis.read(dirs[i]); - for (int j = 0; j < sis.size(); j++) { - SegmentInfo info = sis.info(j); - assert !segmentInfos.contains(info): "dup info dir=" + info.dir + " name=" + info.name; - docCount += info.docCount; - segmentInfos.add(info); // add each info - } + boolean success = false; + try { + for (Directory dir : dirs) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "addIndexes: process directory " + dir); } - } + SegmentInfos sis = new SegmentInfos(); // read infos from dir + sis.read(dir); - // Notify DocumentsWriter that the flushed count just increased - docWriter.updateFlushedDocCount(docCount); + final Set dsFilesCopied = new HashSet<>(); + final Map dsNames = new HashMap<>(); + final Set copiedFiles = new HashSet<>(); - maybeMerge(); + totalDocCount += sis.totalDocCount(); - ensureOpen(); + for (SegmentCommitInfo info : sis) { + assert !infos.contains(info): "dup info dir=" + info.info.dir + " name=" + info.info.name; - // If after merging there remain segments in the index - // that are in a different directory, just copy these - // over into our index. This is necessary (before - // finishing the transaction) to avoid leaving the - // index in an unusable (inconsistent) state. 
- resolveExternalSegments(); + String newSegName = newSegmentName(); - ensureOpen(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "addIndexes: process segment origName=" + info.info.name + " newName=" + newSegName + " info=" + info); + } - success = true; + IOContext context = new IOContext(new MergeInfo(info.info.getDocCount(), info.sizeInBytes(), true, -1)); + for(FieldInfo fi : SegmentReader.readFieldInfos(info)) { + globalFieldNumberMap.addOrGet(fi.name, fi.number, fi.getDocValuesType()); + } + infos.add(copySegmentAsIs(info, newSegName, dsNames, dsFilesCopied, context, copiedFiles)); + } + } + success = true; } finally { - if (success) { - commitTransaction(); - } else { - rollbackTransaction(); + if (!success) { + for(SegmentCommitInfo sipc : infos) { + for(String file : sipc.files()) { + try { + directory.deleteFile(file); + } catch (Throwable t) { + } + } + } } } - } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; - } finally { - docWriter.resumeAllThreads(); - } - } - private boolean hasExternalSegments() { - return hasExternalSegments(segmentInfos); - } - - private boolean hasExternalSegments(SegmentInfos infos) { - final int numSegments = infos.size(); - for(int i=0;i + * The provided IndexReaders are not closed. + * + *

    + * See {@link #addIndexes} for details on transactional semantics, temporary + * free space required in the Directory, and non-CFS segments on an Exception. + * + *

    + * NOTE: empty segments are dropped by this method and not added to this + * index. + * + *

    + * NOTE: this method merges all given {@link IndexReader}s in one + * merge. If you intend to merge a large number of readers, it may be better + * to call this method multiple times, each time with a small set of readers. + * In principle, if you use a merge policy with a {@code mergeFactor} or + * {@code maxMergeAtOnce} parameter, you should pass that many readers in one + * call. Also, if the given readers are {@link DirectoryReader}s, they can be + * opened with {@code termIndexInterval=-1} to save RAM, since during merge + * the in-memory structure is not used. See + * {@link DirectoryReader#open(Directory, int)}. + * + *
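A sketch of the batching suggested above (illustrative: the batch size of 10 and the method/variable names are assumptions; as the javadoc notes, addIndexes does not close the given readers, so the caller does):

    import java.io.IOException;
    import java.util.List;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.IndexWriter;

    // Add a large set of already-open readers in several small calls
    // rather than one huge merge.
    static void addReadersInBatches(IndexWriter writer, List<DirectoryReader> readers) throws IOException {
      final int batchSize = 10; // e.g. roughly the merge policy's mergeFactor/maxMergeAtOnce
      for (int i = 0; i < readers.size(); i += batchSize) {
        List<DirectoryReader> batch = readers.subList(i, Math.min(i + batchSize, readers.size()));
        writer.addIndexes(batch.toArray(new IndexReader[batch.size()]));
      }
      for (IndexReader reader : readers) {
        reader.close(); // addIndexes leaves the given readers open
      }
    }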

    + * NOTE: if you call {@link #close(boolean)} with false, which + * aborts all running merges, then any thread still running this method might + * hit a {@link MergePolicy.MergeAbortedException}. + * + * @throws CorruptIndexException + * if the index is corrupt + * @throws IOException + * if there is a low-level IO error + */ + public void addIndexes(IndexReader... readers) throws IOException { + ensureOpen(); + int numDocs = 0; - /** Merges the provided indexes into this index. - *

    After this completes, the index is optimized.

    - *

    The provided IndexReaders are not closed.

    + try { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "flush at addIndexes(IndexReader...)"); + } + flush(false, true); - *

    NOTE: the index in each Directory must not be - * changed (opened by a writer) while this method is - * running. This method does not acquire a write lock in - * each input Directory, so it is up to the caller to - * enforce this. - * - *

    NOTE: while this is running, any attempts to - * add or delete documents (with another thread) will be - * paused until this method completes. - * - *

    See {@link #addIndexesNoOptimize(Directory[])} for - * details on transactional semantics, temporary free - * space required in the Directory, and non-CFS segments - * on an Exception.

    - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public void addIndexes(IndexReader[] readers) - throws CorruptIndexException, IOException { + String mergedName = newSegmentName(); + final List mergeReaders = new ArrayList<>(); + for (IndexReader indexReader : readers) { + numDocs += indexReader.numDocs(); + for (AtomicReaderContext ctx : indexReader.leaves()) { + mergeReaders.add(ctx.reader()); + } + } - ensureOpen(); + // Make sure adding the new documents to this index won't + // exceed the limit: + reserveDocs(numDocs); + + final IOContext context = new IOContext(new MergeInfo(numDocs, -1, true, -1)); - // Do not allow add docs or deletes while we are running: - docWriter.pauseAllThreads(); + // TODO: somehow we should fix this merge so it's + // abortable so that IW.close(false) is able to stop it + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); - // We must pre-acquire the write lock here (and not in - // startTransaction below) so that no other addIndexes - // is allowed to start up after we have flushed & - // optimized but before we then start our transaction. - // This is because the merging below requires that only - // one segment is present in the index: - acquireWrite(); + SegmentInfo info = new SegmentInfo(directory, Version.LATEST, mergedName, -1, + false, codec, null); - try { + SegmentMerger merger = new SegmentMerger(mergeReaders, info, infoStream, trackingDir, config.getTermIndexInterval(), + MergeState.CheckAbort.NONE, globalFieldNumberMap, + context, config.getCheckIntegrityAtMerge()); + + if (!merger.shouldMerge()) { + return; + } + MergeState mergeState; boolean success = false; - SegmentInfo info = null; - String mergedName = null; - SegmentMerger merger = null; - try { - flush(true, false, true); - optimize(); // start with zero or 1 seg + mergeState = merger.merge(); // merge 'em success = true; } finally { - // Take care to release the write lock if we hit an - // exception before starting the transaction - if (!success) - releaseWrite(); + if (!success) { + synchronized(this) { + deleter.refresh(info.name); + } + } } - // true means we already have write lock; if this call - // hits an exception it will release the write lock: - startTransaction(true); + SegmentCommitInfo infoPerCommit = new SegmentCommitInfo(info, 0, -1L, -1L, -1L); - try { - mergedName = newSegmentName(); - merger = new SegmentMerger(this, mergedName, null); + info.setFiles(new HashSet<>(trackingDir.getCreatedFiles())); + trackingDir.getCreatedFiles().clear(); + + setDiagnostics(info, SOURCE_ADDINDEXES_READERS); - IndexReader sReader = null; - synchronized(this) { - if (segmentInfos.size() == 1) { // add existing index, if any - sReader = SegmentReader.get(true, segmentInfos.info(0)); - } + final MergePolicy mergePolicy = config.getMergePolicy(); + boolean useCompoundFile; + synchronized(this) { // Guard segmentInfos + if (stopMerges) { + deleter.deleteNewFiles(infoPerCommit.files()); + return; } + ensureOpen(); + useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, infoPerCommit, this); + } + // Now create the compound file if needed + if (useCompoundFile) { + Collection filesToDelete = infoPerCommit.files(); try { - if (sReader != null) - merger.add(sReader); - - for (int i = 0; i < readers.length; i++) // add new indexes - merger.add(readers[i]); - - int docCount = merger.merge(); // merge 'em - - if(sReader != null) { - sReader.close(); - sReader = null; + 
createCompoundFile(infoStream, directory, MergeState.CheckAbort.NONE, info, context); + } finally { + // delete new non cfs files directly: they were never + // registered with IFD + synchronized(this) { + deleter.deleteNewFiles(filesToDelete); } + } + info.setUseCompoundFile(true); + } + // Have codec write SegmentInfo. Must do this after + // creating CFS so that 1) .si isn't slurped into CFS, + // and 2) .si reflects useCompoundFile=true change + // above: + success = false; + try { + codec.segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, info, mergeState.fieldInfos, context); + success = true; + } finally { + if (!success) { synchronized(this) { - segmentInfos.clear(); // pop old infos & add new - info = new SegmentInfo(mergedName, docCount, directory, false, true, - -1, null, false, merger.hasProx()); - segmentInfos.add(info); + deleter.refresh(info.name); } + } + } - // Notify DocumentsWriter that the flushed count just increased - docWriter.updateFlushedDocCount(docCount); + info.addFiles(trackingDir.getCreatedFiles()); - success = true; - - } finally { - if (sReader != null) { - sReader.close(); - } + // Register the new segment + synchronized(this) { + if (stopMerges) { + deleter.deleteNewFiles(info.files()); + return; } - } finally { - if (!success) { - if (infoStream != null) - message("hit exception in addIndexes during merge"); - rollbackTransaction(); - } else { - commitTransaction(); - } + ensureOpen(); + segmentInfos.add(infoPerCommit); + checkpoint(); } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "addIndexes(IndexReader...)"); + } + maybeMerge(); + } + + /** Copies the segment files as-is into the IndexWriter's directory. */ + private SegmentCommitInfo copySegmentAsIs(SegmentCommitInfo info, String segName, + Map dsNames, Set dsFilesCopied, IOContext context, + Set copiedFiles) + throws IOException { + // Determine if the doc store of this segment needs to be copied. It's + // only relevant for segments that share doc store with others, + // because the DS might have been copied already, in which case we + // just want to update the DS name of this SegmentInfo. + final String dsName = Lucene3xSegmentInfoFormat.getDocStoreSegment(info.info); + assert dsName != null; + final String newDsName; + if (dsNames.containsKey(dsName)) { + newDsName = dsNames.get(dsName); + } else { + dsNames.put(dsName, segName); + newDsName = segName; + } + + // note: we don't really need this fis (its copied), but we load it up + // so we don't pass a null value to the si writer + FieldInfos fis = SegmentReader.readFieldInfos(info); - if (mergePolicy instanceof LogMergePolicy && getUseCompoundFile()) { + Set docStoreFiles3xOnly = Lucene3xCodec.getDocStoreFiles(info.info); - List files = null; + final Map attributes; + // copy the attributes map, we might modify it below. + // also we need to ensure its read-write, since we will invoke the SIwriter (which might want to set something). + if (info.info.attributes() == null) { + attributes = new HashMap<>(); + } else { + attributes = new HashMap<>(info.info.attributes()); + } + if (docStoreFiles3xOnly != null) { + // only violate the codec this way if it's preflex & + // shares doc stores + // change docStoreSegment to newDsName + attributes.put(Lucene3xSegmentInfoFormat.DS_NAME_KEY, newDsName); + } - synchronized(this) { - // Must incRef our files so that if another thread - // is running merge/optimize, it doesn't delete our - // segment's files before we have a change to - // finish making the compound file. 
- if (segmentInfos.contains(info)) { - files = info.files(); - deleter.incRef(files); - } - } + //System.out.println("copy seg=" + info.info.name + " version=" + info.info.getVersion()); + // Same SI as before but we change directory, name and docStoreSegment: + SegmentInfo newInfo = new SegmentInfo(directory, info.info.getVersion(), segName, info.info.getDocCount(), + info.info.getUseCompoundFile(), info.info.getCodec(), + info.info.getDiagnostics(), attributes); + SegmentCommitInfo newInfoPerCommit = new SegmentCommitInfo(newInfo, + info.getDelCount(), info.getDelGen(), info.getFieldInfosGen(), + info.getDocValuesGen()); - if (files != null) { + Set segFiles = new HashSet<>(); - success = false; + // Build up new segment's file names. Must do this + // before writing SegmentInfo: + for (String file: info.files()) { + final String newFileName; + if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.contains(file)) { + newFileName = newDsName + IndexFileNames.stripSegmentName(file); + } else { + newFileName = segName + IndexFileNames.stripSegmentName(file); + } + segFiles.add(newFileName); + } + newInfo.setFiles(segFiles); - startTransaction(false); + // We must rewrite the SI file because it references + // segment name (its own name, if its 3.x, and doc + // store segment name): + TrackingDirectoryWrapper trackingDir = new TrackingDirectoryWrapper(directory); + final Codec currentCodec = newInfo.getCodec(); + try { + currentCodec.segmentInfoFormat().getSegmentInfoWriter().write(trackingDir, newInfo, fis, context); + } catch (UnsupportedOperationException uoe) { + if (currentCodec instanceof Lucene3xCodec) { + // OK: 3x codec cannot write a new SI file; + // SegmentInfos will write this on commit + } else { + throw uoe; + } + } - try { - merger.createCompoundFile(mergedName + ".cfs"); - synchronized(this) { - info.setUseCompoundFile(true); - } - - success = true; - - } finally { + final Collection siFiles = trackingDir.getCreatedFiles(); - deleter.decRef(files); + boolean success = false; + try { - if (!success) { - if (infoStream != null) - message("hit exception building compound file in addIndexes during merge"); + // Copy the segment's files + for (String file: info.files()) { - rollbackTransaction(); - } else { - commitTransaction(); - } + final String newFileName; + if (docStoreFiles3xOnly != null && docStoreFiles3xOnly.contains(file)) { + newFileName = newDsName + IndexFileNames.stripSegmentName(file); + if (dsFilesCopied.contains(newFileName)) { + continue; } + dsFilesCopied.add(newFileName); + } else { + newFileName = segName + IndexFileNames.stripSegmentName(file); } + + if (siFiles.contains(newFileName)) { + // We already rewrote this above + continue; + } + + assert !slowFileExists(directory, newFileName): "file \"" + newFileName + "\" already exists; siFiles=" + siFiles; + assert !copiedFiles.contains(file): "file \"" + file + "\" is being copied more than once"; + copiedFiles.add(file); + info.info.dir.copy(directory, file, newFileName, context); } - } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + success = true; } finally { - docWriter.resumeAllThreads(); + if (!success) { + for(String file : newInfo.files()) { + try { + directory.deleteFile(file); + } catch (Throwable t) { + } + } + } } + + return newInfoPerCommit; } + + /** + * A hook for extending classes to execute operations after pending added and + * deleted documents have been flushed to the Directory but before the change + * is committed (new segments_N file written). 
+ */ + protected void doAfterFlush() throws IOException {} - // This is called after pending added and deleted - // documents have been flushed to the Directory but before - // the change is committed (new segments_N file written). - void doAfterFlush() - throws IOException { - } - /** - * Flush all in-memory buffered updates (adds and deletes) - * to the Directory. - *
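Because doAfterFlush() is now an explicit extension hook (doBeforeFlush(), its counterpart, is declared a few lines below), a subclass can observe flushes. A minimal sketch, assuming nothing beyond the public IndexWriter constructor; the class name and log messages are illustrative:

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;

    // Logs around each flush via the two protected hooks.
    class LoggingIndexWriter extends IndexWriter {
      LoggingIndexWriter(Directory dir, IndexWriterConfig conf) throws IOException {
        super(dir, conf);
      }
      @Override
      protected void doBeforeFlush() throws IOException {
        System.out.println("about to flush buffered docs/deletes");
      }
      @Override
      protected void doAfterFlush() throws IOException {
        System.out.println("flushed to the Directory, not yet committed (no new segments_N)");
      }
    }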

    Note: while this will force buffered docs to be - * pushed into the index, it will not make these docs - * visible to a reader. Use {@link #commit()} instead - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - * @deprecated please call {@link #commit()}) instead + * A hook for extending classes to execute operations before pending added and + * deleted documents are flushed to the Directory. */ - public final void flush() throws CorruptIndexException, IOException { - flush(true, false, true); - } + protected void doBeforeFlush() throws IOException {} - /**

    Expert: prepare for commit. This does the first - * phase of 2-phase commit. You can only call this when - * autoCommit is false. This method does all steps - * necessary to commit changes since this writer was - * opened: flushes pending added and deleted docs, syncs - * the index files, writes most of next segments_N file. - * After calling this you must call either {@link + /**

    Expert: prepare for commit. This does the + * first phase of 2-phase commit. This method does all + * steps necessary to commit changes since this writer + * was opened: flushes pending added and deleted docs, + * syncs the index files, writes most of next segments_N + * file. After calling this you must call either {@link * #commit()} to finish the commit, or {@link * #rollback()} to revert the commit and undo all changes * done since the writer was opened.
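A sketch of the two-phase commit flow described above (assuming an already-open IndexWriter named writer; the commented step stands in for whatever other resource takes part in the transaction):

    import java.io.IOException;
    import org.apache.lucene.index.IndexWriter;

    // prepareCommit() performs the expensive first phase; commit() finishes
    // it, while rollback() undoes everything done since the writer was
    // opened (and closes the writer).
    static void twoPhaseCommit(IndexWriter writer) throws IOException {
      boolean success = false;
      try {
        writer.prepareCommit();
        // ... commit the other participants of the transaction here ...
        writer.commit();
        success = true;
      } finally {
        if (!success) {
          writer.rollback();
        }
      }
    }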

    * - * You can also just call {@link #commit()} directly - * without prepareCommit first in which case that method - * will internally call prepareCommit. + *

    You can also just call {@link #commit()} directly + * without prepareCommit first in which case that method + * will internally call prepareCommit. */ - public final void prepareCommit() throws CorruptIndexException, IOException { + @Override + public final void prepareCommit() throws IOException { ensureOpen(); - prepareCommit(false); + prepareCommitInternal(config.getMergePolicy()); } - private final void prepareCommit(boolean internal) throws CorruptIndexException, IOException { + private void prepareCommitInternal(MergePolicy mergePolicy) throws IOException { + startCommitTime = System.nanoTime(); + synchronized(commitLock) { + ensureOpen(false); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "prepareCommit: flush"); + infoStream.message("IW", " index before flush " + segString()); + } - if (hitOOM) - throw new IllegalStateException("this writer hit an OutOfMemoryError; cannot commit"); + if (tragedy != null) { + throw new IllegalStateException("this writer hit an unrecoverable error; cannot commit", tragedy); + } - if (autoCommit && !internal) - throw new IllegalStateException("this method can only be used when autoCommit is false"); + if (pendingCommit != null) { + throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit"); + } - if (!autoCommit && pendingCommit != null) - throw new IllegalStateException("prepareCommit was already called with no corresponding call to commit"); + doBeforeFlush(); + testPoint("startDoFlush"); + SegmentInfos toCommit = null; + boolean anySegmentsFlushed = false; - message("prepareCommit: flush"); + // This is copied from doFlush, except it's modified to + // clone & incRef the flushed SegmentInfos inside the + // sync block: - flush(true, true, true); + try { - startCommit(0); - } + synchronized (fullFlushLock) { + boolean flushSuccess = false; + boolean success = false; + try { + anySegmentsFlushed = docWriter.flushAllThreads(this); + if (!anySegmentsFlushed) { + // prevent double increment since docWriter#doFlush increments the flushcount + // if we flushed anything. + flushCount.incrementAndGet(); + } + processEvents(false, true); + flushSuccess = true; - private void commit(long sizeInBytes) throws IOException { - startCommit(sizeInBytes); - finishCommit(); - } + synchronized(this) { + maybeApplyDeletes(true); - private boolean committing; + readerPool.commit(segmentInfos); - synchronized private void waitForCommit() { - // Only allow a single thread to do the commit, at a time: - while(committing) - doWait(); - committing = true; - } + // Must clone the segmentInfos while we still + // hold fullFlushLock and while sync'd so that + // no partial changes (eg a delete w/o + // corresponding add from an updateDocument) can + // sneak into the commit point: + toCommit = segmentInfos.clone(); - synchronized private void doneCommit() { - committing = false; - notifyAll(); + pendingCommitChangeCount = changeCount; + + // This protects the segmentInfos we are now going + // to commit. This is important in case, eg, while + // we are trying to sync all referenced files, a + // merge completes which would otherwise have + // removed the files we are now syncing. + filesToCommit = toCommit.files(directory, false); + deleter.incRef(filesToCommit); + } + success = true; + } finally { + if (!success) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception during prepareCommit"); + } + } + // Done: finish the full flush! 
+ docWriter.finishFullFlush(flushSuccess); + doAfterFlush(); + } + } + } catch (OutOfMemoryError oom) { + tragicEvent(oom, "prepareCommit"); + } + + boolean success = false; + try { + if (anySegmentsFlushed) { + maybeMerge(mergePolicy, MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); + } + startCommit(toCommit); + success = true; + } finally { + if (!success) { + synchronized (this) { + if (filesToCommit != null) { + deleter.decRefWhileHandlingException(filesToCommit); + filesToCommit = null; + } + } + } + } + } } + + /** + * Sets the commit user data map. That method is considered a transaction by + * {@link IndexWriter} and will be {@link #commit() committed} even if no other + * changes were made to the writer instance. Note that you must call this method + * before {@link #prepareCommit()}, or otherwise it won't be included in the + * follow-on {@link #commit()}. + *

    + * NOTE: the map is cloned internally, therefore altering the map's + * contents after calling this method has no effect. + */ + public final synchronized void setCommitData(Map commitUserData) { + segmentInfos.setUserData(new HashMap<>(commitUserData)); + ++changeCount; + } + + /** + * Returns the commit user data map that was last committed, or the one that + * was set on {@link #setCommitData(Map)}. + */ + public final synchronized Map getCommitData() { + return segmentInfos.getUserData(); + } + + // Used only by commit and prepareCommit, below; lock + // order is commitLock -> IW + private final Object commitLock = new Object(); /** - *
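A sketch of attaching commit user data (illustrative: the key name and the idea of storing a checkpoint id are assumptions; note the ordering requirement above, i.e. call setCommitData before prepareCommit()/commit()):

    import java.io.IOException;
    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.index.IndexWriter;

    // Record application-level metadata alongside the next commit.
    static void commitWithUserData(IndexWriter writer, long lastProcessedId) throws IOException {
      Map<String, String> userData = new HashMap<>();
      userData.put("lastProcessedId", Long.toString(lastProcessedId)); // hypothetical key
      writer.setCommitData(userData); // must precede prepareCommit()/commit()
      writer.commit();
      // The data is now retrievable from the writer (or from the commit point):
      assert Long.toString(lastProcessedId).equals(writer.getCommitData().get("lastProcessedId"));
    }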

    Commits all pending updates (added & deleted - * documents) to the index, and syncs all referenced index - * files, such that a reader will see the changes and the - * index updates will survive an OS or machine crash or - * power loss. Note that this does not wait for any - * running background merges to finish. This may be a + *

    Commits all pending changes (added & deleted + * documents, segment merges, added + * indexes, etc.) to the index, and syncs all referenced + * index files, such that a reader will see the changes + * and the index updates will survive an OS or machine + * crash or power loss. Note that this does not wait for + * any running background merges to finish. This may be a * costly operation, so you should test the cost in your * application and do it only when really necessary.
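A small sketch of the durability guarantee described above (assuming an already-open writer, its Directory, and a Document supplied by the application):

    import java.io.IOException;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.store.Directory;

    // After commit() returns, a newly opened reader sees the change and the
    // update survives an OS/machine crash; background merges may still run.
    static int indexAndCommit(IndexWriter writer, Directory dir, Document doc) throws IOException {
      writer.addDocument(doc);
      writer.commit(); // durable point: new segments_N written and files synced
      DirectoryReader reader = DirectoryReader.open(dir);
      try {
        return reader.numDocs(); // includes the newly committed document
      } finally {
        reader.close();
      }
    }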

    * @@ -3381,380 +3058,530 @@ * * @see #prepareCommit */ - - public final void commit() throws CorruptIndexException, IOException { - + @Override + public final void commit() throws IOException { ensureOpen(); + commitInternal(config.getMergePolicy()); + } - // Only let one thread do the prepare/finish at a time - waitForCommit(); + /** Returns true if there may be changes that have not been + * committed. There are cases where this may return true + * when there are no actual "real" changes to the index, + * for example if you've deleted by Term or Query but + * that Term or Query does not match any documents. + * Also, if a merge kicked off as a result of flushing a + * new segment during {@link #commit}, or a concurrent + * merged finished, this method may return true right + * after you had just called {@link #commit}. */ + public final boolean hasUncommittedChanges() { + return changeCount != lastCommitChangeCount || docWriter.anyChanges() || bufferedUpdatesStream.any(); + } - try { - message("commit: start"); + private final void commitInternal(MergePolicy mergePolicy) throws IOException { - if (autoCommit || pendingCommit == null) { - message("commit: now prepare"); - prepareCommit(true); - } else - message("commit: already prepared"); - - finishCommit(); - } finally { - doneCommit(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commit: start"); } - } - private synchronized final void finishCommit() throws CorruptIndexException, IOException { + synchronized(commitLock) { + ensureOpen(false); - if (pendingCommit != null) { - try { - message("commit: pendingCommit != null"); - pendingCommit.finishCommit(directory); - lastCommitChangeCount = pendingCommitChangeCount; - segmentInfos.updateGeneration(pendingCommit); - setRollbackSegmentInfos(pendingCommit); - deleter.checkpoint(pendingCommit, true); - } finally { - deleter.decRef(pendingCommit); - pendingCommit = null; - notifyAll(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commit: enter lock"); } - } else - message("commit: pendingCommit == null; skip"); + if (pendingCommit == null) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commit: now prepare"); + } + prepareCommitInternal(mergePolicy); + } else { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commit: already prepared"); + } + } - message("commit: done"); + finishCommit(); + } } - /** - * Flush all in-memory buffered udpates (adds and deletes) - * to the Directory. 
- * @param triggerMerge if true, we may merge segments (if - * deletes or docs were flushed) if necessary - * @param flushDocStores if false we are allowed to keep - * doc stores open to share with the next segment - * @param flushDeletes whether pending deletes should also - * be flushed - */ - protected final void flush(boolean triggerMerge, boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException { - // We can be called during close, when closing==true, so we must pass false to ensureOpen: - ensureOpen(false); - if (doFlush(flushDocStores, flushDeletes) && triggerMerge) - maybeMerge(); - } + private final void finishCommit() throws IOException { - // TODO: this method should not have to be entirely - // synchronized, ie, merges should be allowed to commit - // even while a flush is happening - private synchronized final boolean doFlush(boolean flushDocStores, boolean flushDeletes) throws CorruptIndexException, IOException { + boolean commitCompleted = false; + boolean finished = false; + String committedSegmentsFileName = null; - ensureOpen(false); - - assert testPoint("startDoFlush"); - - flushCount++; - - // Make sure no threads are actively adding a document - - flushDeletes |= docWriter.deletesFull(); - - // When autoCommit=true we must always flush deletes - // when flushing a segment; otherwise deletes may become - // visible before their corresponding added document - // from an updateDocument call - flushDeletes |= autoCommit; - - // Returns true if docWriter is currently aborting, in - // which case we skip flushing this segment - if (docWriter.pauseAllThreads()) { - docWriter.resumeAllThreads(); - return false; - } - try { + synchronized(this) { + if (pendingCommit != null) { + try { - SegmentInfo newSegment = null; + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commit: pendingCommit != null"); + } - final int numDocs = docWriter.getNumDocsInRAM(); + committedSegmentsFileName = pendingCommit.finishCommit(directory); - // Always flush docs if there are any - boolean flushDocs = numDocs > 0; + // we committed, if anything goes wrong after this, we are screwed and it's a tragedy: + commitCompleted = true; - // With autoCommit=true we always must flush the doc - // stores when we flush - flushDocStores |= autoCommit; - String docStoreSegment = docWriter.getDocStoreSegment(); - if (docStoreSegment == null) - flushDocStores = false; + // NOTE: don't use this.checkpoint() here, because + // we do not want to increment changeCount: + deleter.checkpoint(pendingCommit, true); - int docStoreOffset = docWriter.getDocStoreOffset(); + lastCommitChangeCount = pendingCommitChangeCount; + rollbackSegments = pendingCommit.createBackupSegmentInfos(); - // docStoreOffset should only be non-zero when - // autoCommit == false - assert !autoCommit || 0 == docStoreOffset; - - boolean docStoreIsCompoundFile = false; - - if (infoStream != null) { - message(" flush: segment=" + docWriter.getSegment() + - " docStoreSegment=" + docWriter.getDocStoreSegment() + - " docStoreOffset=" + docStoreOffset + - " flushDocs=" + flushDocs + - " flushDeletes=" + flushDeletes + - " flushDocStores=" + flushDocStores + - " numDocs=" + numDocs + - " numBufDelTerms=" + docWriter.getNumBufferedDeleteTerms()); - message(" index before flush " + segString()); + finished = true; + } finally { + notifyAll(); + try { + if (finished) { + // all is good + deleter.decRef(filesToCommit); + } else if (commitCompleted == false) { + // exc happened in finishCommit: not a tragedy + 
deleter.decRefWhileHandlingException(filesToCommit); + } + } finally { + pendingCommit = null; + filesToCommit = null; + } + } + } else { + assert filesToCommit == null; + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commit: pendingCommit == null; skip"); + } + } } - - // Check if the doc stores must be separately flushed - // because other segments, besides the one we are about - // to flush, reference it - if (flushDocStores && (!flushDocs || !docWriter.getSegment().equals(docWriter.getDocStoreSegment()))) { - // We must separately flush the doc store - if (infoStream != null) - message(" flush shared docStore segment " + docStoreSegment); - - docStoreIsCompoundFile = flushDocStores(); - flushDocStores = false; + } catch (Throwable t) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception during finishCommit: " + t.getMessage()); } + if (commitCompleted) { + tragicEvent(t, "finishCommit"); + } else { + IOUtils.reThrow(t); + } + } - String segment = docWriter.getSegment(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commit: wrote segments file \"" + committedSegmentsFileName + "\""); + infoStream.message("IW", String.format(Locale.ROOT, "commit: took %.1f msec", (System.nanoTime()-startCommitTime)/1000000.0)); + infoStream.message("IW", "commit: done"); + } + } - // If we are flushing docs, segment must not be null: - assert segment != null || !flushDocs; + // Ensures only one flush() is actually flushing segments + // at a time: + private final Object fullFlushLock = new Object(); + + // for assert + boolean holdsFullFlushLock() { + return Thread.holdsLock(fullFlushLock); + } - if (flushDocs) { + /** + * Flush all in-memory buffered updates (adds and deletes) + * to the Directory. + * @param triggerMerge if true, we may merge segments (if + * deletes or docs were flushed) if necessary + * @param applyAllDeletes whether pending deletes should also + */ + protected final void flush(boolean triggerMerge, boolean applyAllDeletes) throws IOException { - boolean success = false; - final int flushedDocCount; + // NOTE: this method cannot be sync'd because + // maybeMerge() in turn calls mergeScheduler.merge which + // in turn can take a long time to run and we don't want + // to hold the lock for that. In the case of + // ConcurrentMergeScheduler this can lead to deadlock + // when it stalls due to too many running merges. - try { - flushedDocCount = docWriter.flush(flushDocStores); - success = true; - } finally { - if (!success) { - if (infoStream != null) - message("hit exception flushing segment " + segment); - deleter.refresh(segment); - } - } - - if (0 == docStoreOffset && flushDocStores) { - // This means we are flushing private doc stores - // with this segment, so it will not be shared - // with other segments - assert docStoreSegment != null; - assert docStoreSegment.equals(segment); - docStoreOffset = -1; - docStoreIsCompoundFile = false; - docStoreSegment = null; - } + // We can be called during close, when closing==true, so we must pass false to ensureOpen: + ensureOpen(false); + if (doFlush(applyAllDeletes) && triggerMerge) { + maybeMerge(config.getMergePolicy(), MergeTrigger.FULL_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); + } + } - // Create new SegmentInfo, but do not add to our - // segmentInfos until deletes are flushed - // successfully. 
- newSegment = new SegmentInfo(segment, - flushedDocCount, - directory, false, true, - docStoreOffset, docStoreSegment, - docStoreIsCompoundFile, - docWriter.hasProx()); - } + private boolean doFlush(boolean applyAllDeletes) throws IOException { + if (tragedy != null) { + throw new IllegalStateException("this writer hit an unrecoverable error; cannot flush", tragedy); + } - docWriter.pushDeletes(); + doBeforeFlush(); + testPoint("startDoFlush"); + boolean success = false; + try { - if (flushDocs) - segmentInfos.add(newSegment); - - if (flushDeletes) { - flushDeletesCount++; - applyDeletes(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", " start flush: applyAllDeletes=" + applyAllDeletes); + infoStream.message("IW", " index before flush " + segString()); } + final boolean anySegmentFlushed; - doAfterFlush(); - - if (flushDocs) - checkpoint(); - - if (flushDocs && mergePolicy.useCompoundFile(segmentInfos, newSegment)) { - // Now build compound file - boolean success = false; + synchronized (fullFlushLock) { + boolean flushSuccess = false; try { - docWriter.createCompoundFile(segment); - success = true; + anySegmentFlushed = docWriter.flushAllThreads(this); + flushSuccess = true; } finally { - if (!success) { - if (infoStream != null) - message("hit exception creating compound file for newly flushed segment " + segment); - deleter.deleteFile(segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); - } + docWriter.finishFullFlush(flushSuccess); + processEvents(false, true); } - - newSegment.setUseCompoundFile(true); - checkpoint(); } - - return flushDocs; - + synchronized(this) { + maybeApplyDeletes(applyAllDeletes); + doAfterFlush(); + if (!anySegmentFlushed) { + // flushCount is incremented in flushAllThreads + flushCount.incrementAndGet(); + } + success = true; + return anySegmentFlushed; + } } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + tragicEvent(oom, "doFlush"); + // never hit + return false; } finally { - docWriter.clearFlushPending(); - docWriter.resumeAllThreads(); + if (!success) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception during flush"); + } + } } } + + final synchronized void maybeApplyDeletes(boolean applyAllDeletes) throws IOException { + if (applyAllDeletes) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "apply all deletes during flush"); + } + applyAllDeletesAndUpdates(); + } else if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "don't apply deletes now delTermCount=" + bufferedUpdatesStream.numTerms() + " bytesUsed=" + bufferedUpdatesStream.ramBytesUsed()); + } + } + + final synchronized void applyAllDeletesAndUpdates() throws IOException { + flushDeletesCount.incrementAndGet(); + final BufferedUpdatesStream.ApplyDeletesResult result; + result = bufferedUpdatesStream.applyDeletesAndUpdates(readerPool, segmentInfos.asList()); + if (result.anyDeletes) { + checkpoint(); + } + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "drop 100% deleted segments: " + segString(result.allDeleted)); + } + for (SegmentCommitInfo info : result.allDeleted) { + // If a merge has already registered for this + // segment, we leave it in the readerPool; the + // merge will skip merging it and will then drop + // it once it's done: + if (!mergingSegments.contains(info)) { + segmentInfos.remove(info); + pendingNumDocs.addAndGet(-info.info.getDocCount()); + readerPool.drop(info); + } + } + checkpoint(); + } + 
bufferedUpdatesStream.prune(segmentInfos); + } + + /** Expert: Return the total size of all index files currently cached in memory. + * Useful for size management with flushRamDocs() + * @deprecated use #ramBytesUsed() instead + */ + @Deprecated + public final long ramSizeInBytes() { + return ramBytesUsed(); + } - /** Expert: Return the total size of all index files currently cached in memory. - * Useful for size management with flushRamDocs() - */ - public final long ramSizeInBytes() { - ensureOpen(); - return docWriter.getRAMUsed(); + // for testing only + DocumentsWriter getDocsWriter() { + return docWriter; } /** Expert: Return the number of documents currently * buffered in RAM. */ public final synchronized int numRamDocs() { ensureOpen(); - return docWriter.getNumDocsInRAM(); + return docWriter.getNumDocs(); } - private int ensureContiguousMerge(MergePolicy.OneMerge merge) { + private synchronized void ensureValidMerge(MergePolicy.OneMerge merge) { + for(SegmentCommitInfo info : merge.segments) { + if (!segmentInfos.contains(info)) { + throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.info.name + ") that is not in the current index " + segString(), directory); + } + } + } - int first = segmentInfos.indexOf(merge.segments.info(0)); - if (first == -1) - throw new MergePolicy.MergeException("could not find segment " + merge.segments.info(0).name + " in current segments", directory); - - final int numSegments = segmentInfos.size(); + private void skipDeletedDoc(DocValuesFieldUpdates.Iterator[] updatesIters, int deletedDoc) { + for (DocValuesFieldUpdates.Iterator iter : updatesIters) { + if (iter.doc() == deletedDoc) { + iter.nextDoc(); + } + // when entering the method, all iterators must already be beyond the + // deleted document, or right on it, in which case we advance them over + // and they must be beyond it now. 
+ assert iter.doc() > deletedDoc : "updateDoc=" + iter.doc() + " deletedDoc=" + deletedDoc; + } + } + + private static class MergedDeletesAndUpdates { + ReadersAndUpdates mergedDeletesAndUpdates = null; + MergePolicy.DocMap docMap = null; + boolean initializedWritableLiveDocs = false; - final int numSegmentsToMerge = merge.segments.size(); - for(int i=0;i= numSegments || !segmentInfos.info(first+i).equals(info)) { - if (segmentInfos.indexOf(info) == -1) - throw new MergePolicy.MergeException("MergePolicy selected a segment (" + info.name + ") that is not in the index", directory); - else - throw new MergePolicy.MergeException("MergePolicy selected non-contiguous segments to merge (" + merge.segString(directory) + " vs " + segString() + "), which IndexWriter (currently) cannot handle", - directory); + MergedDeletesAndUpdates() {} + + final void init(ReaderPool readerPool, MergePolicy.OneMerge merge, MergeState mergeState, boolean initWritableLiveDocs) throws IOException { + if (mergedDeletesAndUpdates == null) { + mergedDeletesAndUpdates = readerPool.get(merge.info, true); + docMap = merge.getDocMap(mergeState); + assert docMap.isConsistent(merge.info.info.getDocCount()); } + if (initWritableLiveDocs && !initializedWritableLiveDocs) { + mergedDeletesAndUpdates.initWritableLiveDocs(); + this.initializedWritableLiveDocs = true; + } } - - return first; + } + + private void maybeApplyMergedDVUpdates(MergePolicy.OneMerge merge, MergeState mergeState, int docUpto, + MergedDeletesAndUpdates holder, String[] mergingFields, DocValuesFieldUpdates[] dvFieldUpdates, + DocValuesFieldUpdates.Iterator[] updatesIters, int curDoc) throws IOException { + int newDoc = -1; + for (int idx = 0; idx < mergingFields.length; idx++) { + DocValuesFieldUpdates.Iterator updatesIter = updatesIters[idx]; + if (updatesIter.doc() == curDoc) { // document has an update + if (holder.mergedDeletesAndUpdates == null) { + holder.init(readerPool, merge, mergeState, false); + } + if (newDoc == -1) { // map once per all field updates, but only if there are any updates + newDoc = holder.docMap.map(docUpto); + } + DocValuesFieldUpdates dvUpdates = dvFieldUpdates[idx]; + dvUpdates.add(newDoc, updatesIter.value()); + updatesIter.nextDoc(); // advance to next document + } else { + assert updatesIter.doc() > curDoc : "field=" + mergingFields[idx] + " updateDoc=" + updatesIter.doc() + " curDoc=" + curDoc; + } + } + } - /** Carefully merges deletes for the segments we just - * merged. This is tricky because, although merging will - * clear all deletes (compacts the documents), new - * deletes may have been flushed to the segments since - * the merge was started. This method "carries over" - * such new deletes onto the newly merged segment, and - * saves the resulting deletes file (incrementing the - * delete generation for merge.info). If no deletes were - * flushed, no new deletes file is saved. */ - synchronized private void commitMergedDeletes(MergePolicy.OneMerge merge) throws IOException { + /** + * Carefully merges deletes and updates for the segments we just merged. This + * is tricky because, although merging will clear all deletes (compacts the + * documents) and compact all the updates, new deletes and updates may have + * been flushed to the segments since the merge was started. This method + * "carries over" such new deletes and updates onto the newly merged segment, + * and saves the resulting deletes and updates files (incrementing the delete + * and DV generations for merge.info). 
If no deletes were flushed, no new + * deletes file is saved. + */ + synchronized private ReadersAndUpdates commitMergedDeletesAndUpdates(MergePolicy.OneMerge merge, MergeState mergeState) throws IOException { - assert testPoint("startCommitMergeDeletes"); + testPoint("startCommitMergeDeletes"); - final SegmentInfos sourceSegmentsClone = merge.segmentsClone; - final SegmentInfos sourceSegments = merge.segments; + final List sourceSegments = merge.segments; - if (infoStream != null) - message("commitMergeDeletes " + merge.segString(directory)); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "commitMergeDeletes " + segString(merge.segments)); + } // Carefully merge deletes that occurred after we // started merging: - - BitVector deletes = null; int docUpto = 0; - int delCount = 0; + long minGen = Long.MAX_VALUE; - final int numSegmentsToMerge = sourceSegments.size(); - for(int i=0;i 0 || dropSegment; + + assert merge.info.info.getDocCount() != 0 || keepFullyDeletedSegments || dropSegment; + + if (mergedUpdates != null) { + boolean success = false; + try { + if (dropSegment) { + mergedUpdates.dropChanges(); } + // Pass false for assertInfoLive because the merged + // segment is not yet live (only below do we commit it + // to the segmentInfos): + readerPool.release(mergedUpdates, false); + success = true; + } finally { + if (!success) { + mergedUpdates.dropChanges(); + readerPool.drop(merge.info); + } } } - merge.info.setHasProx(merger.hasProx()); + // Must do this after readerPool.release, in case an + // exception is hit e.g. writing the live docs for the + // merge segment, in which case we need to abort the + // merge: + segmentInfos.applyMergeChanges(merge, dropSegment); - segmentInfos.subList(start, start + merge.segments.size()).clear(); - assert !segmentInfos.contains(merge.info); - segmentInfos.add(start, merge.info); + // Now deduct the deleted docs that we just reclaimed from this + // merge: + int delDocCount = merge.totalDocCount - merge.info.info.getDocCount(); + assert delDocCount >= 0; + pendingNumDocs.addAndGet(-delDocCount); - // Must checkpoint before decrefing so any newly - // referenced files in the new merge.info are incref'd - // first: - checkpoint(); + if (dropSegment) { + assert !segmentInfos.contains(merge.info); + readerPool.drop(merge.info); + deleter.deleteNewFiles(merge.info.files()); + } - decrefMergeSegments(merge); + boolean success = false; + try { + // Must close before checkpoint, otherwise IFD won't be + // able to delete the held-open files from the merge + // readers: + closeMergeReaders(merge, false); + success = true; + } finally { + // Must note the change to segmentInfos so any commits + // in-flight don't lose it (IFD will incRef/protect the + // new files we created): + if (success) { + checkpoint(); + } else { + try { + checkpoint(); + } catch (Throwable t) { + // Ignore so we keep throwing original exception. 
+ } + } + } - if (merge.optimize) - segmentsToOptimize.add(merge.info); - return true; - } + deleter.deletePendingFiles(); - private void decrefMergeSegments(MergePolicy.OneMerge merge) throws IOException { - final SegmentInfos sourceSegmentsClone = merge.segmentsClone; - final int numSegmentsToMerge = sourceSegmentsClone.size(); - assert merge.increfDone; - merge.increfDone = false; - for(int i=0;i 0; if (stopMerges) { merge.abort(); - throw new MergePolicy.MergeAbortedException("merge is aborted: " + merge.segString(directory)); + throw new MergePolicy.MergeAbortedException("merge is aborted: " + segString(merge.segments)); } - final int count = merge.segments.size(); boolean isExternal = false; - for(int i=0;i 0; + assert merge.maxNumSegments == -1 || merge.maxNumSegments > 0; - if (merge.info != null) + if (tragedy != null) { + throw new IllegalStateException("this writer hit an unrecoverable error; cannot merge", tragedy); + } + + if (merge.info != null) { // mergeInit already done return; + } - if (merge.isAborted()) + if (merge.isAborted()) { return; - - boolean changed = applyDeletes(); - - // If autoCommit == true then all deletes should have - // been flushed when we flushed the last segment - assert !changed || !autoCommit; - - final SegmentInfos sourceSegments = merge.segments; - final int end = sourceSegments.size(); - - // Check whether this merge will allow us to skip - // merging the doc stores (stored field & vectors). - // This is a very substantial optimization (saves tons - // of IO) that can only be applied with - // autoCommit=false. - - Directory lastDir = directory; - String lastDocStoreSegment = null; - int next = -1; - - boolean mergeDocStores = false; - boolean doFlushDocStore = false; - final String currentDocStoreSegment = docWriter.getDocStoreSegment(); - - // Test each segment to be merged: check if we need to - // flush/merge doc stores - for (int i = 0; i < end; i++) { - SegmentInfo si = sourceSegments.info(i); - - // If it has deletions we must merge the doc stores - if (si.hasDeletions()) - mergeDocStores = true; - - // If it has its own (private) doc stores we must - // merge the doc stores - if (-1 == si.getDocStoreOffset()) - mergeDocStores = true; - - // If it has a different doc store segment than - // previous segments, we must merge the doc stores - String docStoreSegment = si.getDocStoreSegment(); - if (docStoreSegment == null) - mergeDocStores = true; - else if (lastDocStoreSegment == null) - lastDocStoreSegment = docStoreSegment; - else if (!lastDocStoreSegment.equals(docStoreSegment)) - mergeDocStores = true; - - // Segments' docScoreOffsets must be in-order, - // contiguous. 
For the default merge policy now - // this will always be the case but for an arbitrary - // merge policy this may not be the case - if (-1 == next) - next = si.getDocStoreOffset() + si.docCount; - else if (next != si.getDocStoreOffset()) - mergeDocStores = true; - else - next = si.getDocStoreOffset() + si.docCount; - - // If the segment comes from a different directory - // we must merge - if (lastDir != si.dir) - mergeDocStores = true; - - // If the segment is referencing the current "live" - // doc store outputs then we must merge - if (si.getDocStoreOffset() != -1 && currentDocStoreSegment != null && si.getDocStoreSegment().equals(currentDocStoreSegment)) { - doFlushDocStore = true; - } } - final int docStoreOffset; - final String docStoreSegment; - final boolean docStoreIsCompoundFile; + // TODO: in the non-pool'd case this is somewhat + // wasteful, because we open these readers, close them, + // and then open them again for merging. Maybe we + // could pre-pool them somehow in that case... - if (mergeDocStores) { - docStoreOffset = -1; - docStoreSegment = null; - docStoreIsCompoundFile = false; - } else { - SegmentInfo si = sourceSegments.info(0); - docStoreOffset = si.getDocStoreOffset(); - docStoreSegment = si.getDocStoreSegment(); - docStoreIsCompoundFile = si.getDocStoreIsCompoundFile(); + // Lock order: IW -> BD + final BufferedUpdatesStream.ApplyDeletesResult result = bufferedUpdatesStream.applyDeletesAndUpdates(readerPool, merge.segments); + + if (result.anyDeletes) { + checkpoint(); } - if (mergeDocStores && doFlushDocStore) { - // SegmentMerger intends to merge the doc stores - // (stored fields, vectors), and at least one of the - // segments to be merged refers to the currently - // live doc stores. - - // TODO: if we know we are about to merge away these - // newly flushed doc store files then we should not - // make compound file out of them... - if (infoStream != null) - message("now flush at merge"); - doFlush(true, false); - //flush(false, true, false); + if (!keepFullyDeletedSegments && result.allDeleted != null) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "drop 100% deleted segments: " + result.allDeleted); + } + for(SegmentCommitInfo info : result.allDeleted) { + segmentInfos.remove(info); + pendingNumDocs.addAndGet(-info.info.getDocCount()); + if (merge.segments.contains(info)) { + mergingSegments.remove(info); + merge.segments.remove(info); + } + readerPool.drop(info); + } + checkpoint(); } - // We must take a full copy at this point so that we can - // properly merge deletes in commitMerge() - merge.segmentsClone = (SegmentInfos) merge.segments.clone(); - - for (int i = 0; i < end; i++) { - SegmentInfo si = merge.segmentsClone.info(i); - - // IncRef all files for this segment info to make sure - // they are not removed while we are trying to merge. - if (si.dir == directory) - deleter.incRef(si.files()); - } - - merge.increfDone = true; - - merge.mergeDocStores = mergeDocStores; - // Bind a new segment name here so even with // ConcurrentMergePolicy we keep deterministic segment // names. 
- merge.info = new SegmentInfo(newSegmentName(), 0, - directory, false, true, - docStoreOffset, - docStoreSegment, - docStoreIsCompoundFile, - false); + final String mergeSegmentName = newSegmentName(); + SegmentInfo si = new SegmentInfo(directory, Version.LATEST, mergeSegmentName, -1, false, codec, null); + Map details = new HashMap<>(); + details.put("mergeMaxNumSegments", "" + merge.maxNumSegments); + details.put("mergeFactor", Integer.toString(merge.segments.size())); + setDiagnostics(si, SOURCE_MERGE, details); + merge.setInfo(new SegmentCommitInfo(si, 0, -1L, -1L, -1L)); - // Also enroll the merged segment into mergingSegments; - // this prevents it from getting selected for a merge - // after our merge is done but while we are building the - // CFS: - mergingSegments.add(merge.info); +// System.out.println("[" + Thread.currentThread().getName() + "] IW._mergeInit: " + segString(merge.segments) + " into " + si); + + // Lock order: IW -> BD + bufferedUpdatesStream.prune(segmentInfos); + + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "merge seg=" + merge.info.info.name + " " + segString(merge.segments)); + } } - /** This is called after merging a segment and before - * building its CFS. Return true if the files should be - * sync'd. If you return false, then the source segment - * files that were merged cannot be deleted until the CFS - * file is built & sync'd. So, returning false consumes - * more transient disk space, but saves performance of - * not having to sync files which will shortly be deleted - * anyway. - * @deprecated -- this will be removed in 3.0 when - * autoCommit is hardwired to false */ - private synchronized boolean doCommitBeforeMergeCFS(MergePolicy.OneMerge merge) throws IOException { - long freeableBytes = 0; - final int size = merge.segments.size(); - for(int i=0;i details) { + Map diagnostics = new HashMap<>(); + diagnostics.put("source", source); + diagnostics.put("lucene.version", Version.LATEST.toString()); + diagnostics.put("os", Constants.OS_NAME); + diagnostics.put("os.arch", Constants.OS_ARCH); + diagnostics.put("os.version", Constants.OS_VERSION); + diagnostics.put("java.version", Constants.JAVA_VERSION); + diagnostics.put("java.vendor", Constants.JAVA_VENDOR); + diagnostics.put("timestamp", Long.toString(new Date().getTime())); + if (details != null) { + diagnostics.putAll(details); } - // If we would free up more than 1/3rd of the index by - // committing now, then do so: - long totalBytes = 0; - final int numSegments = segmentInfos.size(); - for(int i=0;i totalBytes) - return true; - else - return false; + info.setDiagnostics(diagnostics); } /** Does fininishing for a merge, which is fast but holds * the synchronized lock on IndexWriter instance. */ - final synchronized void mergeFinish(MergePolicy.OneMerge merge) throws IOException { - - // Optimize, addIndexes or finishMerges may be waiting + final synchronized void mergeFinish(MergePolicy.OneMerge merge) { + + // forceMerge, addIndexes or waitForMerges may be waiting // on merges to finish. 
notifyAll(); - if (merge.increfDone) - decrefMergeSegments(merge); + // It's possible we are called twice, eg if there was an + // exception inside mergeInit + if (merge.registerDone) { + final List sourceSegments = merge.segments; + for (SegmentCommitInfo info : sourceSegments) { + mergingSegments.remove(info); + } + merge.registerDone = false; + } - assert merge.registerDone; + runningMerges.remove(merge); + } - final SegmentInfos sourceSegments = merge.segments; - final int end = sourceSegments.size(); - for(int i=0;i sourceSegments = merge.segments; - SegmentMerger merger = null; + IOContext context = new IOContext(merge.getMergeInfo()); - int mergedDocCount = 0; + final MergeState.CheckAbort checkAbort = new MergeState.CheckAbort(merge, directory); + final TrackingDirectoryWrapper dirWrapper = new TrackingDirectoryWrapper(directory); - SegmentInfos sourceSegments = merge.segments; - SegmentInfos sourceSegmentsClone = merge.segmentsClone; - final int numSegments = sourceSegments.size(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "merging " + segString(merge.segments)); + } - if (infoStream != null) - message("merging " + merge.segString(directory)); + merge.readers = new ArrayList<>(); - merger = new SegmentMerger(this, mergedName, merge); - - boolean success = false; - // This is try/finally to make sure merger's readers are // closed: + boolean success = false; try { - int totDocCount = 0; + int segUpto = 0; + while(segUpto < sourceSegments.size()) { - for (int i = 0; i < numSegments; i++) { - SegmentInfo si = sourceSegmentsClone.info(i); - IndexReader reader = SegmentReader.get(true, si, MERGE_READ_BUFFER_SIZE, merge.mergeDocStores); // no need to set deleter (yet) - merger.add(reader); - totDocCount += reader.numDocs(); + final SegmentCommitInfo info = sourceSegments.get(segUpto); + + // Hold onto the "live" reader; we will use this to + // commit merged deletes + final ReadersAndUpdates rld = readerPool.get(info, true); + + // Carefully pull the most recent live docs and reader + SegmentReader reader; + final Bits liveDocs; + final int delCount; + + synchronized (this) { + // Must sync to ensure BufferedDeletesStream cannot change liveDocs, + // pendingDeleteCount and field updates while we pull a copy: + reader = rld.getReaderForMerge(context); + liveDocs = rld.getReadOnlyLiveDocs(); + delCount = rld.getPendingDeleteCount() + info.getDelCount(); + + assert reader != null; + assert rld.verifyDocCounts(); + + if (infoStream.isEnabled("IW")) { + if (rld.getPendingDeleteCount() != 0) { + infoStream.message("IW", "seg=" + segString(info) + " delCount=" + info.getDelCount() + " pendingDelCount=" + rld.getPendingDeleteCount()); + } else if (info.getDelCount() != 0) { + infoStream.message("IW", "seg=" + segString(info) + " delCount=" + info.getDelCount()); + } else { + infoStream.message("IW", "seg=" + segString(info) + " no deletes"); + } + } + } + + // Deletes might have happened after we pulled the merge reader and + // before we got a read-only copy of the segment's actual live docs + // (taking pending deletes into account). In that case we need to + // make a new reader with updated live docs and del count. 
+ if (reader.numDeletedDocs() != delCount) { + // fix the reader's live docs and del count + assert delCount > reader.numDeletedDocs(); // beware of zombies + + SegmentReader newReader = new SegmentReader(info, reader, liveDocs, info.info.getDocCount() - delCount); + boolean released = false; + try { + rld.release(reader); + released = true; + } finally { + if (!released) { + newReader.decRef(); + } + } + + reader = newReader; + } + + merge.readers.add(reader); + assert delCount <= info.info.getDocCount(): "delCount=" + delCount + " info.docCount=" + info.info.getDocCount() + " rld.pendingDeleteCount=" + rld.getPendingDeleteCount() + " info.getDelCount()=" + info.getDelCount(); + segUpto++; } - if (infoStream != null) { - message("merge: total "+totDocCount+" docs"); - } +// System.out.println("[" + Thread.currentThread().getName() + "] IW.mergeMiddle: merging " + merge.getMergeReaders()); + + // we pass merge.getMergeReaders() instead of merge.readers to allow the + // OneMerge to return a view over the actual segments to merge + final SegmentMerger merger = new SegmentMerger(merge.getMergeReaders(), + merge.info.info, infoStream, dirWrapper, config.getTermIndexInterval(), + checkAbort, globalFieldNumberMap, + context, config.getCheckIntegrityAtMerge()); + merge.checkAborted(directory); // This is where all the work happens: - mergedDocCount = merge.info.docCount = merger.merge(merge.mergeDocStores); + MergeState mergeState; + boolean success3 = false; + try { + if (!merger.shouldMerge()) { + // would result in a 0 document segment: nothing to merge! + mergeState = new MergeState(new ArrayList(), merge.info.info, infoStream, checkAbort); + } else { + mergeState = merger.merge(); + } + success3 = true; + } finally { + if (!success3) { + synchronized(this) { + deleter.refresh(merge.info.info.name); + } + } + } + assert mergeState.segmentInfo == merge.info.info; + merge.info.info.setFiles(new HashSet<>(dirWrapper.getCreatedFiles())); - assert mergedDocCount == totDocCount; + // Record which codec was used to write the segment - success = true; + if (infoStream.isEnabled("IW")) { + if (merge.info.info.getDocCount() == 0) { + infoStream.message("IW", "merge away fully deleted segments"); + } else { + infoStream.message("IW", "merge codec=" + codec + " docCount=" + merge.info.info.getDocCount() + "; merged segment has " + + (mergeState.fieldInfos.hasVectors() ? "vectors" : "no vectors") + "; " + + (mergeState.fieldInfos.hasNorms() ? "norms" : "no norms") + "; " + + (mergeState.fieldInfos.hasDocValues() ? "docValues" : "no docValues") + "; " + + (mergeState.fieldInfos.hasProx() ? "prox" : "no prox") + "; " + + (mergeState.fieldInfos.hasProx() ? 
"freqs" : "no freqs")); + } + } - } finally { - // close readers before we attempt to delete - // now-obsolete segments - if (merger != null) { - merger.closeReaders(); + // Very important to do this before opening the reader + // because codec must know if prox was written for + // this segment: + //System.out.println("merger set hasProx=" + merger.hasProx() + " seg=" + merge.info.name); + boolean useCompoundFile; + synchronized (this) { // Guard segmentInfos + useCompoundFile = mergePolicy.useCompoundFile(segmentInfos, merge.info, this); } - } - if (!commitMerge(merge, merger, mergedDocCount)) - // commitMerge will return false if this merge was aborted - return 0; + if (useCompoundFile) { + success = false; - if (merge.useCompoundFile) { + Collection filesToRemove = merge.info.files(); - // Maybe force a sync here to allow reclaiming of the - // disk space used by the segments we just merged: - if (autoCommit && doCommitBeforeMergeCFS(merge)) { - final long size; - synchronized(this) { - size = merge.info.sizeInBytes(); + try { + filesToRemove = createCompoundFile(infoStream, directory, checkAbort, merge.info.info, context); + success = true; + } catch (IOException ioe) { + synchronized(this) { + if (merge.isAborted()) { + // This can happen if rollback or close(false) + // is called -- fall through to logic below to + // remove the partially created CFS: + } else { + handleMergeException(ioe, merge); + } + } + } catch (Throwable t) { + handleMergeException(t, merge); + } finally { + if (!success) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception creating compound file during merge"); + } + + synchronized(this) { + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + deleter.deleteNewFiles(merge.info.files()); + } + } } - commit(size); - } - - success = false; - final String compoundFileName = mergedName + "." + IndexFileNames.COMPOUND_FILE_EXTENSION; - try { - merger.createCompoundFile(compoundFileName); - success = true; - } catch (IOException ioe) { + // So that, if we hit exc in deleteNewFiles (next) + // or in commitMerge (later), we close the + // per-segment readers in the finally clause below: + success = false; + synchronized(this) { + + // delete new non cfs files directly: they were never + // registered with IFD + deleter.deleteNewFiles(filesToRemove); + if (merge.isAborted()) { - // This can happen if rollback or close(false) - // is called -- fall through to logic below to - // remove the partially created CFS: - success = true; - } else - handleMergeException(ioe, merge); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "abort merge after building CFS"); + } + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_EXTENSION)); + deleter.deleteFile(IndexFileNames.segmentFileName(mergedName, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + return 0; + } } - } catch (Throwable t) { - handleMergeException(t, merge); + + merge.info.info.setUseCompoundFile(true); + } else { + // So that, if we hit exc in commitMerge (later), + // we close the per-segment readers in the finally + // clause below: + success = false; + } + + // Have codec write SegmentInfo. 
Must do this after + // creating CFS so that 1) .si isn't slurped into CFS, + // and 2) .si reflects useCompoundFile=true change + // above: + boolean success2 = false; + try { + codec.segmentInfoFormat().getSegmentInfoWriter().write(directory, merge.info.info, mergeState.fieldInfos, context); + success2 = true; } finally { - if (!success) { - if (infoStream != null) - message("hit exception creating compound file during merge"); + if (!success2) { synchronized(this) { - deleter.deleteFile(compoundFileName); + deleter.deleteNewFiles(merge.info.files()); } } } - if (merge.isAborted()) { - if (infoStream != null) - message("abort merge after building CFS"); - deleter.deleteFile(compoundFileName); - return 0; + // TODO: ideally we would freeze merge.info here!! + // because any changes after writing the .si will be + // lost... + + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", String.format(Locale.ROOT, "merged segment size=%.3f MB vs estimate=%.3f MB", merge.info.sizeInBytes()/1024./1024., merge.estimatedMergeBytes/1024/1024.)); } - synchronized(this) { - if (segmentInfos.indexOf(merge.info) == -1 || merge.isAborted()) { - // Our segment (committed in non-compound - // format) got merged away while we were - // building the compound format. - deleter.deleteFile(compoundFileName); - } else { - merge.info.setUseCompoundFile(true); - checkpoint(); + final IndexReaderWarmer mergedSegmentWarmer = config.getMergedSegmentWarmer(); + if (poolReaders && mergedSegmentWarmer != null && merge.info.info.getDocCount() != 0) { + final ReadersAndUpdates rld = readerPool.get(merge.info, true); + final SegmentReader sr = rld.getReader(IOContext.READ); + try { + mergedSegmentWarmer.warm(sr); + } finally { + synchronized(this) { + rld.release(sr); + readerPool.release(rld); + } } } - } - // Force a sync after commiting the merge. Once this - // sync completes then all index files referenced by the - // current segmentInfos are on stable storage so if the - // OS/machine crashes, or power cord is yanked, the - // index will be intact. Note that this is just one - // (somewhat arbitrary) policy; we could try other - // policies like only sync if it's been > X minutes or - // more than Y bytes have been written, etc. - if (autoCommit) { - final long size; - synchronized(this) { - size = merge.info.sizeInBytes(); + // Force READ context because we merge deletes onto + // this reader: + if (!commitMerge(merge, mergeState)) { + // commitMerge will return false if this merge was + // aborted + return 0; } - commit(size); + + success = true; + + } finally { + // Readers are already closed in commitMerge if we didn't hit + // an exc: + if (!success) { + closeMergeReaders(merge, true); + } } - return mergedDocCount; + return merge.info.info.getDocCount(); } synchronized void addMergeException(MergePolicy.OneMerge merge) { assert merge.getException() != null; - if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen) + if (!mergeExceptions.contains(merge) && mergeGen == merge.mergeGen) { mergeExceptions.add(merge); - } - - // Apply buffered deletes to all segments. 
- private final synchronized boolean applyDeletes() throws CorruptIndexException, IOException { - assert testPoint("startApplyDeletes"); - SegmentInfos rollback = (SegmentInfos) segmentInfos.clone(); - boolean success = false; - boolean changed; - try { - changed = docWriter.applyDeletes(segmentInfos); - success = true; - } finally { - if (!success) { - if (infoStream != null) - message("hit exception flushing deletes"); - - // Carefully remove any partially written .del - // files - final int size = rollback.size(); - for(int i=0;i 0 ? segmentInfos.info(segmentInfos.size()-1) : null; } + /** Returns a string description of all segments, for + * debugging. + * + * @lucene.internal */ public synchronized String segString() { return segString(segmentInfos); } - private synchronized String segString(SegmentInfos infos) { - StringBuffer buffer = new StringBuffer(); - final int count = infos.size(); - for(int i = 0; i < count; i++) { - if (i > 0) { + /** Returns a string description of the specified + * segments, for debugging. + * + * @lucene.internal */ + public synchronized String segString(Iterable infos) { + final StringBuilder buffer = new StringBuilder(); + for(final SegmentCommitInfo info : infos) { + if (buffer.length() > 0) { buffer.append(' '); } - final SegmentInfo info = infos.info(i); - buffer.append(info.segString(directory)); - if (info.dir != directory) - buffer.append("**"); + buffer.append(segString(info)); } return buffer.toString(); } - // Files that have been sync'd already - private HashSet synced = new HashSet(); + /** Returns a string description of the specified + * segment, for debugging. + * + * @lucene.internal */ + public synchronized String segString(SegmentCommitInfo info) { + return info.toString(info.info.dir, numDeletedDocs(info) - info.getDelCount()); + } - // Files that are now being sync'd - private HashSet syncing = new HashSet(); - - private boolean startSync(String fileName, Collection pending) { - synchronized(synced) { - if (!synced.contains(fileName)) { - if (!syncing.contains(fileName)) { - syncing.add(fileName); - return true; - } else { - pending.add(fileName); - return false; - } - } else - return false; + private synchronized void doWait() { + // NOTE: the callers of this method should in theory + // be able to do simply wait(), but, as a defense + // against thread timing hazards where notifyAll() + // fails to be called, we wait for at most 1 second + // and then return so caller can check if wait + // conditions are satisfied: + try { + wait(1000); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); } } - private void finishSync(String fileName, boolean success) { - synchronized(synced) { - assert syncing.contains(fileName); - syncing.remove(fileName); - if (success) - synced.add(fileName); - synced.notifyAll(); - } + private boolean keepFullyDeletedSegments; + + /** Only for testing. 
+ * + * @lucene.internal */ + void setKeepFullyDeletedSegments(boolean v) { + keepFullyDeletedSegments = v; } - /** Blocks until all files in syncing are sync'd */ - private boolean waitForAllSynced(Collection syncing) throws IOException { - synchronized(synced) { - Iterator it = syncing.iterator(); - while(it.hasNext()) { - final String fileName = (String) it.next(); - while(!synced.contains(fileName)) { - if (!syncing.contains(fileName)) - // There was an error because a file that was - // previously syncing failed to appear in synced - return false; - else - try { - synced.wait(); - } catch (InterruptedException ie) { - continue; - } - } - } - return true; - } + boolean getKeepFullyDeletedSegments() { + return keepFullyDeletedSegments; } - /** Pauses before syncing. On Windows, at least, it's - * best (performance-wise) to pause in order to let OS - * flush writes to disk on its own, before forcing a - * sync. - * @deprecated -- this will be removed in 3.0 when - * autoCommit is hardwired to false */ - private void syncPause(long sizeInBytes) { - if (mergeScheduler instanceof ConcurrentMergeScheduler && maxSyncPauseSeconds > 0) { - // Rough heuristic: for every 10 MB, we pause for 1 - // second, up until the max - long pauseTime = (long) (1000*sizeInBytes/10/1024/1024); - final long maxPauseTime = (long) (maxSyncPauseSeconds*1000); - if (pauseTime > maxPauseTime) - pauseTime = maxPauseTime; - final int sleepCount = (int) (pauseTime / 100); - for(int i=0;i files = toSync.files(directory, false); + for(final String fileName: files) { + assert slowFileExists(directory, fileName): "file " + fileName + " does not exist; files=" + Arrays.toString(directory.listAll()); + // If this trips it means we are missing a call to + // .checkpoint somewhere, because by the time we + // are called, deleter should know about every + // file referenced by the current head + // segmentInfos: + assert deleter.exists(fileName): "IndexFileDeleter doesn't know about file " + fileName; } + return true; } - private synchronized void doWait() { - try { - // NOTE: the callers of this method should in theory - // be able to do simply wait(), but, as a defense - // against thread timing hazards where notifyAll() - // falls to be called, we wait for at most 1 second - // and then return so caller can check if wait - // conditions are satisified: - wait(1000); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); + // For infoStream output + synchronized SegmentInfos toLiveInfos(SegmentInfos sis) { + final SegmentInfos newSIS = new SegmentInfos(); + final Map liveSIS = new HashMap<>(); + for(SegmentCommitInfo info : segmentInfos) { + liveSIS.put(info, info); } + for(SegmentCommitInfo info : sis) { + SegmentCommitInfo liveInfo = liveSIS.get(info); + if (liveInfo != null) { + info = liveInfo; + } + newSIS.add(info); + } + + return newSIS; } /** Walk through all files referenced by the current * segmentInfos and ask the Directory to sync each file, * if it wasn't already. If that succeeds, then we * prepare a new segments_N file but do not fully commit * it. 
*/ - private void startCommit(long sizeInBytes) throws IOException { + private void startCommit(final SegmentInfos toSync) throws IOException { - assert testPoint("startStartCommit"); + testPoint("startStartCommit"); + assert pendingCommit == null; - if (hitOOM) - return; + if (tragedy != null) { + throw new IllegalStateException("this writer hit an unrecoverable error; cannot commit", tragedy); + } try { - if (infoStream != null) - message("startCommit(): start sizeInBytes=" + sizeInBytes); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "startCommit(): start"); + } - if (sizeInBytes > 0) - syncPause(sizeInBytes); - - SegmentInfos toSync = null; - final long myChangeCount; - synchronized(this) { - // sizeInBytes > 0 means this is an autoCommit at - // the end of a merge. If at this point stopMerges - // is true (which means a rollback() or - // rollbackTransaction() is waiting for us to - // finish), we skip the commit to avoid deadlock - if (sizeInBytes > 0 && stopMerges) - return; + assert lastCommitChangeCount <= changeCount: "lastCommitChangeCount=" + lastCommitChangeCount + " changeCount=" + changeCount; - // Wait for any running addIndexes to complete - // first, then block any from running until we've - // copied the segmentInfos we intend to sync: - blockAddIndexes(false); - - assert !hasExternalSegments(); - - try { - - assert lastCommitChangeCount <= changeCount; - - if (changeCount == lastCommitChangeCount) { - if (infoStream != null) - message(" skip startCommit(): no changes pending"); - return; + if (pendingCommitChangeCount == lastCommitChangeCount) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", " skip startCommit(): no changes pending"); } + try { + deleter.decRef(filesToCommit); + } finally { + filesToCommit = null; + } + return; + } - // First, we clone & incref the segmentInfos we intend - // to sync, then, without locking, we sync() each file - // referenced by toSync, in the background. 
Multiple - // threads can be doing this at once, if say a large - // merge and a small merge finish at the same time: - - if (infoStream != null) - message("startCommit index=" + segString(segmentInfos) + " changeCount=" + changeCount); - - toSync = (SegmentInfos) segmentInfos.clone(); - deleter.incRef(toSync, false); - myChangeCount = changeCount; - } finally { - resumeAddIndexes(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "startCommit index=" + segString(toLiveInfos(toSync)) + " changeCount=" + changeCount); } + + assert filesExist(toSync); } - assert testPoint("midStartCommit"); + testPoint("midStartCommit"); - boolean setPending = false; + boolean pendingCommitSet = false; try { - // Loop until all files toSync references are sync'd: - while(true) { + testPoint("midStartCommit2"); - final Collection pending = new ArrayList(); - - for(int i=0;i lastCommitChangeCount && (pendingCommit == null || myChangeCount > pendingCommitChangeCount)) { - // Wait now for any current pending commit to complete: - while(pendingCommit != null) { - message("wait for existing pendingCommit to finish..."); - doWait(); - } + assert pendingCommit == null; - if (segmentInfos.getGeneration() > toSync.getGeneration()) - toSync.updateGeneration(segmentInfos); + assert segmentInfos.getGeneration() == toSync.getGeneration(); - boolean success = false; - try { + // Exception here means nothing is prepared + // (this method unwinds everything it did on + // an exception) + toSync.prepareCommit(directory); + //System.out.println("DONE prepareCommit"); - // Exception here means nothing is prepared - // (this method unwinds everything it did on - // an exception) - try { - toSync.prepareCommit(directory); - } finally { - // Have our master segmentInfos record the - // generations we just prepared. We do this - // on error or success so we don't - // double-write a segments_N file. - segmentInfos.updateGeneration(toSync); - } + pendingCommitSet = true; + pendingCommit = toSync; + } - assert pendingCommit == null; - setPending = true; - pendingCommit = toSync; - pendingCommitChangeCount = myChangeCount; - success = true; - } finally { - if (!success) - message("hit exception committing segments file"); - } - } else - message("sync superseded by newer infos"); + // This call can take a long time -- 10s of seconds + // or more. We do it without syncing on this: + boolean success = false; + final Collection filesToSync; + try { + filesToSync = toSync.files(directory, false); + directory.sync(filesToSync); + success = true; + } finally { + if (!success) { + pendingCommitSet = false; + pendingCommit = null; + toSync.rollbackCommit(directory); + } } - message("done all syncs"); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "done all syncs: " + filesToSync); + } - assert testPoint("midStartCommitSuccess"); + testPoint("midStartCommitSuccess"); } finally { synchronized(this) { - if (!setPending) - deleter.decRef(toSync); + // Have our master segmentInfos record the + // generations we just prepared. We do this + // on error or success so we don't + // double-write a segments_N file. 
+ segmentInfos.updateGeneration(toSync); + + if (!pendingCommitSet) { + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit exception committing segments file"); + } + + // Hit exception + deleter.decRefWhileHandlingException(filesToCommit); + filesToCommit = null; + } } } } catch (OutOfMemoryError oom) { - hitOOM = true; - throw oom; + tragicEvent(oom, "startCommit"); } - assert testPoint("finishStartCommit"); + testPoint("finishStartCommit"); } /** @@ -4678,83 +4568,70 @@ } /** - * Returns true iff the index in the named directory is - * currently locked. - * @param directory the directory to check for a lock - * @throws IOException if there is a low-level IO error - */ - public static boolean isLocked(String directory) throws IOException { - Directory dir = FSDirectory.getDirectory(directory); - try { - return isLocked(dir); - } finally { - dir.close(); - } - } - - /** * Forcibly unlocks the index in the named directory. *

    * Caution: this should only be used by failure recovery code, * when it is known that no other process nor thread is in fact * currently accessing this index. */ public static void unlock(Directory directory) throws IOException { - directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release(); + directory.makeLock(IndexWriter.WRITE_LOCK_NAME).close(); } - /** - * Specifies maximum field length in {@link IndexWriter} constructors. - * {@link #setMaxFieldLength(int)} overrides the value set by - * the constructor. - */ - public static final class MaxFieldLength { + /** If {@link DirectoryReader#open(IndexWriter,boolean)} has + * been called (ie, this writer is in near real-time + * mode), then after a merge completes, this class can be + * invoked to warm the reader on the newly merged + * segment, before the merge commits. This is not + * required for near real-time search, but will reduce + * search latency on opening a new near real-time reader + * after a merge completes. + * + * @lucene.experimental + * + *

    NOTE: warm is called before any deletes have + * been carried over to the merged segment. */ + public static abstract class IndexReaderWarmer { - private int limit; - private String name; - - /** - * Private type-safe-enum-pattern constructor. - * - * @param name instance name - * @param limit maximum field length - */ - private MaxFieldLength(String name, int limit) { - this.name = name; - this.limit = limit; + /** Sole constructor. (For invocation by subclass + * constructors, typically implicit.) */ + protected IndexReaderWarmer() { } - /** - * Public constructor to allow users to specify the maximum field size limit. - * - * @param limit The maximum field length - */ - public MaxFieldLength(int limit) { - this("User-specified", limit); + /** Invoked on the {@link AtomicReader} for the newly + * merged segment, before that segment is made visible + * to near-real-time readers. */ + public abstract void warm(AtomicReader reader) throws IOException; + } + + private void tragicEvent(Throwable tragedy, String location) { + // We cannot hold IW's lock here else it can lead to deadlock: + assert Thread.holdsLock(this) == false; + + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "hit " + tragedy.getClass().getSimpleName() + " inside " + location); } - - public int getLimit() { - return limit; + synchronized (this) { + // its possible you could have a really bad day + if (this.tragedy == null) { + this.tragedy = tragedy; + } } - - public String toString() - { - return name + ":" + limit; + // if we are already closed (e.g. called by rollback), this will be a no-op. + synchronized(commitLock) { + if (closing == false) { + try { + rollback(); + } catch (Throwable ignored) { + // it would be confusing to addSuppressed here, its unrelated to the disaster, + // and its possible our internal state is amiss anyway. + } + } } - - /** Sets the maximum field length to {@link Integer#MAX_VALUE}. */ - public static final MaxFieldLength UNLIMITED - = new MaxFieldLength("UNLIMITED", Integer.MAX_VALUE); - - /** - * Sets the maximum field length to - * {@link #DEFAULT_MAX_FIELD_LENGTH} - * */ - public static final MaxFieldLength LIMITED - = new MaxFieldLength("LIMITED", DEFAULT_MAX_FIELD_LENGTH); + IOUtils.reThrowUnchecked(tragedy); } - // Used only by assert for testing. Current points: + // Used for testing. Current points: // startDoFlush // startCommitMerge // startStartCommit @@ -4764,9 +4641,221 @@ // finishStartCommit // startCommitMergeDeletes // startMergeInit - // startApplyDeletes // DocumentsWriter.ThreadState.init start - boolean testPoint(String name) { - return true; + private final void testPoint(String message) { + if (infoStream.isEnabled("TP")) { + infoStream.message("TP", message); + } } + + synchronized boolean nrtIsCurrent(SegmentInfos infos) { + //System.out.println("IW.nrtIsCurrent " + (infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedDeletesStream.any())); + ensureOpen(); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "nrtIsCurrent: infoVersion matches: " + (infos.version == segmentInfos.version) + "; DW changes: " + docWriter.anyChanges() + "; BD changes: "+ bufferedUpdatesStream.any()); + } + return infos.version == segmentInfos.version && !docWriter.anyChanges() && !bufferedUpdatesStream.any(); + } + + synchronized boolean isClosed() { + return closed; + } + + /** Expert: remove any index files that are no longer + * used. + * + *

    IndexWriter normally deletes unused files itself, + * during indexing. However, on Windows, which disallows + * deletion of open files, if there is a reader open on + * the index then those files cannot be deleted. This is + * fine, because IndexWriter will periodically retry + * the deletion.

    + * + *

    However, IndexWriter doesn't try that often: only + * on open, close, flushing a new segment, and finishing + * a merge. If you don't do any of these actions with your + * IndexWriter, you'll see the unused files linger. If + * that's a problem, call this method to delete them + * (once you've closed the open readers that were + * preventing their deletion). + * + *

    In addition, you can call this method to delete + * unreferenced index commits. This might be useful if you + * are using an {@link IndexDeletionPolicy} which holds + * onto index commits until some criteria are met, but those + * commits are no longer needed. Otherwise, those commits will + * be deleted the next time commit() is called. + */ + public synchronized void deleteUnusedFiles() throws IOException { + ensureOpen(false); + deleter.deletePendingFiles(); + deleter.revisitPolicy(); + } + + private synchronized void deletePendingFiles() throws IOException { + deleter.deletePendingFiles(); + } + + /** + * NOTE: this method creates a compound file for all files returned by + * info.files(). While, generally, this may include separate norms and + * deletion files, this SegmentInfo must not reference such files when this + * method is called, because they are not allowed within a compound file. + */ + static final Collection createCompoundFile(InfoStream infoStream, Directory directory, CheckAbort checkAbort, final SegmentInfo info, IOContext context) + throws IOException { + + final String fileName = IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_EXTENSION); + if (infoStream.isEnabled("IW")) { + infoStream.message("IW", "create compound file " + fileName); + } + assert Lucene3xSegmentInfoFormat.getDocStoreOffset(info) == -1; + // Now merge all added files + Collection files = info.files(); + CompoundFileDirectory cfsDir = new CompoundFileDirectory(directory, fileName, context, true); + boolean success = false; + try { + for (String file : files) { + directory.copy(cfsDir, file, file, context); + checkAbort.work(directory.fileLength(file)); + } + success = true; + } finally { + if (success) { + IOUtils.close(cfsDir); + } else { + IOUtils.closeWhileHandlingException(cfsDir); + try { + directory.deleteFile(fileName); + } catch (Throwable t) { + } + try { + directory.deleteFile(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + } catch (Throwable t) { + } + } + } + + // Replace all previous files with the CFS/CFE files: + Set siFiles = new HashSet<>(); + siFiles.add(fileName); + siFiles.add(IndexFileNames.segmentFileName(info.name, "", IndexFileNames.COMPOUND_FILE_ENTRIES_EXTENSION)); + info.setFiles(siFiles); + + return files; + } + + /** + * Tries to delete the given files if unreferenced + * @param files the files to delete + * @throws IOException if an {@link IOException} occurs + * @see IndexFileDeleter#deleteNewFiles(Collection) + */ + synchronized final void deleteNewFiles(Collection files) throws IOException { + deleter.deleteNewFiles(files); + } + + /** + * Cleans up residuals from a segment that could not be entirely flushed due to an error + * @see IndexFileDeleter#refresh(String) + */ + synchronized final void flushFailed(SegmentInfo info) throws IOException { + deleter.refresh(info.name); + } + + final int purge(boolean forced) throws IOException { + return docWriter.purgeBuffer(this, forced); + } + + final void applyDeletesAndPurge(boolean forcePurge) throws IOException { + try { + purge(forcePurge); + } finally { + applyAllDeletesAndUpdates(); + flushCount.incrementAndGet(); + } + } + + final void doAfterSegmentFlushed(boolean triggerMerge, boolean forcePurge) throws IOException { + try { + purge(forcePurge); + } finally { + if (triggerMerge) { + maybeMerge(config.getMergePolicy(), MergeTrigger.SEGMENT_FLUSH, UNBOUNDED_MAX_MERGE_SEGMENTS); + } + } + + } + + synchronized void 
incRefDeleter(SegmentInfos segmentInfos) throws IOException { + ensureOpen(); + deleter.incRef(segmentInfos, false); + } + + synchronized void decRefDeleter(SegmentInfos segmentInfos) throws IOException { + ensureOpen(); + deleter.decRef(segmentInfos); + } + + private boolean processEvents(boolean triggerMerge, boolean forcePurge) throws IOException { + return processEvents(eventQueue, triggerMerge, forcePurge); + } + + private boolean processEvents(Queue queue, boolean triggerMerge, boolean forcePurge) throws IOException { + Event event; + boolean processed = false; + while((event = queue.poll()) != null) { + processed = true; + event.process(this, triggerMerge, forcePurge); + } + return processed; + } + + /** + * Interface for internal atomic events. See {@link DocumentsWriter} for details. Events are executed concurrently and no order is guaranteed. + * Each event should only rely on the serializeability within it's process method. All actions that must happen before or after a certain action must be + * encoded inside the {@link #process(IndexWriter, boolean, boolean)} method. + * + */ + static interface Event { + + /** + * Processes the event. This method is called by the {@link IndexWriter} + * passed as the first argument. + * + * @param writer + * the {@link IndexWriter} that executes the event. + * @param triggerMerge + * false iff this event should not trigger any segment merges + * @param clearBuffers + * true iff this event should clear all buffers associated with the event. + * @throws IOException + * if an {@link IOException} occurs + */ + void process(IndexWriter writer, boolean triggerMerge, boolean clearBuffers) throws IOException; + } + + /** Used only by asserts: returns true if the file exists + * (can be opened), false if it cannot be opened, and + * (unlike Java's File.exists) throws IOException if + * there's some unexpected error. */ + private static boolean slowFileExists(Directory dir, String fileName) throws IOException { + try { + dir.openInput(fileName, IOContext.DEFAULT).close(); + return true; + } catch (NoSuchFileException | FileNotFoundException e) { + return false; + } + } + + /** Anything that will add N docs to the index should reserve first to + * make sure it's allowed. This will throw {@code + * IllegalStateException} if it's not allowed. */ + private void reserveDocs(int numDocs) { + if (pendingNumDocs.addAndGet(numDocs) > actualMaxDocs) { + // Reserve failed + pendingNumDocs.addAndGet(-numDocs); + throw new IllegalStateException("number of documents in the index cannot exceed " + actualMaxDocs); + } + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IndexWriterConfig.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IndexableField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IndexableFieldType.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/IntBlockPool.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/InvertedDocConsumer.java'. Fisheye: No comparison available. Pass `N' to diff? 
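Editor's note: the IndexWriter.java changes above introduce the merged-segment warming hook (IndexWriter.IndexReaderWarmer, invoked from mergeMiddle before commitMerge whenever the writer pools readers). Below is a minimal sketch, not part of the diff itself, of how such a warmer could be wired through IndexWriterConfig; it assumes the 4.x API visible in this diff, and the body of warm() is purely illustrative.

import java.io.IOException;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class MergedSegmentWarmerSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST,
        new StandardAnalyzer(Version.LATEST));
    // Invoked on the newly merged segment before it becomes visible to
    // near-real-time readers; per the diff, it only runs when the writer
    // pools readers (i.e. DirectoryReader.open(writer, ...) has been used).
    iwc.setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
      @Override
      public void warm(AtomicReader reader) throws IOException {
        // Illustrative warming work only: touch per-segment structures that
        // would otherwise be loaded lazily on the first NRT search.
        reader.maxDoc();
      }
    });
    IndexWriter writer = new IndexWriter(dir, iwc);
    writer.close();
    dir.close();
  }
}

Note that, as the javadoc in the diff states, warm() is called before deletes are carried over to the merged segment, so the reader seen here may still expose documents that are about to be deleted.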
Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/InvertedDocConsumerPerField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/InvertedDocConsumerPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/InvertedDocEndConsumer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/InvertedDocEndConsumerPerField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/InvertedDocEndConsumerPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/KeepOnlyLastCommitDeletionPolicy.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -26,25 +26,31 @@ * the default deletion policy. */ -public final class KeepOnlyLastCommitDeletionPolicy implements IndexDeletionPolicy { +public final class KeepOnlyLastCommitDeletionPolicy extends IndexDeletionPolicy { + /** Sole constructor. */ + public KeepOnlyLastCommitDeletionPolicy() { + } + /** * Deletes all commits except the most recent one. */ - public void onInit(List commits) { + @Override + public void onInit(List commits) { // Note that commits.size() should normally be 1: onCommit(commits); } /** * Deletes all commits except the most recent one. */ - public void onCommit(List commits) { + @Override + public void onCommit(List commits) { // Note that commits.size() should normally be 2 (if not // called by onInit above): int size = commits.size(); for(int i=0;iDetermines the largest segment (measured by total @@ -54,14 +62,31 @@ maxMergeSize = (long) (mb*1024*1024); } - /** Returns the largest segment (meaured by total byte + /** Returns the largest segment (measured by total byte * size of the segment's files, in MB) that may be merged * with other segments. * @see #setMaxMergeMB */ public double getMaxMergeMB() { return ((double) maxMergeSize)/1024/1024; } + /**

    Determines the largest segment (measured by total + * byte size of the segment's files, in MB) that may be + * merged with other segments during forceMerge. Setting + * it low will leave the index with more than 1 segment, + * even if {@link IndexWriter#forceMerge} is called.*/ + public void setMaxMergeMBForForcedMerge(double mb) { + maxMergeSizeForForcedMerge = (long) (mb*1024*1024); + } + + /** Returns the largest segment (measured by total byte + * size of the segment's files, in MB) that may be merged + * with other segments during forceMerge. + * @see #setMaxMergeMBForForcedMerge */ + public double getMaxMergeMBForForcedMerge() { + return ((double) maxMergeSizeForForcedMerge)/1024/1024; + } + /** Sets the minimum size for the lowest level segments. * Any segments below this size are considered to be on * the same level (even if they vary drastically in size) @@ -82,4 +107,3 @@ return ((double) minMergeSize)/1024/1024; } } - Index: 3rdParty_sources/lucene/org/apache/lucene/index/LogDocMergePolicy.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/LogDocMergePolicy.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/LogDocMergePolicy.java 17 Aug 2012 14:55:03 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/LogDocMergePolicy.java 16 Dec 2014 11:31:41 -0000 1.1.2.1 @@ -1,6 +1,8 @@ package org.apache.lucene.index; -/** +import java.io.IOException; + +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -26,16 +28,20 @@ /** Default minimum segment size. @see setMinMergeDocs */ public static final int DEFAULT_MIN_MERGE_DOCS = 1000; + /** Sole constructor, setting all settings to their + * defaults. */ public LogDocMergePolicy() { - super(); minMergeSize = DEFAULT_MIN_MERGE_DOCS; - - // maxMergeSize is never used by LogDocMergePolicy; set + + // maxMergeSize(ForForcedMerge) are never used by LogDocMergePolicy; set // it to Long.MAX_VALUE to disable it maxMergeSize = Long.MAX_VALUE; + maxMergeSizeForForcedMerge = Long.MAX_VALUE; } - protected long size(SegmentInfo info) { - return info.docCount; + + @Override + protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException { + return sizeDocs(info, writer); } /** Sets the minimum size for the lowest level segments. @@ -58,4 +64,3 @@ return (int) minMergeSize; } } - Index: 3rdParty_sources/lucene/org/apache/lucene/index/LogMergePolicy.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/LogMergePolicy.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/LogMergePolicy.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/LogMergePolicy.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
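Editor's note: the LogByteSizeMergePolicy/LogDocMergePolicy hunks above add a separate size limit for forced merges, and the LogMergePolicy hunks that follow document mergeFactor and calibrateSizeByDeletes. The sketch below, which is not part of the diff, shows how those setters could be combined; the thresholds are arbitrary example values, not recommendations from this change.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.util.Version;

public class LogMergePolicyConfigSketch {
  static IndexWriterConfig newConfig() {
    LogByteSizeMergePolicy mp = new LogByteSizeMergePolicy();
    mp.setMergeFactor(10);                // merge up to 10 segments per level
    mp.setMaxMergeMB(2048);               // segments above ~2 GB are skipped by normal merging
    mp.setMaxMergeMBForForcedMerge(5120); // ...and above ~5 GB even forceMerge leaves them alone
    mp.setCalibrateSizeByDeletes(true);   // pro-rate segment size by live (non-deleted) docs

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST,
        new StandardAnalyzer(Version.LATEST));
    iwc.setMergePolicy(mp);
    return iwc;
  }
}

With setMaxMergeMBForForcedMerge set low, forceMerge can legitimately leave more than one segment behind, exactly as the new javadoc above warns.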
@@ -18,15 +18,22 @@ */ import java.io.IOException; -import java.util.Set; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Locale; +import java.util.Map; -import org.apache.lucene.store.Directory; -/**

    This class implements a {@link MergePolicy} that tries - * to merge segments into levels of exponentially - * increasing size, where each level has < mergeFactor - * segments in it. Whenever a given levle has mergeFactor - * segments or more in it, they will be merged.

    +/** + *

    This class implements a {@link MergePolicy} that tries + * to merge segments into levels of exponentially + * increasing size, where each level has fewer segments than + * the value of the merge factor. Whenever extra segments + * (beyond the merge factor upper bound) are encountered, + * all segments within the level are merged. You can get or + * set the merge factor using {@link #getMergeFactor()} and + * {@link #setMergeFactor(int)} respectively.

    * *

    This class is abstract and requires a subclass to * define the {@link #size} method which specifies how a @@ -41,7 +48,7 @@ /** Defines the allowed range of log(size) for each * level. A level is computed by taking the max segment - * log size, minuse LEVEL_LOG_SPAN, and finding all + * log size, minus LEVEL_LOG_SPAN, and finding all * segments falling within that range. */ public static final double LEVEL_LOG_SPAN = 0.75; @@ -53,21 +60,57 @@ * or larger will never be merged. @see setMaxMergeDocs */ public static final int DEFAULT_MAX_MERGE_DOCS = Integer.MAX_VALUE; - private int mergeFactor = DEFAULT_MERGE_FACTOR; + /** Default noCFSRatio. If a merge's size is >= 10% of + * the index, then we disable compound file for it. + * @see MergePolicy#setNoCFSRatio */ + public static final double DEFAULT_NO_CFS_RATIO = 0.1; - long minMergeSize; - long maxMergeSize; - int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + /** How many segments to merge at a time. */ + protected int mergeFactor = DEFAULT_MERGE_FACTOR; - private boolean useCompoundFile = true; - private boolean useCompoundDocStore = true; - private IndexWriter writer; + /** Any segments whose size is smaller than this value + * will be rounded up to this value. This ensures that + * tiny segments are aggressively merged. */ + protected long minMergeSize; - private void message(String message) { - if (writer != null) - writer.message("LMP: " + message); + /** If the size of a segment exceeds this value then it + * will never be merged. */ + protected long maxMergeSize; + + // Although the core MPs set it explicitly, we must default in case someone + // out there wrote his own LMP ... + /** If the size of a segment exceeds this value then it + * will never be merged during {@link IndexWriter#forceMerge}. */ + protected long maxMergeSizeForForcedMerge = Long.MAX_VALUE; + + /** If a segment has more than this many documents then it + * will never be merged. */ + protected int maxMergeDocs = DEFAULT_MAX_MERGE_DOCS; + + /** If true, we pro-rate a segment's size by the + * percentage of non-deleted documents. */ + protected boolean calibrateSizeByDeletes = true; + + /** Sole constructor. (For invocation by subclass + * constructors, typically implicit.) */ + public LogMergePolicy() { + super(DEFAULT_NO_CFS_RATIO, MergePolicy.DEFAULT_MAX_CFS_SEGMENT_SIZE); } + /** Returns true if {@code LMP} is enabled in {@link + * IndexWriter}'s {@code infoStream}. */ + protected boolean verbose(IndexWriter writer) { + return writer != null && writer.infoStream.isEnabled("LMP"); + } + + /** Print a debug message to {@link IndexWriter}'s {@code + * infoStream}. */ + protected void message(String message, IndexWriter writer) { + if (verbose(writer)) { + writer.infoStream.message("LMP", message); + } + } + /**

    Returns the number of segments that are merged at * once and also controls the total number of segments * allowed to accumulate in the index.

    */ @@ -77,10 +120,10 @@ /** Determines how often segment indices are merged by * addDocument(). With smaller values, less RAM is used - * while indexing, and searches on unoptimized indices are + * while indexing, and searches are * faster, but indexing speed is slower. With larger * values, more RAM is used during indexing, and while - * searches on unoptimized indices are slower, indexing is + * searches is slower, indexing is * faster. Thus larger values (> 10) are best for batch * index creation, and smaller values (< 10) for indices * that are interactively maintained. */ @@ -90,237 +133,360 @@ this.mergeFactor = mergeFactor; } - // Javadoc inherited - public boolean useCompoundFile(SegmentInfos infos, SegmentInfo info) { - return useCompoundFile; + /** Sets whether the segment size should be calibrated by + * the number of deletes when choosing segments for merge. */ + public void setCalibrateSizeByDeletes(boolean calibrateSizeByDeletes) { + this.calibrateSizeByDeletes = calibrateSizeByDeletes; } - /** Sets whether compound file format should be used for - * newly flushed and newly merged segments. */ - public void setUseCompoundFile(boolean useCompoundFile) { - this.useCompoundFile = useCompoundFile; + /** Returns true if the segment size should be calibrated + * by the number of deletes when choosing segments for merge. */ + public boolean getCalibrateSizeByDeletes() { + return calibrateSizeByDeletes; } - /** Returns true if newly flushed and newly merge segments - * are written in compound file format. @see - * #setUseCompoundFile */ - public boolean getUseCompoundFile() { - return useCompoundFile; + /** Return the number of documents in the provided {@link + * SegmentCommitInfo}, pro-rated by percentage of + * non-deleted documents if {@link + * #setCalibrateSizeByDeletes} is set. */ + protected long sizeDocs(SegmentCommitInfo info, IndexWriter writer) throws IOException { + if (calibrateSizeByDeletes) { + int delCount = writer.numDeletedDocs(info); + assert delCount <= info.info.getDocCount(); + return (info.info.getDocCount() - (long)delCount); + } else { + return info.info.getDocCount(); + } } - // Javadoc inherited - public boolean useCompoundDocStore(SegmentInfos infos) { - return useCompoundDocStore; + /** Return the byte size of the provided {@link + * SegmentCommitInfo}, pro-rated by percentage of + * non-deleted documents if {@link + * #setCalibrateSizeByDeletes} is set. */ + protected long sizeBytes(SegmentCommitInfo info, IndexWriter writer) throws IOException { + if (calibrateSizeByDeletes) { + return super.size(info, writer); + } + return info.sizeInBytes(); } + + /** Returns true if the number of segments eligible for + * merging is less than or equal to the specified {@code + * maxNumSegments}. 
*/ + protected boolean isMerged(SegmentInfos infos, int maxNumSegments, Map segmentsToMerge, IndexWriter writer) throws IOException { + final int numSegments = infos.size(); + int numToMerge = 0; + SegmentCommitInfo mergeInfo = null; + boolean segmentIsOriginal = false; + for(int i=0;i segments = infos.asList(); - public void close() {} - - abstract protected long size(SegmentInfo info) throws IOException; - - private boolean isOptimized(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set segmentsToOptimize) throws IOException { - final int numSegments = infos.size(); - int numToOptimize = 0; - SegmentInfo optimizeInfo = null; - for(int i=0;i= 0) { + SegmentCommitInfo info = infos.info(start); + if (size(info, writer) > maxMergeSizeForForcedMerge || sizeDocs(info, writer) > maxMergeDocs) { + if (verbose(writer)) { + message("findForcedMergesSizeLimit: skip segment=" + info + ": size is > maxMergeSize (" + maxMergeSizeForForcedMerge + ") or sizeDocs is > maxMergeDocs (" + maxMergeDocs + ")", writer); + } + // need to skip that segment + add a merge for the 'right' segments, + // unless there is only 1 which is merged. + if (last - start - 1 > 1 || (start != last - 1 && !isMerged(infos, infos.info(start + 1), writer))) { + // there is more than 1 segment to the right of + // this one, or a mergeable single segment. + spec.add(new OneMerge(segments.subList(start + 1, last))); + } + last = start; + } else if (last - start == mergeFactor) { + // mergeFactor eligible segments were found, add them as a merge. + spec.add(new OneMerge(segments.subList(start, last))); + last = start; } + --start; } - return numToOptimize <= maxNumSegments && - (numToOptimize != 1 || isOptimized(writer, optimizeInfo)); - } + // Add any left-over segments, unless there is just 1 + // already fully merged + if (last > 0 && (++start + 1 < last || !isMerged(infos, infos.info(start), writer))) { + spec.add(new OneMerge(segments.subList(start, last))); + } - /** Returns true if this single nfo is optimized (has no - * pending norms or deletes, is in the same dir as the - * writer, and matches the current compound file setting */ - private boolean isOptimized(IndexWriter writer, SegmentInfo info) - throws IOException { - return !info.hasDeletions() && - !info.hasSeparateNorms() && - info.dir == writer.getDirectory() && - info.getUseCompoundFile() == useCompoundFile; + return spec.merges.size() == 0 ? null : spec; } + + /** + * Returns the merges necessary to forceMerge the index. This method constraints + * the returned merges only by the {@code maxNumSegments} parameter, and + * guaranteed that exactly that number of segments will remain in the index. + */ + private MergeSpecification findForcedMergesMaxNumSegments(SegmentInfos infos, int maxNumSegments, int last, IndexWriter writer) throws IOException { + MergeSpecification spec = new MergeSpecification(); + final List segments = infos.asList(); - /** Returns the merges necessary to optimize the index. - * This merge policy defines "optimized" to mean only one - * segment in the index, where that segment has no - * deletions pending nor separate norms, and it is in - * compound file format if the current useCompoundFile - * setting is true. This method returns multiple merges - * (mergeFactor at a time) so the {@link MergeScheduler} - * in use may make use of concurrency. 
*/ - public MergeSpecification findMergesForOptimize(SegmentInfos infos, IndexWriter writer, int maxNumSegments, Set segmentsToOptimize) throws IOException { - MergeSpecification spec; + // First, enroll all "full" merges (size + // mergeFactor) to potentially be run concurrently: + while (last - maxNumSegments + 1 >= mergeFactor) { + spec.add(new OneMerge(segments.subList(last - mergeFactor, last))); + last -= mergeFactor; + } - assert maxNumSegments > 0; + // Only if there are no full merges pending do we + // add a final partial (< mergeFactor segments) merge: + if (0 == spec.merges.size()) { + if (maxNumSegments == 1) { - if (!isOptimized(infos, writer, maxNumSegments, segmentsToOptimize)) { - - // Find the newest (rightmost) segment that needs to - // be optimized (other segments may have been flushed - // since optimize started): - int last = infos.size(); - while(last > 0) { - final SegmentInfo info = infos.info(--last); - if (segmentsToOptimize.contains(info)) { - last++; - break; + // Since we must merge down to 1 segment, the + // choice is simple: + if (last > 1 || !isMerged(infos, infos.info(0), writer)) { + spec.add(new OneMerge(segments.subList(0, last))); } - } + } else if (last > maxNumSegments) { - if (last > 0) { + // Take care to pick a partial merge that is + // least cost, but does not make the index too + // lopsided. If we always just picked the + // partial tail then we could produce a highly + // lopsided index over time: - spec = new MergeSpecification(); + // We must merge this many segments to leave + // maxNumSegments in the index (from when + // forceMerge was first kicked off): + final int finalMergeSize = last - maxNumSegments + 1; - // First, enroll all "full" merges (size - // mergeFactor) to potentially be run concurrently: - while (last - maxNumSegments + 1 >= mergeFactor) { - spec.add(new OneMerge(infos.range(last-mergeFactor, last), useCompoundFile)); - last -= mergeFactor; + // Consider all possible starting points: + long bestSize = 0; + int bestStart = 0; + + for(int i=0;i segmentsToMerge, IndexWriter writer) throws IOException { - // Since we must optimize down to 1 segment, the - // choice is simple: - if (last > 1 || !isOptimized(writer, infos.info(0))) - spec.add(new OneMerge(infos.range(0, last), useCompoundFile)); - } else if (last > maxNumSegments) { + assert maxNumSegments > 0; + if (verbose(writer)) { + message("findForcedMerges: maxNumSegs=" + maxNumSegments + " segsToMerge="+ segmentsToMerge, writer); + } - // Take care to pick a partial merge that is - // least cost, but does not make the index too - // lopsided. If we always just picked the - // partial tail then we could produce a highly - // lopsided index over time: + // If the segments are already merged (e.g. 
there's only 1 segment), or + // there are 0) { + final SegmentCommitInfo info = infos.info(--last); + if (segmentsToMerge.get(info) != null) { + last++; + break; + } + } - // Consider all possible starting points: - long bestSize = 0; - int bestStart = 0; + if (last == 0) { + if (verbose(writer)) { + message("last == 0; skip", writer); + } + return null; + } + + // There is only one segment already, and it is merged + if (maxNumSegments == 1 && last == 1 && isMerged(infos, infos.info(0), writer)) { + if (verbose(writer)) { + message("already 1 seg; skip", writer); + } + return null; + } - for(int i=0;i maxMergeSizeForForcedMerge || sizeDocs(info, writer) > maxMergeDocs) { + anyTooLarge = true; + break; + } + } - spec.add(new OneMerge(infos.range(bestStart, bestStart+finalMergeSize), useCompoundFile)); - } - } - - } else - spec = null; - } else - spec = null; - - return spec; + if (anyTooLarge) { + return findForcedMergesSizeLimit(infos, maxNumSegments, last, writer); + } else { + return findForcedMergesMaxNumSegments(infos, maxNumSegments, last, writer); + } } /** - * Finds merges necessary to expunge all deletes from the + * Finds merges necessary to force-merge all deletes from the * index. We simply merge adjacent segments that have * deletes, up to mergeFactor at a time. */ - public MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos, - IndexWriter writer) - throws CorruptIndexException, IOException - { - this.writer = writer; + @Override + public MergeSpecification findForcedDeletesMerges(SegmentInfos segmentInfos, IndexWriter writer) + throws IOException { + final List segments = segmentInfos.asList(); + final int numSegments = segments.size(); - final int numSegments = segmentInfos.size(); + if (verbose(writer)) { + message("findForcedDeleteMerges: " + numSegments + " segments", writer); + } - message("findMergesToExpungeDeletes: " + numSegments + " segments"); - MergeSpecification spec = new MergeSpecification(); int firstSegmentWithDeletions = -1; + assert writer != null; for(int i=0;i 0) { + if (verbose(writer)) { + message(" segment " + info.info.name + " has deletions", writer); + } if (firstSegmentWithDeletions == -1) firstSegmentWithDeletions = i; else if (i - firstSegmentWithDeletions == mergeFactor) { // We've seen mergeFactor segments in a row with // deletions, so force a merge now: - message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive"); - spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile)); + if (verbose(writer)) { + message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", writer); + } + spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = i; } } else if (firstSegmentWithDeletions != -1) { // End of a sequence of segments with deletions, so, // merge those past segments even if it's fewer than // mergeFactor segments - message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive"); - spec.add(new OneMerge(segmentInfos.range(firstSegmentWithDeletions, i), useCompoundFile)); + if (verbose(writer)) { + message(" add merge " + firstSegmentWithDeletions + " to " + (i-1) + " inclusive", writer); + } + spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, i))); firstSegmentWithDeletions = -1; } } if (firstSegmentWithDeletions != -1) { - message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive"); - spec.add(new 
OneMerge(segmentInfos.range(firstSegmentWithDeletions, numSegments), useCompoundFile)); + if (verbose(writer)) { + message(" add merge " + firstSegmentWithDeletions + " to " + (numSegments-1) + " inclusive", writer); + } + spec.add(new OneMerge(segments.subList(firstSegmentWithDeletions, numSegments))); } return spec; } + private static class SegmentInfoAndLevel implements Comparable { + SegmentCommitInfo info; + float level; + int index; + + public SegmentInfoAndLevel(SegmentCommitInfo info, float level, int index) { + this.info = info; + this.level = level; + this.index = index; + } + + // Sorts largest to smallest + @Override + public int compareTo(SegmentInfoAndLevel other) { + return Float.compare(other.level, level); + } + } + /** Checks if any merges are now necessary and returns a * {@link MergePolicy.MergeSpecification} if so. A merge * is necessary when there are more than {@link * #setMergeFactor} segments at a given level. When * multiple levels have too many segments, this method * will return multiple merges, allowing the {@link * MergeScheduler} to use concurrency. */ - public MergeSpecification findMerges(SegmentInfos infos, IndexWriter writer) throws IOException { + @Override + public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos infos, IndexWriter writer) throws IOException { final int numSegments = infos.size(); - this.writer = writer; - message("findMerges: " + numSegments + " segments"); + if (verbose(writer)) { + message("findMerges: " + numSegments + " segments", writer); + } // Compute levels, which is just log (base mergeFactor) // of the size of each segment - float[] levels = new float[numSegments]; + final List levels = new ArrayList<>(); final float norm = (float) Math.log(mergeFactor); - final Directory directory = writer.getDirectory(); + final Collection mergingSegments = writer.getMergingSegments(); for(int i=0;i= maxMergeSize) { + extra += " [skip: too large]"; + } + message("seg=" + writer.segString(info) + " level=" + infoLevel.level + " size=" + String.format(Locale.ROOT, "%.3f MB", segBytes/1024/1024.) + extra, writer); + } } final float levelFloor; @@ -338,57 +504,78 @@ MergeSpecification spec = null; + final int numMergeableSegments = levels.size(); + int start = 0; - while(start < numSegments) { + while(start < numMergeableSegments) { // Find max level of all segments not already // quantized. 
- float maxLevel = levels[start]; - for(int i=1+start;i maxLevel) + float maxLevel = levels.get(start).level; + for(int i=1+start;i maxLevel) { maxLevel = level; + } } // Now search backwards for the rightmost segment that // falls into this level: float levelBottom; - if (maxLevel < levelFloor) + if (maxLevel <= levelFloor) { // All remaining segments fall into the min level levelBottom = -1.0F; - else { + } else { levelBottom = (float) (maxLevel - LEVEL_LOG_SPAN); // Force a boundary at the level floor - if (levelBottom < levelFloor && maxLevel >= levelFloor) + if (levelBottom < levelFloor && maxLevel >= levelFloor) { levelBottom = levelFloor; + } } - int upto = numSegments-1; + int upto = numMergeableSegments-1; while(upto >= start) { - if (levels[upto] >= levelBottom) { + if (levels.get(upto).level >= levelBottom) { break; } upto--; } - message(" level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments"); + if (verbose(writer)) { + message(" level " + levelBottom + " to " + maxLevel + ": " + (1+upto-start) + " segments", writer); + } // Finally, record all merges that are viable at this level: int end = start + mergeFactor; while(end <= 1+upto) { boolean anyTooLarge = false; + boolean anyMerging = false; for(int i=start;i= maxMergeSize || info.docCount >= maxMergeDocs); + final SegmentCommitInfo info = levels.get(i).info; + anyTooLarge |= (size(info, writer) >= maxMergeSize || sizeDocs(info, writer) >= maxMergeDocs); + if (mergingSegments.contains(info)) { + anyMerging = true; + break; + } } - if (!anyTooLarge) { + if (anyMerging) { + // skip + } else if (!anyTooLarge) { if (spec == null) spec = new MergeSpecification(); - message(" " + start + " to " + end + ": add this merge"); - spec.add(new OneMerge(infos.range(start, end), useCompoundFile)); - } else - message(" " + start + " to " + end + ": contains segment over maxMergeSize or maxMergeDocs; skipping"); + final List mergeInfos = new ArrayList<>(); + for(int i=start;iExpert: a MergePolicy determines the sequence of - * primitive merge operations to be used for overall merge - * and optimize operations.

    + * primitive merge operations.

    * *

    Whenever the segments in an index have been altered by * {@link IndexWriter}, either the addition of a newly @@ -38,8 +40,8 @@ * merges that are now required. This method returns a * {@link MergeSpecification} instance describing the set of * merges that should be done, or null if no merges are - * necessary. When IndexWriter.optimize is called, it calls - * {@link #findMergesForOptimize} and the MergePolicy should + * necessary. When IndexWriter.forceMerge is called, it calls + * {@link #findForcedMerges(SegmentInfos,int,Map, IndexWriter)} and the MergePolicy should * then return the necessary merges.

    * *

    Note that the policy can return more than one merge at @@ -49,13 +51,37 @@ * ConcurrentMergeScheduler} they will be run concurrently.
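A minimal sketch of a policy that returns several merges at once, written against the 4.x signatures visible in this diff (findMerges/findForcedMerges/findForcedDeletesMerges taking an IndexWriter, and OneMerge taking a List of SegmentCommitInfo); the class name and the fixed group size of ten segments are invented for illustration, and a real subclass may need more than these three methods depending on the exact release:

import java.io.IOException;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.*;

// Illustrative policy: every ten adjacent segments become one OneMerge. Returning
// several OneMerge entries in a single MergeSpecification lets a scheduler such as
// ConcurrentMergeScheduler run them concurrently.
public class TenAtATimeMergePolicy extends MergePolicy {

  @Override
  public MergeSpecification findMerges(MergeTrigger trigger, SegmentInfos infos,
                                       IndexWriter writer) throws IOException {
    List<SegmentCommitInfo> segments = infos.asList();
    MergeSpecification spec = new MergeSpecification();
    for (int start = 0; start + 10 <= segments.size(); start += 10) {
      spec.add(new OneMerge(segments.subList(start, start + 10)));
    }
    return spec.merges.isEmpty() ? null : spec;
  }

  @Override
  public MergeSpecification findForcedMerges(SegmentInfos infos, int maxSegmentCount,
      Map<SegmentCommitInfo, Boolean> segmentsToMerge, IndexWriter writer) throws IOException {
    return null; // illustration only: never force-merges
  }

  @Override
  public MergeSpecification findForcedDeletesMerges(SegmentInfos infos,
                                                    IndexWriter writer) throws IOException {
    return null; // illustration only: never merges away deletes
  }
}

Such a policy would be installed through IndexWriterConfig.setMergePolicy(new TenAtATimeMergePolicy()).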

    * *

    The default MergePolicy is {@link - * LogByteSizeMergePolicy}.

    - *

    NOTE: This API is new and still experimental - * (subject to change suddenly in the next release)

    + * TieredMergePolicy}.
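A small configuration sketch, assuming the 4.x IndexWriterConfig API, an analyzer from the analyzers-common module, and placeholder values for the compound-file thresholds that this diff moves onto the MergePolicy base class:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.TieredMergePolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class MergePolicyConfigExample {
  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();

    // Select the (default) TieredMergePolicy explicitly and tune the
    // compound-file thresholds inherited from MergePolicy.
    TieredMergePolicy mp = new TieredMergePolicy();
    mp.setNoCFSRatio(0.1);           // merged segments above 10% of the index stay non-compound
    mp.setMaxCFSSegmentSizeMB(512);  // merged segments above 512 MB stay non-compound

    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
        new StandardAnalyzer(Version.LUCENE_CURRENT));
    conf.setMergePolicy(mp);

    IndexWriter writer = new IndexWriter(dir, conf);
    // ... add documents ...
    writer.close();
  }
}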

    + * + * @lucene.experimental */ - public abstract class MergePolicy { + /** A map of doc IDs. */ + public static abstract class DocMap { + /** Sole constructor, typically invoked from sub-classes constructors. */ + protected DocMap() {} + + /** Return the new doc ID according to its old value. */ + public abstract int map(int old); + + /** Useful from an assert. */ + boolean isConsistent(int maxDoc) { + final FixedBitSet targets = new FixedBitSet(maxDoc); + for (int i = 0; i < maxDoc; ++i) { + final int target = map(i); + if (target < 0 || target >= maxDoc) { + assert false : "out of range: " + target + " not in [0-" + maxDoc + "["; + return false; + } else if (targets.get(target)) { + assert false : target + " is already taken (" + i + ")"; + return false; + } + } + return true; + } + } + /** OneMerge provides the information necessary to perform * an individual primitive merge operation, resulting in * a single new segment. The merge spec includes the @@ -64,28 +90,85 @@ public static class OneMerge { - SegmentInfo info; // used by IndexWriter - boolean mergeDocStores; // used by IndexWriter - boolean optimize; // used by IndexWriter - SegmentInfos segmentsClone; // used by IndexWriter - boolean increfDone; // used by IndexWriter + SegmentCommitInfo info; // used by IndexWriter boolean registerDone; // used by IndexWriter long mergeGen; // used by IndexWriter boolean isExternal; // used by IndexWriter - int maxNumSegmentsOptimize; // used by IndexWriter + int maxNumSegments = -1; // used by IndexWriter - final SegmentInfos segments; - final boolean useCompoundFile; + /** Estimated size in bytes of the merged segment. */ + public volatile long estimatedMergeBytes; // used by IndexWriter + + // Sum of sizeInBytes of all SegmentInfos; set by IW.mergeInit + volatile long totalMergeBytes; + + List readers; // used by IndexWriter + + /** Segments to be merged. */ + public final List segments; + + /** Total number of documents in segments to be merged, not accounting for deletions. */ + public final int totalDocCount; boolean aborted; Throwable error; + boolean paused; - public OneMerge(SegmentInfos segments, boolean useCompoundFile) { + /** Sole constructor. + * @param segments List of {@link SegmentCommitInfo}s + * to be merged. */ + public OneMerge(List segments) { if (0 == segments.size()) throw new RuntimeException("segments must include at least one segment"); - this.segments = segments; - this.useCompoundFile = useCompoundFile; + // clone the list, as the in list may be based off original SegmentInfos and may be modified + this.segments = new ArrayList<>(segments); + int count = 0; + for(SegmentCommitInfo info : segments) { + count += info.info.getDocCount(); + } + totalDocCount = count; } + /** Expert: Get the list of readers to merge. Note that this list does not + * necessarily match the list of segments to merge and should only be used + * to feed SegmentMerger to initialize a merge. When a {@link OneMerge} + * reorders doc IDs, it must override {@link #getDocMap} too so that + * deletes that happened during the merge can be applied to the newly + * merged segment. 
*/ + public List getMergeReaders() throws IOException { + if (readers == null) { + throw new IllegalStateException("IndexWriter has not initialized readers from the segment infos yet"); + } + final List readers = new ArrayList<>(this.readers.size()); + for (AtomicReader reader : this.readers) { + if (reader.numDocs() > 0) { + readers.add(reader); + } + } + return Collections.unmodifiableList(readers); + } + + /** + * Expert: Sets the {@link SegmentCommitInfo} of this {@link OneMerge}. + * Allows sub-classes to e.g. set diagnostics properties. + */ + public void setInfo(SegmentCommitInfo info) { + this.info = info; + } + + /** Expert: If {@link #getMergeReaders()} reorders document IDs, this method + * must be overridden to return a mapping from the natural doc ID + * (the doc ID that would result from a natural merge) to the actual doc + * ID. This mapping is used to apply deletions that happened during the + * merge to the new segment. */ + public DocMap getDocMap(MergeState mergeState) { + return new DocMap() { + @Override + public int map(int docID) { + return docID; + } + }; + } + /** Record that an exception occurred while executing * this merge */ synchronized void setException(Throwable error) { @@ -103,31 +186,99 @@ * not be committed. */ synchronized void abort() { aborted = true; + notifyAll(); } /** Returns true if this merge was aborted. */ synchronized boolean isAborted() { return aborted; } - synchronized void checkAborted(Directory dir) throws MergeAbortedException { - if (aborted) + /** Called periodically by {@link IndexWriter} while + * merging to see if the merge is aborted. */ + public synchronized void checkAborted(Directory dir) throws MergeAbortedException { + if (aborted) { throw new MergeAbortedException("merge is aborted: " + segString(dir)); + } + + while (paused) { + try { + // In theory we could wait() indefinitely, but we + // do 1000 msec, defensively + wait(1000); + } catch (InterruptedException ie) { + throw new RuntimeException(ie); + } + if (aborted) { + throw new MergeAbortedException("merge is aborted: " + segString(dir)); + } + } } - String segString(Directory dir) { - StringBuffer b = new StringBuffer(); + /** Set or clear whether this merge is paused paused (for example + * {@link ConcurrentMergeScheduler} will pause merges + * if too many are running). */ + synchronized public void setPause(boolean paused) { + this.paused = paused; + if (!paused) { + // Wakeup merge thread, if it's waiting + notifyAll(); + } + } + + /** Returns true if this merge is paused. + * + * @see #setPause(boolean) */ + synchronized public boolean getPause() { + return paused; + } + + /** Returns a readable description of the current merge + * state. */ + public String segString(Directory dir) { + StringBuilder b = new StringBuilder(); final int numSegments = segments.size(); for(int i=0;i 0) b.append(' '); - b.append(segments.info(i).segString(dir)); + b.append(segments.get(i).toString(dir, 0)); } - if (info != null) - b.append(" into ").append(info.name); - if (optimize) - b.append(" [optimize]"); + if (info != null) { + b.append(" into ").append(info.info.name); + } + if (maxNumSegments != -1) + b.append(" [maxNumSegments=" + maxNumSegments + "]"); + if (aborted) { + b.append(" [ABORTED]"); + } return b.toString(); } + + /** + * Returns the total size in bytes of this merge. Note that this does not + * indicate the size of the merged segment, but the + * input total size. This is only set once the merge is + * initialized by IndexWriter. 
+ */ + public long totalBytesSize() throws IOException { + return totalMergeBytes; + } + + /** + * Returns the total number of documents that are included with this merge. + * Note that this does not indicate the number of documents after the merge. + * */ + public int totalNumDocs() throws IOException { + int total = 0; + for (SegmentCommitInfo info : segments) { + total += info.info.getDocCount(); + } + return total; + } + + /** Return {@link MergeInfo} describing this merge. */ + public MergeInfo getMergeInfo() { + return new MergeInfo(totalDocCount, estimatedMergeBytes, isExternal, maxNumSegments); + } } /** @@ -142,18 +293,27 @@ * The subset of segments to be included in the primitive merge. */ - public List merges = new ArrayList(); + public final List merges = new ArrayList<>(); + /** Sole constructor. Use {@link + * #add(MergePolicy.OneMerge)} to add merges. */ + public MergeSpecification() { + } + + /** Adds the provided {@link OneMerge} to this + * specification. */ public void add(OneMerge merge) { merges.add(merge); } + /** Returns a description of the merges in this + * specification. */ public String segString(Directory dir) { - StringBuffer b = new StringBuffer(); + StringBuilder b = new StringBuilder(); b.append("MergeSpec:\n"); final int count = merges.size(); for(int i=0;ifalse. Normally this exception is + * privately caught and suppresed by {@link IndexWriter}. */ public static class MergeAbortedException extends IOException { + /** Create a {@link MergeAbortedException}. */ public MergeAbortedException() { super("merge is aborted"); } + + /** Create a {@link MergeAbortedException} with a + * specified message. */ public MergeAbortedException(String message) { super(message); } } - + /** - * Determine what set of merge operations are now - * necessary on the index. The IndexWriter calls this - * whenever there is a change to the segments. This call - * is always synchronized on the IndexWriter instance so - * only one thread at a time will call this method. - * - * @param segmentInfos the total set of segments in the index - * @param writer IndexWriter instance + * Default ratio for compound file system usage. Set to 1.0, always use + * compound file system. */ - abstract MergeSpecification findMerges(SegmentInfos segmentInfos, - IndexWriter writer) - throws CorruptIndexException, IOException; + protected static final double DEFAULT_NO_CFS_RATIO = 1.0; /** - * Determine what set of merge operations is necessary in - * order to optimize the index. The IndexWriter calls - * this when its optimize() method is called. This call - * is always synchronized on the IndexWriter instance so - * only one thread at a time will call this method. - * - * @param segmentInfos the total set of segments in the index - * @param writer IndexWriter instance - * @param maxSegmentCount requested maximum number of - * segments in the index (currently this is always 1) - * @param segmentsToOptimize contains the specific - * SegmentInfo instances that must be merged away. This - * may be a subset of all SegmentInfos. + * Default max segment size in order to use compound file system. Set to {@link Long#MAX_VALUE}. 
*/ - abstract MergeSpecification findMergesForOptimize(SegmentInfos segmentInfos, - IndexWriter writer, - int maxSegmentCount, - Set segmentsToOptimize) - throws CorruptIndexException, IOException; + protected static final long DEFAULT_MAX_CFS_SEGMENT_SIZE = Long.MAX_VALUE; + /** If the size of the merge segment exceeds this ratio of + * the total index size then it will remain in + * non-compound format */ + protected double noCFSRatio = DEFAULT_NO_CFS_RATIO; + + /** If the size of the merged segment exceeds + * this value then it will not use compound file format. */ + protected long maxCFSSegmentSize = DEFAULT_MAX_CFS_SEGMENT_SIZE; + /** - * Determine what set of merge operations is necessary in - * order to expunge all deletes from the index. - * @param segmentInfos the total set of segments in the index - * @param writer IndexWriter instance + * Creates a new merge policy instance. */ - MergeSpecification findMergesToExpungeDeletes(SegmentInfos segmentInfos, - IndexWriter writer) - throws CorruptIndexException, IOException - { - throw new RuntimeException("not implemented"); + public MergePolicy() { + this(DEFAULT_NO_CFS_RATIO, DEFAULT_MAX_CFS_SEGMENT_SIZE); } + + /** + * Creates a new merge policy instance with default settings for noCFSRatio + * and maxCFSSegmentSize. This ctor should be used by subclasses using different + * defaults than the {@link MergePolicy} + */ + protected MergePolicy(double defaultNoCFSRatio, long defaultMaxCFSSegmentSize) { + this.noCFSRatio = defaultNoCFSRatio; + this.maxCFSSegmentSize = defaultMaxCFSSegmentSize; + } /** - * Release all resources for the policy. + * Determine what set of merge operations are now necessary on the index. + * {@link IndexWriter} calls this whenever there is a change to the segments. + * This call is always synchronized on the {@link IndexWriter} instance so + * only one thread at a time will call this method. + * @param mergeTrigger the event that triggered the merge + * @param segmentInfos + * the total set of segments in the index + * @param writer the IndexWriter to find the merges on */ - abstract void close(); + public abstract MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) + throws IOException; /** - * Returns true if a newly flushed (not from merge) - * segment should use the compound file format. + * Determine what set of merge operations is necessary in + * order to merge to <= the specified segment count. {@link IndexWriter} calls this when its + * {@link IndexWriter#forceMerge} method is called. This call is always + * synchronized on the {@link IndexWriter} instance so only one thread at a + * time will call this method. + * + * @param segmentInfos + * the total set of segments in the index + * @param maxSegmentCount + * requested maximum number of segments in the index (currently this + * is always 1) + * @param segmentsToMerge + * contains the specific SegmentInfo instances that must be merged + * away. This may be a subset of all + * SegmentInfos. If the value is True for a + * given SegmentInfo, that means this segment was + * an original segment present in the + * to-be-merged index; else, it was a segment + * produced by a cascaded merge. 
+ * @param writer the IndexWriter to find the merges on */ - abstract boolean useCompoundFile(SegmentInfos segments, SegmentInfo newSegment); + public abstract MergeSpecification findForcedMerges( + SegmentInfos segmentInfos, int maxSegmentCount, Map segmentsToMerge, IndexWriter writer) + throws IOException; /** - * Returns true if the doc store files should use the - * compound file format. + * Determine what set of merge operations is necessary in order to expunge all + * deletes from the index. + * + * @param segmentInfos + * the total set of segments in the index + * @param writer the IndexWriter to find the merges on */ - abstract boolean useCompoundDocStore(SegmentInfos segments); + public abstract MergeSpecification findForcedDeletesMerges( + SegmentInfos segmentInfos, IndexWriter writer) throws IOException; + + /** + * Returns true if a new segment (regardless of its origin) should use the + * compound file format. The default implementation returns true + * iff the size of the given mergedInfo is less or equal to + * {@link #getMaxCFSSegmentSizeMB()} and the size is less or equal to the + * TotalIndexSize * {@link #getNoCFSRatio()} otherwise false. + */ + public boolean useCompoundFile(SegmentInfos infos, SegmentCommitInfo mergedInfo, IndexWriter writer) throws IOException { + if (getNoCFSRatio() == 0.0) { + return false; + } + long mergedInfoSize = size(mergedInfo, writer); + if (mergedInfoSize > maxCFSSegmentSize) { + return false; + } + if (getNoCFSRatio() >= 1.0) { + return true; + } + long totalSize = 0; + for (SegmentCommitInfo info : infos) { + totalSize += size(info, writer); + } + return mergedInfoSize <= getNoCFSRatio() * totalSize; + } + + /** Return the byte size of the provided {@link + * SegmentCommitInfo}, pro-rated by percentage of + * non-deleted documents is set. */ + protected long size(SegmentCommitInfo info, IndexWriter writer) throws IOException { + long byteSize = info.sizeInBytes(); + int delCount = writer.numDeletedDocs(info); + double delRatio = (info.info.getDocCount() <= 0 ? 0.0f : ((float)delCount / (float)info.info.getDocCount())); + assert delRatio <= 1.0; + return (info.info.getDocCount() <= 0 ? byteSize : (long)(byteSize * (1.0 - delRatio))); + } + + /** Returns true if this single info is already fully merged (has no + * pending deletes, is in the same dir as the + * writer, and matches the current compound file setting */ + protected final boolean isMerged(SegmentInfos infos, SegmentCommitInfo info, IndexWriter writer) throws IOException { + assert writer != null; + boolean hasDeletions = writer.numDeletedDocs(info) > 0; + return !hasDeletions && + !info.info.hasSeparateNorms() && + info.info.dir == writer.getDirectory() && + useCompoundFile(infos, info, writer) == info.info.getUseCompoundFile(); + } + + /** Returns current {@code noCFSRatio}. + * + * @see #setNoCFSRatio */ + public final double getNoCFSRatio() { + return noCFSRatio; + } + + /** If a merged segment will be more than this percentage + * of the total size of the index, leave the segment as + * non-compound file even if compound file is enabled. + * Set to 1.0 to always use CFS regardless of merge + * size. 
*/ + public final void setNoCFSRatio(double noCFSRatio) { + if (noCFSRatio < 0.0 || noCFSRatio > 1.0) { + throw new IllegalArgumentException("noCFSRatio must be 0.0 to 1.0 inclusive; got " + noCFSRatio); + } + this.noCFSRatio = noCFSRatio; + } + + /** Returns the largest size allowed for a compound file segment */ + public final double getMaxCFSSegmentSizeMB() { + return maxCFSSegmentSize/1024/1024.; + } + + /** If a merged segment will be more than this value, + * leave the segment as + * non-compound file even if compound file is enabled. + * Set this to Double.POSITIVE_INFINITY (default) and noCFSRatio to 1.0 + * to always use CFS regardless of merge size. */ + public final void setMaxCFSSegmentSizeMB(double v) { + if (v < 0.0) { + throw new IllegalArgumentException("maxCFSSegmentSizeMB must be >=0 (got " + v + ")"); + } + v *= 1024 * 1024; + this.maxCFSSegmentSize = (v > Long.MAX_VALUE) ? Long.MAX_VALUE : (long) v; + } + } Index: 3rdParty_sources/lucene/org/apache/lucene/index/MergeScheduler.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/MergeScheduler.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/MergeScheduler.java 17 Aug 2012 14:54:59 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/MergeScheduler.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,23 +17,33 @@ * limitations under the License. */ +import java.io.Closeable; import java.io.IOException; /**

    Expert: {@link IndexWriter} uses an instance * implementing this interface to execute the merges * selected by a {@link MergePolicy}. The default * MergeScheduler is {@link ConcurrentMergeScheduler}.

    - *

    NOTE: This API is new and still experimental - * (subject to change suddenly in the next release)

    + *

    Implementers of sub-classes should make sure that {@link #clone()} + * returns an independent instance able to work with any {@link IndexWriter} + * instance.
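A hedged sketch of a custom scheduler, assuming only the merge(IndexWriter, MergeTrigger, boolean) signature shown in this diff; the class name and the logging are invented, and ConcurrentMergeScheduler is used as the base so the actual merge work stays unchanged:

import java.io.IOException;
import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeTrigger;

// Traces every merge request before handing it to the stock concurrent scheduler.
public class TracingMergeScheduler extends ConcurrentMergeScheduler {
  @Override
  public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound)
      throws IOException {
    System.out.println("merge requested: trigger=" + trigger
        + ", newMergesFound=" + newMergesFound);
    super.merge(writer, trigger, newMergesFound);
  }
}

It would be installed with IndexWriterConfig.setMergeScheduler(new TracingMergeScheduler()).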

    + * @lucene.experimental */ +public abstract class MergeScheduler implements Closeable { -public abstract class MergeScheduler { + /** Sole constructor. (For invocation by subclass + * constructors, typically implicit.) */ + protected MergeScheduler() { + } - /** Run the merges provided by {@link IndexWriter#getNextMerge()}. */ - abstract void merge(IndexWriter writer) - throws CorruptIndexException, IOException; + /** Run the merges provided by {@link IndexWriter#getNextMerge()}. + * @param writer the {@link IndexWriter} to obtain the merges from. + * @param trigger the {@link MergeTrigger} that caused this merge to happen + * @param newMergesFound true iff any new merges were found by the caller otherwise false + * */ + public abstract void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException; /** Close this MergeScheduler. */ - abstract void close() - throws CorruptIndexException, IOException; + @Override + public abstract void close() throws IOException; } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MergeState.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MergeTrigger.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiBits.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiDocsAndPositionsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiDocsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiFields.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiLevelSkipListReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiLevelSkipListWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/MultiReader.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/MultiReader.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/MultiReader.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/MultiReader.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -18,355 +18,68 @@ */ import java.io.IOException; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.Map; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.index.MultiSegmentReader.MultiTermDocs; -import org.apache.lucene.index.MultiSegmentReader.MultiTermEnum; -import org.apache.lucene.index.MultiSegmentReader.MultiTermPositions; - -/** An IndexReader which reads multiple indexes, appending their content. - * - * @version $Id$ +/** A {@link CompositeReader} which reads multiple indexes, appending + * their content. It can be used to create a view on several + * sub-readers (like {@link DirectoryReader}) and execute searches on it. + * + *

    For efficiency, in this API documents are often referred to via + * document numbers, non-negative integers which each name a unique + * document in the index. These document numbers are ephemeral -- they may change + * as documents are added to and deleted from an index. Clients should thus not + * rely on a given document having the same number between sessions. + * + *
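A short sketch of what those composite document numbers mean in practice, assuming the 4.x leaves()/docBase API; the helper class and method names are invented:

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.IndexReader;

public final class DocIdExample {
  /** Prints which sub-reader a top-level doc ID falls into and its local doc ID there. */
  public static void describe(IndexReader composite, int globalDocID) {
    for (AtomicReaderContext leaf : composite.leaves()) {
      int localDocID = globalDocID - leaf.docBase;
      if (localDocID >= 0 && localDocID < leaf.reader().maxDoc()) {
        System.out.println("doc " + globalDocID + " is local doc " + localDocID
            + " of the leaf starting at docBase=" + leaf.docBase);
        return;
      }
    }
    System.out.println("doc " + globalDocID + " is out of range");
  }
}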

    NOTE: {@link + * IndexReader} instances are completely thread + * safe, meaning multiple threads can call any of its methods, + * concurrently. If your application requires external + * synchronization, you should not synchronize on the + * IndexReader instance; use your own + * (non-Lucene) objects instead. */ -public class MultiReader extends IndexReader { - protected IndexReader[] subReaders; - private int[] starts; // 1st docno for each segment - private boolean[] decrefOnClose; // remember which subreaders to decRef on close - private Map normsCache = new HashMap(); - private int maxDoc = 0; - private int numDocs = -1; - private boolean hasDeletions = false; +public class MultiReader extends BaseCompositeReader { + private final boolean closeSubReaders; /** *

    Construct a MultiReader aggregating the named set of (sub)readers. - * Directory locking for delete, undeleteAll, and setNorm operations is - * left to the subreaders.

    *

Note that all subreaders are closed if this MultiReader is closed.
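A usage sketch for the two constructors, assuming two existing on-disk indexes (the paths are placeholders); with closeSubReaders=false the sub-readers are incRef'd, so closing the MultiReader only decRefs them and they must be closed separately:

import java.io.File;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class MultiReaderExample {
  public static void main(String[] args) throws Exception {
    Directory d1 = FSDirectory.open(new File("/path/to/index1"));
    Directory d2 = FSDirectory.open(new File("/path/to/index2"));
    DirectoryReader r1 = DirectoryReader.open(d1);
    DirectoryReader r2 = DirectoryReader.open(d2);

    // Keep the sub-readers open after the MultiReader is closed.
    MultiReader multi = new MultiReader(new IndexReader[] { r1, r2 }, false);
    IndexSearcher searcher = new IndexSearcher(multi);
    // ... run queries against searcher ...

    multi.close();   // only decRefs r1 and r2
    r1.close();
    r2.close();
    d1.close();
    d2.close();
  }
}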

    * @param subReaders set of (sub)readers - * @throws IOException */ - public MultiReader(IndexReader[] subReaders) { - initialize(subReaders, true); + public MultiReader(IndexReader... subReaders) { + this(subReaders, true); } /** *

    Construct a MultiReader aggregating the named set of (sub)readers. - * Directory locking for delete, undeleteAll, and setNorm operations is - * left to the subreaders.

    + * @param subReaders set of (sub)readers; this array will be cloned. * @param closeSubReaders indicates whether the subreaders should be closed * when this MultiReader is closed - * @param subReaders set of (sub)readers - * @throws IOException */ public MultiReader(IndexReader[] subReaders, boolean closeSubReaders) { - initialize(subReaders, closeSubReaders); - } - - private void initialize(IndexReader[] subReaders, boolean closeSubReaders) { - this.subReaders = (IndexReader[]) subReaders.clone(); - starts = new int[subReaders.length + 1]; // build starts array - decrefOnClose = new boolean[subReaders.length]; - for (int i = 0; i < subReaders.length; i++) { - starts[i] = maxDoc; - maxDoc += subReaders[i].maxDoc(); // compute maxDocs - - if (!closeSubReaders) { + super(subReaders.clone()); + this.closeSubReaders = closeSubReaders; + if (!closeSubReaders) { + for (int i = 0; i < subReaders.length; i++) { subReaders[i].incRef(); - decrefOnClose[i] = true; - } else { - decrefOnClose[i] = false; } - - if (subReaders[i].hasDeletions()) - hasDeletions = true; } - starts[subReaders.length] = maxDoc; } - /** - * Tries to reopen the subreaders. - *
    - * If one or more subreaders could be re-opened (i. e. subReader.reopen() - * returned a new instance != subReader), then a new MultiReader instance - * is returned, otherwise this instance is returned. - *

    - * A re-opened instance might share one or more subreaders with the old - * instance. Index modification operations result in undefined behavior - * when performed before the old instance is closed. - * (see {@link IndexReader#reopen()}). - *

    - * If subreaders are shared, then the reference count of those - * readers is increased to ensure that the subreaders remain open - * until the last referring reader is closed. - * - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public IndexReader reopen() throws CorruptIndexException, IOException { - ensureOpen(); - - boolean reopened = false; - IndexReader[] newSubReaders = new IndexReader[subReaders.length]; - boolean[] newDecrefOnClose = new boolean[subReaders.length]; - - boolean success = false; - try { - for (int i = 0; i < subReaders.length; i++) { - newSubReaders[i] = subReaders[i].reopen(); - // if at least one of the subreaders was updated we remember that - // and return a new MultiReader - if (newSubReaders[i] != subReaders[i]) { - reopened = true; - // this is a new subreader instance, so on close() we don't - // decRef but close it - newDecrefOnClose[i] = false; + @Override + protected synchronized void doClose() throws IOException { + IOException ioe = null; + for (final IndexReader r : getSequentialSubReaders()) { + try { + if (closeSubReaders) { + r.close(); + } else { + r.decRef(); } + } catch (IOException e) { + if (ioe == null) ioe = e; } - - if (reopened) { - for (int i = 0; i < subReaders.length; i++) { - if (newSubReaders[i] == subReaders[i]) { - newSubReaders[i].incRef(); - newDecrefOnClose[i] = true; - } - } - - MultiReader mr = new MultiReader(newSubReaders); - mr.decrefOnClose = newDecrefOnClose; - success = true; - return mr; - } else { - success = true; - return this; - } - } finally { - if (!success && reopened) { - for (int i = 0; i < newSubReaders.length; i++) { - if (newSubReaders[i] != null) { - try { - if (newDecrefOnClose[i]) { - newSubReaders[i].decRef(); - } else { - newSubReaders[i].close(); - } - } catch (IOException ignore) { - // keep going - we want to clean up as much as possible - } - } - } - } } + // throw the first exception + if (ioe != null) throw ioe; } - - public TermFreqVector[] getTermFreqVectors(int n) throws IOException { - ensureOpen(); - int i = readerIndex(n); // find segment num - return subReaders[i].getTermFreqVectors(n - starts[i]); // dispatch to segment - } - - public TermFreqVector getTermFreqVector(int n, String field) - throws IOException { - ensureOpen(); - int i = readerIndex(n); // find segment num - return subReaders[i].getTermFreqVector(n - starts[i], field); - } - - - public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException { - ensureOpen(); - int i = readerIndex(docNumber); // find segment num - subReaders[i].getTermFreqVector(docNumber - starts[i], field, mapper); - } - - public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException { - ensureOpen(); - int i = readerIndex(docNumber); // find segment num - subReaders[i].getTermFreqVector(docNumber - starts[i], mapper); - } - - public boolean isOptimized() { - return false; - } - - public synchronized int numDocs() { - // Don't call ensureOpen() here (it could affect performance) - if (numDocs == -1) { // check cache - int n = 0; // cache miss--recompute - for (int i = 0; i < subReaders.length; i++) - n += subReaders[i].numDocs(); // sum from readers - numDocs = n; - } - return numDocs; - } - - public int maxDoc() { - // Don't call ensureOpen() here (it could affect performance) - return maxDoc; - } - - // inherit javadoc - public Document document(int n, FieldSelector fieldSelector) throws CorruptIndexException, 
IOException { - ensureOpen(); - int i = readerIndex(n); // find segment num - return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader - } - - public boolean isDeleted(int n) { - // Don't call ensureOpen() here (it could affect performance) - int i = readerIndex(n); // find segment num - return subReaders[i].isDeleted(n - starts[i]); // dispatch to segment reader - } - - public boolean hasDeletions() { - // Don't call ensureOpen() here (it could affect performance) - return hasDeletions; - } - - protected void doDelete(int n) throws CorruptIndexException, IOException { - numDocs = -1; // invalidate cache - int i = readerIndex(n); // find segment num - subReaders[i].deleteDocument(n - starts[i]); // dispatch to segment reader - hasDeletions = true; - } - - protected void doUndeleteAll() throws CorruptIndexException, IOException { - for (int i = 0; i < subReaders.length; i++) - subReaders[i].undeleteAll(); - - hasDeletions = false; - numDocs = -1; // invalidate cache - } - - private int readerIndex(int n) { // find reader for doc n: - return MultiSegmentReader.readerIndex(n, this.starts, this.subReaders.length); - } - - public boolean hasNorms(String field) throws IOException { - ensureOpen(); - for (int i = 0; i < subReaders.length; i++) { - if (subReaders[i].hasNorms(field)) return true; - } - return false; - } - - private byte[] ones; - private byte[] fakeNorms() { - if (ones==null) ones=SegmentReader.createFakeNorms(maxDoc()); - return ones; - } - - public synchronized byte[] norms(String field) throws IOException { - ensureOpen(); - byte[] bytes = (byte[])normsCache.get(field); - if (bytes != null) - return bytes; // cache hit - if (!hasNorms(field)) - return fakeNorms(); - - bytes = new byte[maxDoc()]; - for (int i = 0; i < subReaders.length; i++) - subReaders[i].norms(field, bytes, starts[i]); - normsCache.put(field, bytes); // update cache - return bytes; - } - - public synchronized void norms(String field, byte[] result, int offset) - throws IOException { - ensureOpen(); - byte[] bytes = (byte[])normsCache.get(field); - if (bytes==null && !hasNorms(field)) bytes=fakeNorms(); - if (bytes != null) // cache hit - System.arraycopy(bytes, 0, result, offset, maxDoc()); - - for (int i = 0; i < subReaders.length; i++) // read from segments - subReaders[i].norms(field, result, offset + starts[i]); - } - - protected void doSetNorm(int n, String field, byte value) - throws CorruptIndexException, IOException { - synchronized (normsCache) { - normsCache.remove(field); // clear cache - } - int i = readerIndex(n); // find segment num - subReaders[i].setNorm(n-starts[i], field, value); // dispatch - } - - public TermEnum terms() throws IOException { - ensureOpen(); - return new MultiTermEnum(subReaders, starts, null); - } - - public TermEnum terms(Term term) throws IOException { - ensureOpen(); - return new MultiTermEnum(subReaders, starts, term); - } - - public int docFreq(Term t) throws IOException { - ensureOpen(); - int total = 0; // sum freqs in segments - for (int i = 0; i < subReaders.length; i++) - total += subReaders[i].docFreq(t); - return total; - } - - public TermDocs termDocs() throws IOException { - ensureOpen(); - return new MultiTermDocs(subReaders, starts); - } - - public TermPositions termPositions() throws IOException { - ensureOpen(); - return new MultiTermPositions(subReaders, starts); - } - - protected void doCommit() throws IOException { - for (int i = 0; i < subReaders.length; i++) - subReaders[i].commit(); - } - - protected synchronized 
void doClose() throws IOException { - for (int i = 0; i < subReaders.length; i++) { - if (decrefOnClose[i]) { - subReaders[i].decRef(); - } else { - subReaders[i].close(); - } - } - } - - public Collection getFieldNames (IndexReader.FieldOption fieldNames) { - ensureOpen(); - return MultiSegmentReader.getFieldNames(fieldNames, this.subReaders); - } - - /** - * Checks recursively if all subreaders are up to date. - */ - public boolean isCurrent() throws CorruptIndexException, IOException { - for (int i = 0; i < subReaders.length; i++) { - if (!subReaders[i].isCurrent()) { - return false; - } - } - - // all subreaders are up to date - return true; - } - - /** Not implemented. - * @throws UnsupportedOperationException - */ - public long getVersion() { - throw new UnsupportedOperationException("MultiReader does not support this method."); - } - - // for testing - IndexReader[] getSubReaders() { - return subReaders; - } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiSegmentReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiTerms.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultiTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/MultipleTermPositions.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NoDeletionPolicy.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NoMergePolicy.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NoMergeScheduler.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NormsWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NormsWriterPerField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NormsWriterPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NumericDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NumericDocValuesFieldUpdates.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/NumericDocValuesWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/OrdTermState.java'. Fisheye: No comparison available. 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentCoreReaders.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfo.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfo.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfo.java 17 Aug 2012 14:55:01 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfo.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,667 +17,331 @@ * limitations under the License. */ -import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.util.BitVector; -import java.io.IOException; -import java.util.List; -import java.util.ArrayList; -final class SegmentInfo { +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.regex.Matcher; - static final int NO = -1; // e.g. no norms; no deletes; - static final int YES = 1; // e.g. have norms; have deletes; - static final int CHECK_DIR = 0; // e.g. must check dir to see if there are norms/deletions - static final int WITHOUT_GEN = 0; // a file name that has no GEN in it. +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.TrackingDirectoryWrapper; +import org.apache.lucene.util.Version; - public String name; // unique name in dir - public int docCount; // number of docs in seg - public Directory dir; // where segment resides +/** + * Information about a segment such as it's name, directory, and files related + * to the segment. + * + * @lucene.experimental + */ +public final class SegmentInfo { + + // TODO: remove these from this class, for now this is the representation + /** Used by some member fields to mean not present (e.g., + * norms, deletions). */ + public static final int NO = -1; // e.g. no norms; no deletes; - private boolean preLockless; // true if this is a segments file written before - // lock-less commits (2.1) + /** Used by some member fields to mean present (e.g., + * norms, deletions). */ + public static final int YES = 1; // e.g. have norms; have deletes; - private long delGen; // current generation of del file; NO if there - // are no deletes; CHECK_DIR if it's a pre-2.1 segment - // (and we must check filesystem); YES or higher if - // there are deletes at generation N - - private long[] normGen; // current generation of each field's norm file. - // If this array is null, for lockLess this means no - // separate norms. For preLockLess this means we must - // check filesystem. 
If this array is not null, its - // values mean: NO says this field has no separate - // norms; CHECK_DIR says it is a preLockLess segment and - // filesystem must be checked; >= YES says this field - // has separate norms with the specified generation + /** Unique segment name in the directory. */ + public final String name; - private byte isCompoundFile; // NO if it is not; YES if it is; CHECK_DIR if it's - // pre-2.1 (ie, must check file system to see - // if .cfs and .nrm exist) + private int docCount; // number of docs in seg - private boolean hasSingleNormFile; // true if this segment maintains norms in a single file; - // false otherwise - // this is currently false for segments populated by DocumentWriter - // and true for newly created merged segments (both - // compound and non compound). - - private List files; // cached list of files that this segment uses - // in the Directory + /** Where this segment resides. */ + public final Directory dir; - long sizeInBytes = -1; // total byte size of all of our files (computed on demand) + private boolean isCompoundFile; - private int docStoreOffset; // if this segment shares stored fields & vectors, this - // offset is where in that file this segment's docs begin - private String docStoreSegment; // name used to derive fields/vectors file we share with - // other segments - private boolean docStoreIsCompoundFile; // whether doc store files are stored in compound file (*.cfx) + private Codec codec; - private int delCount; // How many deleted docs in this segment, or -1 if not yet known - // (if it's an older index) + private Map diagnostics; + + /** @deprecated not used anymore */ + @Deprecated + private Map attributes; - private boolean hasProx; // True if this segment has any fields with omitTf==false + // Tracks the Lucene version this segment was created with, since 3.1. Null + // indicates an older than 3.0 index, and it's used to detect a too old index. + // The format expected is "x.y" - "2.x" for pre-3.0 indexes (or null), and + // specific versions afterwards ("3.0.0", "3.1.0" etc.). + // see o.a.l.util.Version. + private Version version; - public SegmentInfo(String name, int docCount, Directory dir) { - this.name = name; - this.docCount = docCount; - this.dir = dir; - delGen = NO; - isCompoundFile = CHECK_DIR; - preLockless = true; - hasSingleNormFile = false; - docStoreOffset = -1; - docStoreSegment = name; - docStoreIsCompoundFile = false; - delCount = 0; - hasProx = true; + void setDiagnostics(Map diagnostics) { + this.diagnostics = diagnostics; } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile) { - this(name, docCount, dir, isCompoundFile, hasSingleNormFile, -1, null, false, true); + /** Returns diagnostics saved into the segment when it was + * written. */ + public Map getDiagnostics() { + return diagnostics; } + + /** + * Construct a new complete SegmentInfo instance from input. + *

    Note: this is public only to allow access from + * the codecs package.
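For orientation only, a hedged sketch of constructing one of these directly; the segment name, doc count, file names and diagnostics are invented, and in practice IndexWriter and the codecs create SegmentInfo instances:

import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class SegmentInfoExample {
  public static void main(String[] args) {
    Directory dir = new RAMDirectory();

    Map<String, String> diagnostics = new HashMap<>();
    diagnostics.put("source", "flush");

    SegmentInfo si = new SegmentInfo(dir, Version.LUCENE_CURRENT, "_0", 1000,
        false /* not a compound file */, Codec.getDefault(), diagnostics);
    si.setFiles(new HashSet<>(Arrays.asList("_0.fdt", "_0.fdx")));

    System.out.println(si.toString(dir, 0));  // something like _0(...):C1000
  }
}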

    + */ + public SegmentInfo(Directory dir, Version version, String name, int docCount, + boolean isCompoundFile, Codec codec, Map diagnostics) { + this(dir, version, name, docCount, isCompoundFile, codec, diagnostics, null); + } - public SegmentInfo(String name, int docCount, Directory dir, boolean isCompoundFile, boolean hasSingleNormFile, - int docStoreOffset, String docStoreSegment, boolean docStoreIsCompoundFile, boolean hasProx) { - this(name, docCount, dir); - this.isCompoundFile = (byte) (isCompoundFile ? YES : NO); - this.hasSingleNormFile = hasSingleNormFile; - preLockless = false; - this.docStoreOffset = docStoreOffset; - this.docStoreSegment = docStoreSegment; - this.docStoreIsCompoundFile = docStoreIsCompoundFile; - this.hasProx = hasProx; - delCount = 0; - assert docStoreOffset == -1 || docStoreSegment != null: "dso=" + docStoreOffset + " dss=" + docStoreSegment + " docCount=" + docCount; + /** + * Construct a new complete SegmentInfo instance from input. + *

    Note: this is public only to allow access from + * the codecs package.

    + */ + public SegmentInfo(Directory dir, Version version, String name, int docCount, + boolean isCompoundFile, Codec codec, Map diagnostics, Map attributes) { + assert !(dir instanceof TrackingDirectoryWrapper); + this.dir = dir; + this.version = version; + this.name = name; + this.docCount = docCount; + this.isCompoundFile = isCompoundFile; + this.codec = codec; + this.diagnostics = diagnostics; + this.attributes = attributes; } /** - * Copy everything from src SegmentInfo into our instance. + * @deprecated separate norms are not supported in >= 4.0 */ - void reset(SegmentInfo src) { - clearFiles(); - name = src.name; - docCount = src.docCount; - dir = src.dir; - preLockless = src.preLockless; - delGen = src.delGen; - docStoreOffset = src.docStoreOffset; - docStoreIsCompoundFile = src.docStoreIsCompoundFile; - if (src.normGen == null) { - normGen = null; - } else { - normGen = new long[src.normGen.length]; - System.arraycopy(src.normGen, 0, normGen, 0, src.normGen.length); - } - isCompoundFile = src.isCompoundFile; - hasSingleNormFile = src.hasSingleNormFile; - delCount = src.delCount; + @Deprecated + boolean hasSeparateNorms() { + return getAttribute(Lucene3xSegmentInfoFormat.NORMGEN_KEY) != null; } /** - * Construct a new SegmentInfo instance by reading a - * previously saved SegmentInfo from input. + * Mark whether this segment is stored as a compound file. * - * @param dir directory to load from - * @param format format of the segments info file - * @param input input handle to read segment info from + * @param isCompoundFile true if this is a compound file; + * else, false */ - SegmentInfo(Directory dir, int format, IndexInput input) throws IOException { - this.dir = dir; - name = input.readString(); - docCount = input.readInt(); - if (format <= SegmentInfos.FORMAT_LOCKLESS) { - delGen = input.readLong(); - if (format <= SegmentInfos.FORMAT_SHARED_DOC_STORE) { - docStoreOffset = input.readInt(); - if (docStoreOffset != -1) { - docStoreSegment = input.readString(); - docStoreIsCompoundFile = (1 == input.readByte()); - } else { - docStoreSegment = name; - docStoreIsCompoundFile = false; - } - } else { - docStoreOffset = -1; - docStoreSegment = name; - docStoreIsCompoundFile = false; - } - if (format <= SegmentInfos.FORMAT_SINGLE_NORM_FILE) { - hasSingleNormFile = (1 == input.readByte()); - } else { - hasSingleNormFile = false; - } - int numNormGen = input.readInt(); - if (numNormGen == NO) { - normGen = null; - } else { - normGen = new long[numNormGen]; - for(int j=0;j= YES: this means this segment was written by - // the LOCKLESS code and for certain has - // deletions - // - if (delGen == NO) { - return false; - } else if (delGen >= YES) { - return true; - } else { - return dir.fileExists(getDelFileName()); + /** Returns number of documents in this segment (deletions + * are not taken into account). 
*/ + public int getDocCount() { + if (this.docCount == -1) { + throw new IllegalStateException("docCount isn't set yet"); } + return docCount; } - void advanceDelGen() { - // delGen 0 is reserved for pre-LOCKLESS format - if (delGen == NO) { - delGen = YES; - } else { - delGen++; + // NOTE: leave package private + void setDocCount(int docCount) { + if (this.docCount != -1) { + throw new IllegalStateException("docCount was already set"); } - clearFiles(); + this.docCount = docCount; } - void clearDelGen() { - delGen = NO; - clearFiles(); - } - - public Object clone () { - SegmentInfo si = new SegmentInfo(name, docCount, dir); - si.isCompoundFile = isCompoundFile; - si.delGen = delGen; - si.delCount = delCount; - si.preLockless = preLockless; - si.hasSingleNormFile = hasSingleNormFile; - if (normGen != null) { - si.normGen = (long[]) normGen.clone(); + /** Return all files referenced by this SegmentInfo. */ + public Set files() { + if (setFiles == null) { + throw new IllegalStateException("files were not computed yet"); } - si.docStoreOffset = docStoreOffset; - si.docStoreSegment = docStoreSegment; - si.docStoreIsCompoundFile = docStoreIsCompoundFile; - return si; + return Collections.unmodifiableSet(setFiles); } - String getDelFileName() { - if (delGen == NO) { - // In this case we know there is no deletion filename - // against this segment - return null; - } else { - // If delGen is CHECK_DIR, it's the pre-lockless-commit file format - return IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.DELETES_EXTENSION, delGen); - } + @Override + public String toString() { + return toString(dir, 0); } - /** - * Returns true if this field for this segment has saved a separate norms file (__N.sX). + /** Used for debugging. Format may suddenly change. * - * @param fieldNumber the field index to check + *

    Current format looks like + * _a(3.1):c45/4, which means the segment's + * name is _a; it was created with Lucene 3.1 (or + * '?' if it's unknown); it's using compound file + * format (would be C if not compound); it + * has 45 documents; it has 4 deletions (this part is + * left off when there are no deletions).

    */ - boolean hasSeparateNorms(int fieldNumber) - throws IOException { - if ((normGen == null && preLockless) || (normGen != null && normGen[fieldNumber] == CHECK_DIR)) { - // Must fallback to directory file exists check: - String fileName = name + ".s" + fieldNumber; - return dir.fileExists(fileName); - } else if (normGen == null || normGen[fieldNumber] == NO) { - return false; - } else { - return true; + public String toString(Directory dir, int delCount) { + StringBuilder s = new StringBuilder(); + s.append(name).append('(').append(version == null ? "?" : version).append(')').append(':'); + char cfs = getUseCompoundFile() ? 'c' : 'C'; + s.append(cfs); + + if (this.dir != dir) { + s.append('x'); } - } + s.append(docCount); - /** - * Returns true if any fields in this segment have separate norms. - */ - boolean hasSeparateNorms() - throws IOException { - if (normGen == null) { - if (!preLockless) { - // This means we were created w/ LOCKLESS code and no - // norms are written yet: - return false; - } else { - // This means this segment was saved with pre-LOCKLESS - // code. So we must fallback to the original - // directory list check: - String[] result = dir.list(); - if (result == null) - throw new IOException("cannot read directory " + dir + ": list() returned null"); - - String pattern; - pattern = name + ".s"; - int patternLength = pattern.length(); - for(int i = 0; i < result.length; i++){ - if(result[i].startsWith(pattern) && Character.isDigit(result[i].charAt(patternLength))) - return true; - } - return false; - } - } else { - // This means this segment was saved with LOCKLESS - // code so we first check whether any normGen's are >= 1 - // (meaning they definitely have separate norms): - for(int i=0;i= YES) { - return true; - } - } - // Next we look for any == 0. These cases were - // pre-LOCKLESS and must be checked in directory: - for(int i=0;i + * NOTE: this method is used for internal purposes only - you should + * not modify the version of a SegmentInfo, or it may result in unexpected + * exceptions thrown when you attempt to open the index. * - * @param isCompoundFile true if this is a compound file; - * else, false + * @lucene.internal */ - void setUseCompoundFile(boolean isCompoundFile) { - if (isCompoundFile) { - this.isCompoundFile = YES; - } else { - this.isCompoundFile = NO; - } - clearFiles(); + public void setVersion(Version version) { + this.version = version; } - /** - * Returns true if this segment is stored as a compound - * file; else, false. - */ - boolean getUseCompoundFile() throws IOException { - if (isCompoundFile == NO) { - return false; - } else if (isCompoundFile == YES) { - return true; - } else { - return dir.fileExists(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); - } + /** Returns the version of the code which wrote the segment. */ + public Version getVersion() { + return version; } - int getDelCount() throws IOException { - if (delCount == -1) { - if (hasDeletions()) { - final String delFileName = getDelFileName(); - delCount = new BitVector(dir, delFileName).count(); - } else - delCount = 0; - } - assert delCount <= docCount; - return delCount; + private Set setFiles; + + /** Sets the files written for this segment. */ + public void setFiles(Set files) { + checkFileNames(files); + setFiles = files; } - void setDelCount(int delCount) { - this.delCount = delCount; - assert delCount <= docCount; + /** Add these files to the set of files written for this + * segment. 
*/ + public void addFiles(Collection files) { + checkFileNames(files); + setFiles.addAll(files); } - int getDocStoreOffset() { - return docStoreOffset; + /** Add this file to the set of files written for this + * segment. */ + public void addFile(String file) { + checkFileNames(Collections.singleton(file)); + setFiles.add(file); } - boolean getDocStoreIsCompoundFile() { - return docStoreIsCompoundFile; + private void checkFileNames(Collection files) { + Matcher m = IndexFileNames.CODEC_FILE_PATTERN.matcher(""); + for (String file : files) { + m.reset(file); + if (!m.matches()) { + throw new IllegalArgumentException("invalid codec filename '" + file + "', must match: " + IndexFileNames.CODEC_FILE_PATTERN.pattern()); + } + } } - - void setDocStoreIsCompoundFile(boolean v) { - docStoreIsCompoundFile = v; - clearFiles(); - } - - String getDocStoreSegment() { - return docStoreSegment; - } - - void setDocStoreOffset(int offset) { - docStoreOffset = offset; - clearFiles(); - } - + /** - * Save this segment's info. + * Get a codec attribute value, or null if it does not exist + * + * @deprecated no longer supported */ - void write(IndexOutput output) - throws IOException { - output.writeString(name); - output.writeInt(docCount); - output.writeLong(delGen); - output.writeInt(docStoreOffset); - if (docStoreOffset != -1) { - output.writeString(docStoreSegment); - output.writeByte((byte) (docStoreIsCompoundFile ? 1:0)); - } - - output.writeByte((byte) (hasSingleNormFile ? 1:0)); - if (normGen == null) { - output.writeInt(NO); + @Deprecated + public String getAttribute(String key) { + if (attributes == null) { + return null; } else { - output.writeInt(normGen.length); - for(int j = 0; j < normGen.length; j++) { - output.writeLong(normGen[j]); - } + return attributes.get(key); } - output.writeByte(isCompoundFile); - output.writeInt(delCount); - output.writeByte((byte) (hasProx ? 1:0)); } - - void setHasProx(boolean hasProx) { - this.hasProx = hasProx; - clearFiles(); - } - - boolean getHasProx() { - return hasProx; - } - - private void addIfExists(List files, String fileName) throws IOException { - if (dir.fileExists(fileName)) - files.add(fileName); - } - - /* - * Return all files referenced by this SegmentInfo. The - * returns List is a locally cached List so you should not - * modify it. + + /** + * Puts a codec attribute value. + *

    + * This is a key-value mapping for the field that the codec can use to store + * additional metadata, and will be available to the codec when reading the + * segment via {@link #getAttribute(String)} + *

    + * If a value already exists for the field, it will be replaced with the new + * value. + * + * @deprecated no longer supported */ - - public List files() throws IOException { - - if (files != null) { - // Already cached: - return files; + @Deprecated + public String putAttribute(String key, String value) { + if (attributes == null) { + attributes = new HashMap<>(); } - - files = new ArrayList(); - - boolean useCompoundFile = getUseCompoundFile(); - - if (useCompoundFile) { - files.add(name + "." + IndexFileNames.COMPOUND_FILE_EXTENSION); - } else { - final String[] exts = IndexFileNames.NON_STORE_INDEX_EXTENSIONS; - for(int i=0;i= YES || dir.fileExists(delFileName))) { - files.add(delFileName); - } - - // Careful logic for norms files - if (normGen != null) { - for(int i=0;i= YES) { - // Definitely a separate norm file, with generation: - files.add(IndexFileNames.fileNameFromGeneration(name, "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i, gen)); - } else if (NO == gen) { - // No separate norms but maybe plain norms - // in the non compound file case: - if (!hasSingleNormFile && !useCompoundFile) { - String fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; - if (dir.fileExists(fileName)) { - files.add(fileName); - } - } - } else if (CHECK_DIR == gen) { - // Pre-2.1: we have to check file existence - String fileName = null; - if (useCompoundFile) { - fileName = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION + i; - } else if (!hasSingleNormFile) { - fileName = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION + i; - } - if (fileName != null && dir.fileExists(fileName)) { - files.add(fileName); - } - } - } - } else if (preLockless || (!hasSingleNormFile && !useCompoundFile)) { - // Pre-2.1: we have to scan the dir to find all - // matching _X.sN/_X.fN files for our segment: - String prefix; - if (useCompoundFile) - prefix = name + "." + IndexFileNames.SEPARATE_NORMS_EXTENSION; - else - prefix = name + "." + IndexFileNames.PLAIN_NORMS_EXTENSION; - int prefixLength = prefix.length(); - String[] allFiles = dir.list(); - if (allFiles == null) - throw new IOException("cannot read directory " + dir + ": list() returned null"); - for(int i=0;i prefixLength && Character.isDigit(fileName.charAt(prefixLength)) && fileName.startsWith(prefix)) { - files.add(fileName); - } - } - } - return files; + return attributes.put(key, value); } - - /* Called whenever any change is made that affects which - * files this segment has. */ - private void clearFiles() { - files = null; - sizeInBytes = -1; + + /** + * Returns the internal codec attributes map. + * + * @return internal codec attributes map. May be null if no mappings exist. + * + * @deprecated no longer supported + */ + @Deprecated + public Map attributes() { + return attributes; } - /** Used for debugging */ - public String segString(Directory dir) { - String cfs; - try { - if (getUseCompoundFile()) - cfs = "c"; - else - cfs = "C"; - } catch (IOException ioe) { - cfs = "?"; + private static Map cloneMap(Map map) { + if (map != null) { + return new HashMap(map); + } else { + return null; } - - String docStore; - - if (docStoreOffset != -1) - docStore = "->" + docStoreSegment; - else - docStore = ""; - - return name + ":" + - cfs + - (this.dir == dir ? "" : "x") + - docCount + docStore; } - /** We consider another SegmentInfo instance equal if it - * has the same dir and same name. 
*/ - public boolean equals(Object obj) { - SegmentInfo other; - try { - other = (SegmentInfo) obj; - } catch (ClassCastException cce) { - return false; + @Override + public SegmentInfo clone() { + SegmentInfo other = new SegmentInfo(dir, version, name, docCount, isCompoundFile, codec, cloneMap(diagnostics), cloneMap(attributes)); + if (setFiles != null) { + other.setFiles(new HashSet<>(setFiles)); } - return other.dir == dir && other.name.equals(name); + return other; } - - public int hashCode() { - return dir.hashCode() + name.hashCode(); - } } Index: 3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfos.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfos.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfos.java 17 Aug 2012 14:55:01 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/SegmentInfos.java 16 Dec 2014 11:31:41 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,96 +17,189 @@ * limitations under the License. */ +import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; +import java.util.Set; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.LiveDocsFormat; +import org.apache.lucene.codecs.lucene3x.Lucene3xCodec; +import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoFormat; +import org.apache.lucene.codecs.lucene3x.Lucene3xSegmentInfoReader; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.DataOutput; import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; import org.apache.lucene.store.IndexInput; import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.store.ChecksumIndexOutput; -import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.NoSuchDirectoryException; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.Version; -import java.io.File; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.PrintStream; -import java.util.Vector; +/** + * A collection of segmentInfo objects with methods for operating on those + * segments in relation to the file system. + *

    + * The active segments in the index are stored in the segment info file, + * segments_N. There may be one or more segments_N files in + * the index; however, the one with the largest generation is the active one + * (when older segments_N files are present it's because they temporarily cannot + * be deleted, or, a writer is in the process of committing, or a custom + * {@link org.apache.lucene.index.IndexDeletionPolicy IndexDeletionPolicy} is in + * use). This file lists each segment by name and has details about the codec + * and generation of deletes. + *

    + *

    + * There is also a file segments.gen. This file contains the current + * generation (the _N in segments_N) of the index. This is + * used only as a fallback in case the current generation cannot be accurately + * determined by directory listing alone (as is the case for some NFS clients + * with time-based directory cache expiration). This file simply contains an + * {@link DataOutput#writeInt Int32} version header ( + * {@link #FORMAT_SEGMENTS_GEN_CURRENT}), followed by the generation recorded as + * {@link DataOutput#writeLong Int64}, written twice. + *
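(To make the layout above concrete, here is a minimal sketch of reading segments.gen; it is not code from this patch, it assumes the Lucene 4.x store APIs used elsewhere in this diff, and for brevity it skips validating the trailing codec footer.)

   // Minimal sketch, assuming Lucene 4.x APIs; not part of the patch.
   long readSegmentsGen(Directory dir) throws IOException {
     IndexInput genInput = dir.openInput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE);
     try {
       int format = genInput.readInt();   // Int32 version header, e.g. FORMAT_SEGMENTS_GEN_CURRENT
       long gen0 = genInput.readLong();   // generation, written twice
       long gen1 = genInput.readLong();
       if (gen0 != gen1) {
         throw new CorruptIndexException("segments.gen generations differ: " + gen0 + " vs " + gen1);
       }
       return gen0;                       // footer/checksum check omitted in this sketch
     } finally {
       genInput.close();
     }
   }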

    + *

 + * Files:
 + *   • segments.gen: GenHeader, Generation, Generation, Footer
 + *   • segments_N: Header, Version, NameCounter, SegCount, <SegName, SegCodec, DelGen, DeletionCount, FieldInfosGen, DocValuesGen, UpdatesFiles>^SegCount, CommitUserData, Footer
 + *
 + * Data types:
 + *   • Header --> {@link CodecUtil#writeHeader CodecHeader}
 + *   • GenHeader, NameCounter, SegCount, DeletionCount --> {@link DataOutput#writeInt Int32}
 + *   • Generation, Version, DelGen, Checksum, FieldInfosGen, DocValuesGen --> {@link DataOutput#writeLong Int64}
 + *   • SegName, SegCodec --> {@link DataOutput#writeString String}
 + *   • CommitUserData --> {@link DataOutput#writeStringStringMap Map<String,String>}
 + *   • UpdatesFiles --> Map<{@link DataOutput#writeInt Int32}, {@link DataOutput#writeStringSet(Set) Set<String>}>
 + *   • Footer --> {@link CodecUtil#writeFooter CodecFooter}
 + *
 + * Field Descriptions:
 + *   • Version counts how often the index has been changed by adding or deleting documents.
 + *   • NameCounter is used to generate names for new segment files.
 + *   • SegName is the name of the segment, and is used as the file name prefix for all of the files that compose the segment's index.
 + *   • DelGen is the generation count of the deletes file. If this is -1, there are no deletes. Anything above zero means there are deletes stored by {@link LiveDocsFormat}.
 + *   • DeletionCount records the number of deleted documents in this segment.
 + *   • SegCodec is the {@link Codec#getName() name} of the Codec that encoded this segment.
 + *   • CommitUserData stores an optional user-supplied opaque Map<String,String> that was passed to {@link IndexWriter#setCommitData(java.util.Map)}.
 + *   • FieldInfosGen is the generation count of the fieldInfos file. If this is -1, there are no updates to the fieldInfos in that segment. Anything above zero means there are updates to fieldInfos stored by {@link FieldInfosFormat}.
 + *   • DocValuesGen is the generation count of the updatable DocValues. If this is -1, there are no updates to DocValues in that segment. Anything above zero means there are updates to DocValues stored by {@link DocValuesFormat}.
 + *   • UpdatesFiles stores the set of files that were updated in that segment per field.
 + *
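(A simplified sketch, not code from this patch, of how the leading fields listed above are read back; the names dir and segmentsFileName are placeholders, and the version-dependent tail -- FieldInfosGen, DocValuesGen, UpdatesFiles -- is omitted, since the read() method later in this diff handles it.)

   // Simplified sketch, assuming Lucene 4.x APIs.
   ChecksumIndexInput in = dir.openChecksumInput(segmentsFileName, IOContext.READ);
   try {
     int actualFormat = CodecUtil.checkHeader(in, "segments",
         SegmentInfos.VERSION_40, SegmentInfos.VERSION_49);           // Header
     long version = in.readLong();                                    // Version
     int nameCounter = in.readInt();                                  // NameCounter
     int segCount = in.readInt();                                     // SegCount
     for (int i = 0; i < segCount; i++) {
       String segName = in.readString();                              // SegName
       String segCodec = in.readString();                             // SegCodec
       long delGen = in.readLong();                                   // DelGen (-1 means no deletes)
       int delCount = in.readInt();                                   // DeletionCount
       // FieldInfosGen, DocValuesGen and UpdatesFiles follow here in newer formats
     }
     Map<String,String> commitUserData = in.readStringStringMap();    // CommitUserData
   } finally {
     in.close();
   }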

    + * + * @lucene.experimental + */ +public final class SegmentInfos implements Cloneable, Iterable { -final class SegmentInfos extends Vector { + /** The file format version for the segments_N codec header, up to 4.5. */ + public static final int VERSION_40 = 0; - /** The file format version, a negative number. */ - /* Works since counter, the old 1st entry, is always >= 0 */ - public static final int FORMAT = -1; + /** The file format version for the segments_N codec header, since 4.6+. */ + public static final int VERSION_46 = 1; + + /** The file format version for the segments_N codec header, since 4.8+ */ + public static final int VERSION_48 = 2; + + /** The file format version for the segments_N codec header, since 4.9+ */ + public static final int VERSION_49 = 3; - /** This format adds details used for lockless commits. It differs - * slightly from the previous format in that file names - * are never re-used (write once). Instead, each file is - * written to the next generation. For example, - * segments_1, segments_2, etc. This allows us to not use - * a commit lock. See file - * formats for details. - */ - public static final int FORMAT_LOCKLESS = -2; + // Used for the segments.gen file only! + // Whenever you add a new format, make it 1 smaller (negative version logic)! + private static final int FORMAT_SEGMENTS_GEN_47 = -2; + private static final int FORMAT_SEGMENTS_GEN_CHECKSUM = -3; + private static final int FORMAT_SEGMENTS_GEN_START = FORMAT_SEGMENTS_GEN_47; + /** Current format of segments.gen */ + public static final int FORMAT_SEGMENTS_GEN_CURRENT = FORMAT_SEGMENTS_GEN_CHECKSUM; - /** This format adds a "hasSingleNormFile" flag into each segment info. - * See LUCENE-756 - * for details. - */ - public static final int FORMAT_SINGLE_NORM_FILE = -3; + /** Used to name new segments. */ + // TODO: should this be a long ...? + public int counter; + + /** Counts how often the index has been changed. */ + public long version; - /** This format allows multiple segments to share a single - * vectors and stored fields file. */ - public static final int FORMAT_SHARED_DOC_STORE = -4; + private long generation; // generation of the "segments_N" for the next commit + private long lastGeneration; // generation of the "segments_N" file we last successfully read + // or wrote; this is normally the same as generation except if + // there was an IOException that had interrupted a commit - /** This format adds a checksum at the end of the file to - * ensure all bytes were successfully written. */ - public static final int FORMAT_CHECKSUM = -5; - - /** This format adds the deletion count for each segment. - * This way IndexWriter can efficiently report numDocs(). */ - public static final int FORMAT_DEL_COUNT = -6; - - /** This format adds the boolean hasProx to record if any - * fields in the segment store prox information (ie, have - * omitTf==false) */ - public static final int FORMAT_HAS_PROX = -7; - - /* This must always point to the most recent file format. */ - static final int CURRENT_FORMAT = FORMAT_HAS_PROX; + /** Opaque Map<String, String> that user can specify during IndexWriter.commit */ + public Map userData = Collections.emptyMap(); - public int counter = 0; // used to name new segments + private List segments = new ArrayList<>(); + /** - * counts how often the index has been changed by adding or deleting docs. - * starting with the current time in milliseconds forces to create unique version numbers. 
- */ - private long version = System.currentTimeMillis(); - - private long generation = 0; // generation of the "segments_N" for the next commit - private long lastGeneration = 0; // generation of the "segments_N" file we last successfully read - // or wrote; this is normally the same as generation except if - // there was an IOException that had interrupted a commit - - /** * If non-null, information about loading segments_N files * will be printed here. @see #setInfoStream. */ - private static PrintStream infoStream; + private static PrintStream infoStream = null; - public final SegmentInfo info(int i) { - return (SegmentInfo) get(i); + /** Sole constructor. Typically you call this and then + * use {@link #read(Directory) or + * #read(Directory,String)} to populate each {@link + * SegmentCommitInfo}. Alternatively, you can add/remove your + * own {@link SegmentCommitInfo}s. */ + public SegmentInfos() { } + /** Returns {@link SegmentCommitInfo} at the provided + * index. */ + public SegmentCommitInfo info(int i) { + return segments.get(i); + } + /** - * Get the generation (N) of the current segments_N file - * from a list of files. + * Get the generation of the most recent commit to the + * list of index files (N in the segments_N file). * * @param files -- array of file names to check */ - public static long getCurrentSegmentGeneration(String[] files) { + public static long getLastCommitGeneration(String[] files) { if (files == null) { return -1; } long max = -1; - for (int i = 0; i < files.length; i++) { - String file = files[i]; + for (String file : files) { if (file.startsWith(IndexFileNames.SEGMENTS) && !file.equals(IndexFileNames.SEGMENTS_GEN)) { long gen = generationFromSegmentsFileName(file); if (gen > max) { @@ -118,52 +211,53 @@ } /** - * Get the generation (N) of the current segments_N file - * in the directory. + * Get the generation of the most recent commit to the + * index in this directory (N in the segments_N file). * * @param directory -- directory to search for the latest segments_N file */ - public static long getCurrentSegmentGeneration(Directory directory) throws IOException { - String[] files = directory.list(); - if (files == null) - throw new IOException("cannot read directory " + directory + ": list() returned null"); - return getCurrentSegmentGeneration(files); + public static long getLastCommitGeneration(Directory directory) throws IOException { + try { + return getLastCommitGeneration(directory.listAll()); + } catch (NoSuchDirectoryException nsde) { + return -1; + } } /** - * Get the filename of the current segments_N file - * from a list of files. + * Get the filename of the segments_N file for the most + * recent commit in the list of index files. * * @param files -- array of file names to check */ - public static String getCurrentSegmentFileName(String[] files) throws IOException { + public static String getLastCommitSegmentsFileName(String[] files) { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - getCurrentSegmentGeneration(files)); + getLastCommitGeneration(files)); } /** - * Get the filename of the current segments_N file - * in the directory. + * Get the filename of the segments_N file for the most + * recent commit to the index in this Directory. 
* * @param directory -- directory to search for the latest segments_N file */ - public static String getCurrentSegmentFileName(Directory directory) throws IOException { + public static String getLastCommitSegmentsFileName(Directory directory) throws IOException { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", - getCurrentSegmentGeneration(directory)); + getLastCommitGeneration(directory)); } /** * Get the segments_N filename in use by this segment infos. */ - public String getCurrentSegmentFileName() { + public String getSegmentsFileName() { return IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", lastGeneration); } - + /** * Parse the generation off the segments file name and * return it. @@ -179,6 +273,35 @@ } } + /** + * A utility for writing the {@link IndexFileNames#SEGMENTS_GEN} file to a + * {@link Directory}. + * + *

    + * NOTE: this is an internal utility which is kept public so that it's + * accessible by code from other packages. You should avoid calling this + * method unless you're absolutely sure what you're doing! + * + * @lucene.internal + */ + public static void writeSegmentsGen(Directory dir, long generation) { + try { + IndexOutput genOutput = dir.createOutput(IndexFileNames.SEGMENTS_GEN, IOContext.READONCE); + try { + genOutput.writeInt(FORMAT_SEGMENTS_GEN_CURRENT); + genOutput.writeLong(generation); + genOutput.writeLong(generation); + CodecUtil.writeFooter(genOutput); + } finally { + genOutput.close(); + dir.sync(Collections.singleton(IndexFileNames.SEGMENTS_GEN)); + } + } catch (Throwable t) { + // It's OK if we fail to write this file since it's + // used only as one of the retry fallbacks. + IOUtils.deleteFilesIgnoringExceptions(dir, IndexFileNames.SEGMENTS_GEN); + } + } /** * Get the next segments_N filename that will be written. @@ -205,73 +328,129 @@ * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public final void read(Directory directory, String segmentFileName) throws CorruptIndexException, IOException { + public final void read(Directory directory, String segmentFileName) throws IOException { boolean success = false; // Clear any previous segments: - clear(); + this.clear(); - ChecksumIndexInput input = new ChecksumIndexInput(directory.openInput(segmentFileName)); - generation = generationFromSegmentsFileName(segmentFileName); lastGeneration = generation; + ChecksumIndexInput input = directory.openChecksumInput(segmentFileName, IOContext.READ); try { - int format = input.readInt(); - if(format < 0){ // file contains explicit format info - // check that it is a format we can understand - if (format < CURRENT_FORMAT) - throw new CorruptIndexException("Unknown format version: " + format); - version = input.readLong(); // read version - counter = input.readInt(); // read counter + final int format = input.readInt(); + final int actualFormat; + if (format == CodecUtil.CODEC_MAGIC) { + // 4.0+ + actualFormat = CodecUtil.checkHeaderNoMagic(input, "segments", VERSION_40, VERSION_49); + version = input.readLong(); + counter = input.readInt(); + int numSegments = input.readInt(); + if (numSegments < 0) { + throw new CorruptIndexException("invalid segment count: " + numSegments + " (resource: " + input + ")"); + } + for (int seg = 0; seg < numSegments; seg++) { + String segName = input.readString(); + Codec codec = Codec.forName(input.readString()); + //System.out.println("SIS.read seg=" + seg + " codec=" + codec); + SegmentInfo info = codec.segmentInfoFormat().getSegmentInfoReader().read(directory, segName, IOContext.READ); + info.setCodec(codec); + long delGen = input.readLong(); + int delCount = input.readInt(); + if (delCount < 0 || delCount > info.getDocCount()) { + throw new CorruptIndexException("invalid deletion count: " + delCount + " vs docCount=" + info.getDocCount() + " (resource: " + input + ")"); + } + long fieldInfosGen = -1; + if (actualFormat >= VERSION_46) { + fieldInfosGen = input.readLong(); + } + long dvGen = -1; + if (actualFormat >= VERSION_49) { + dvGen = input.readLong(); + } else { + dvGen = fieldInfosGen; + } + SegmentCommitInfo siPerCommit = new SegmentCommitInfo(info, delCount, delGen, fieldInfosGen, dvGen); + if (actualFormat >= VERSION_46) { + if (actualFormat < VERSION_49) { + // Recorded per-generation files, which were buggy (see + // LUCENE-5636). 
We need to read and keep them so we continue to + // reference those files. Unfortunately it means that the files will + // be referenced even if the fields are updated again, until the + // segment is merged. + final int numGensUpdatesFiles = input.readInt(); + final Map> genUpdatesFiles; + if (numGensUpdatesFiles == 0) { + genUpdatesFiles = Collections.emptyMap(); + } else { + genUpdatesFiles = new HashMap<>(numGensUpdatesFiles); + for (int i = 0; i < numGensUpdatesFiles; i++) { + genUpdatesFiles.put(input.readLong(), input.readStringSet()); + } + } + siPerCommit.setGenUpdatesFiles(genUpdatesFiles); + } else { + siPerCommit.setFieldInfosFiles(input.readStringSet()); + final Map> dvUpdateFiles; + final int numDVFields = input.readInt(); + if (numDVFields == 0) { + dvUpdateFiles = Collections.emptyMap(); + } else { + dvUpdateFiles = new HashMap<>(numDVFields); + for (int i = 0; i < numDVFields; i++) { + dvUpdateFiles.put(input.readInt(), input.readStringSet()); + } + } + siPerCommit.setDocValuesUpdatesFiles(dvUpdateFiles); + } + } + add(siPerCommit); + } + userData = input.readStringStringMap(); + } else { + actualFormat = -1; + Lucene3xSegmentInfoReader.readLegacyInfos(this, directory, input, format); + Codec codec = Codec.forName("Lucene3x"); + for (SegmentCommitInfo info : this) { + info.info.setCodec(codec); + } } - else{ // file is in old format without explicit format info - counter = format; - } - - for (int i = input.readInt(); i > 0; i--) { // read segmentInfos - add(new SegmentInfo(directory, format, input)); - } - - if(format >= 0){ // in old format the version number may be at the end of the file - if (input.getFilePointer() >= input.length()) - version = System.currentTimeMillis(); // old file format without version number - else - version = input.readLong(); // read version - } - if (format <= FORMAT_CHECKSUM) { + if (actualFormat >= VERSION_48) { + CodecUtil.checkFooter(input); + } else { final long checksumNow = input.getChecksum(); final long checksumThen = input.readLong(); - if (checksumNow != checksumThen) - throw new CorruptIndexException("checksum mismatch in segments file"); + if (checksumNow != checksumThen) { + throw new CorruptIndexException("checksum mismatch in segments file (resource: " + input + ")"); + } + CodecUtil.checkEOF(input); } + success = true; - } - finally { - input.close(); + } finally { if (!success) { // Clear any segment infos we had loaded so we // have a clean slate on retry: - clear(); + this.clear(); + IOUtils.closeWhileHandlingException(input); + } else { + input.close(); } } } - /** - * This version of read uses the retry logic (for lock-less - * commits) to find the right segments file to load. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public final void read(Directory directory) throws CorruptIndexException, IOException { - + /** Find the latest commit ({@code segments_N file}) and + * load all {@link SegmentCommitInfo}s. 
*/ + public final void read(Directory directory) throws IOException { generation = lastGeneration = -1; new FindSegmentsFile(directory) { - protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException { + @Override + protected Object doBody(String segmentFileName) throws IOException { read(directory, segmentFileName); return null; } @@ -280,66 +459,202 @@ // Only non-null after prepareCommit has been called and // before finishCommit is called - ChecksumIndexOutput pendingOutput; + IndexOutput pendingSegnOutput; - private final void write(Directory directory) throws IOException { + private static final String SEGMENT_INFO_UPGRADE_CODEC = "SegmentInfo3xUpgrade"; + private static final int SEGMENT_INFO_UPGRADE_VERSION = 0; - String segmentFileName = getNextSegmentFileName(); + private void write(Directory directory) throws IOException { + String segmentsFileName = getNextSegmentFileName(); + // Always advance the generation on write: if (generation == -1) { generation = 1; } else { generation++; } - - ChecksumIndexOutput output = new ChecksumIndexOutput(directory.createOutput(segmentFileName)); - + + IndexOutput segnOutput = null; boolean success = false; + final Set upgradedSIFiles = new HashSet<>(); + try { - output.writeInt(CURRENT_FORMAT); // write FORMAT - output.writeLong(++version); // every write changes - // the index - output.writeInt(counter); // write counter - output.writeInt(size()); // write infos - for (int i = 0; i < size(); i++) { - info(i).write(output); + segnOutput = directory.createOutput(segmentsFileName, IOContext.DEFAULT); + CodecUtil.writeHeader(segnOutput, "segments", VERSION_49); + segnOutput.writeLong(version); + segnOutput.writeInt(counter); // write counter + segnOutput.writeInt(size()); // write infos + for (SegmentCommitInfo siPerCommit : this) { + SegmentInfo si = siPerCommit.info; + segnOutput.writeString(si.name); + segnOutput.writeString(si.getCodec().getName()); + segnOutput.writeLong(siPerCommit.getDelGen()); + int delCount = siPerCommit.getDelCount(); + if (delCount < 0 || delCount > si.getDocCount()) { + throw new IllegalStateException("cannot write segment: invalid docCount segment=" + si.name + " docCount=" + si.getDocCount() + " delCount=" + delCount); + } + segnOutput.writeInt(delCount); + segnOutput.writeLong(siPerCommit.getFieldInfosGen()); + segnOutput.writeLong(siPerCommit.getDocValuesGen()); + segnOutput.writeStringSet(siPerCommit.getFieldInfosFiles()); + final Map> dvUpdatesFiles = siPerCommit.getDocValuesUpdatesFiles(); + segnOutput.writeInt(dvUpdatesFiles.size()); + for (Entry> e : dvUpdatesFiles.entrySet()) { + segnOutput.writeInt(e.getKey()); + segnOutput.writeStringSet(e.getValue()); + } + assert si.dir == directory; + + // If this segment is pre-4.x, perform a one-time + // "ugprade" to write the .si file for it: + Version version = si.getVersion(); + if (version == null || version.onOrAfter(Version.LUCENE_4_0_0_ALPHA) == false) { + + // Defensive check: we are about to write this SI in 3.x format, dropping all codec information, etc. + // so it had better be a 3.x segment or you will get very confusing errors later. 
+ if ((si.getCodec() instanceof Lucene3xCodec) == false) { + throw new IllegalStateException("cannot write 3x SegmentInfo unless codec is Lucene3x (got: " + si.getCodec() + ")"); + } + + if (!segmentWasUpgraded(directory, si)) { + + String markerFileName = IndexFileNames.segmentFileName(si.name, "upgraded", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); + si.addFile(markerFileName); + + final String segmentFileName = write3xInfo(directory, si, IOContext.DEFAULT); + upgradedSIFiles.add(segmentFileName); + directory.sync(Collections.singletonList(segmentFileName)); + + // Write separate marker file indicating upgrade + // is completed. This way, if there is a JVM + // kill/crash, OS crash, power loss, etc. while + // writing the upgraded file, the marker file + // will be missing: + IndexOutput out = directory.createOutput(markerFileName, IOContext.DEFAULT); + try { + CodecUtil.writeHeader(out, SEGMENT_INFO_UPGRADE_CODEC, SEGMENT_INFO_UPGRADE_VERSION); + } finally { + out.close(); + } + upgradedSIFiles.add(markerFileName); + directory.sync(Collections.singletonList(markerFileName)); + } + } } - output.prepareCommit(); + segnOutput.writeStringStringMap(userData); + pendingSegnOutput = segnOutput; success = true; - pendingOutput = output; } finally { if (!success) { // We hit an exception above; try to close the file // but suppress any exception: - try { - output.close(); - } catch (Throwable t) { - // Suppress so we keep throwing the original exception + IOUtils.closeWhileHandlingException(segnOutput); + + for (String fileName : upgradedSIFiles) { + IOUtils.deleteFilesIgnoringExceptions(directory, fileName); } + + // Try not to leave a truncated segments_N file in + // the index: + IOUtils.deleteFilesIgnoringExceptions(directory, segmentsFileName); + } + } + } + + private static boolean segmentWasUpgraded(Directory directory, SegmentInfo si) { + // Check marker file: + String markerFileName = IndexFileNames.segmentFileName(si.name, "upgraded", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); + IndexInput in = null; + try { + in = directory.openInput(markerFileName, IOContext.READONCE); + if (CodecUtil.checkHeader(in, SEGMENT_INFO_UPGRADE_CODEC, SEGMENT_INFO_UPGRADE_VERSION, SEGMENT_INFO_UPGRADE_VERSION) == 0) { + return true; + } + } catch (IOException ioe) { + // Ignore: if something is wrong w/ the marker file, + // we will just upgrade again + } finally { + if (in != null) { + IOUtils.closeWhileHandlingException(in); + } + } + return false; + } + + @Deprecated + public static String write3xInfo(Directory dir, SegmentInfo si, IOContext context) throws IOException { + + // Defensive check: we are about to write this SI in 3.x format, dropping all codec information, etc. + // so it had better be a 3.x segment or you will get very confusing errors later. + if ((si.getCodec() instanceof Lucene3xCodec) == false) { + throw new IllegalStateException("cannot write 3x SegmentInfo unless codec is Lucene3x (got: " + si.getCodec() + ")"); + } + + // NOTE: this is NOT how 3.x is really written... 
+ String fileName = IndexFileNames.segmentFileName(si.name, "", Lucene3xSegmentInfoFormat.UPGRADED_SI_EXTENSION); + si.addFile(fileName); + + //System.out.println("UPGRADE write " + fileName); + boolean success = false; + IndexOutput output = dir.createOutput(fileName, context); + try { + CodecUtil.writeHeader(output, Lucene3xSegmentInfoFormat.UPGRADED_SI_CODEC_NAME, + Lucene3xSegmentInfoFormat.UPGRADED_SI_VERSION_CURRENT); + // Write the Lucene version that created this segment, since 3.1 + output.writeString(si.getVersion().toString()); + output.writeInt(si.getDocCount()); + + output.writeStringStringMap(si.attributes()); + + output.writeByte((byte) (si.getUseCompoundFile() ? SegmentInfo.YES : SegmentInfo.NO)); + output.writeStringStringMap(si.getDiagnostics()); + output.writeStringSet(si.files()); + + output.close(); + + success = true; + } finally { + if (!success) { + IOUtils.closeWhileHandlingException(output); try { - // Try not to leave a truncated segments_N file in - // the index: - directory.deleteFile(segmentFileName); + si.dir.deleteFile(fileName); } catch (Throwable t) { // Suppress so we keep throwing the original exception } } } + + return fileName; } /** * Returns a copy of this instance, also copying each * SegmentInfo. */ - public Object clone() { - SegmentInfos sis = (SegmentInfos) super.clone(); - for(int i=0;i(size()); + for(final SegmentCommitInfo info : this) { + assert info.info.getCodec() != null; + // dont directly access segments, use add method!!! + sis.add(info.clone(cloneSegmentInfo)); + } + sis.userData = new HashMap<>(userData); + return sis; + } catch (CloneNotSupportedException e) { + throw new RuntimeException("should not happen", e); } - return sis; } /** @@ -348,52 +663,17 @@ public long getVersion() { return version; } + + /** Returns current generation. */ public long getGeneration() { return generation; } + + /** Returns last succesfully read or written generation. */ public long getLastGeneration() { return lastGeneration; } - /** - * Current version number from segments file. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static long readCurrentVersion(Directory directory) - throws CorruptIndexException, IOException { - - return ((Long) new FindSegmentsFile(directory) { - protected Object doBody(String segmentFileName) throws CorruptIndexException, IOException { - - IndexInput input = directory.openInput(segmentFileName); - - int format = 0; - long version = 0; - try { - format = input.readInt(); - if(format < 0){ - if (format < CURRENT_FORMAT) - throw new CorruptIndexException("Unknown format version: " + format); - version = input.readLong(); // read version - } - } - finally { - input.close(); - } - - if(format < 0) - return new Long(version); - - // We cannot be sure about the format of the file. - // Therefore we have to read the whole file and cannot simply seek to the version entry. - SegmentInfos sis = new SegmentInfos(); - sis.read(directory, segmentFileName); - return new Long(sis.getVersion()); - } - }.run()).longValue(); - } - /** If non-null, information about retries when loading * the segments file will be printed to this. 
*/ @@ -403,70 +683,49 @@ /* Advanced configuration of retry logic in loading segments_N file */ - private static int defaultGenFileRetryCount = 10; - private static int defaultGenFileRetryPauseMsec = 50; private static int defaultGenLookaheadCount = 10; /** - * Advanced: set how many times to try loading the - * segments.gen file contents to determine current segment - * generation. This file is only referenced when the - * primary method (listing the directory) fails. - */ - public static void setDefaultGenFileRetryCount(int count) { - defaultGenFileRetryCount = count; - } - - /** - * @see #setDefaultGenFileRetryCount - */ - public static int getDefaultGenFileRetryCount() { - return defaultGenFileRetryCount; - } - - /** - * Advanced: set how many milliseconds to pause in between - * attempts to load the segments.gen file. - */ - public static void setDefaultGenFileRetryPauseMsec(int msec) { - defaultGenFileRetryPauseMsec = msec; - } - - /** - * @see #setDefaultGenFileRetryPauseMsec - */ - public static int getDefaultGenFileRetryPauseMsec() { - return defaultGenFileRetryPauseMsec; - } - - /** * Advanced: set how many times to try incrementing the * gen when loading the segments file. This only runs if * the primary (listing directory) and secondary (opening * segments.gen file) methods fail to find the segments * file. + * + * @lucene.experimental */ public static void setDefaultGenLookaheadCount(int count) { defaultGenLookaheadCount = count; } + /** + * Returns the {@code defaultGenLookaheadCount}. + * * @see #setDefaultGenLookaheadCount + * + * @lucene.experimental */ public static int getDefaultGenLookahedCount() { return defaultGenLookaheadCount; } /** + * Returns {@code infoStream}. + * * @see #setInfoStream */ public static PrintStream getInfoStream() { return infoStream; } + /** + * Prints the given message to the infoStream. Note, this method does not + * check for null infoStream. It assumes this check has been performed by the + * caller, which is recommended to avoid the (usually) expensive message + * creation. + */ private static void message(String message) { - if (infoStream != null) { - infoStream.println("SIS [" + Thread.currentThread().getName() + "]: " + message); - } + infoStream.println("SIS [" + Thread.currentThread().getName() + "]: " + message); } /** @@ -479,27 +738,36 @@ * commit finishing. */ public abstract static class FindSegmentsFile { - - File fileDirectory; - Directory directory; - public FindSegmentsFile(File directory) { - this.fileDirectory = directory; - } + final Directory directory; + /** Sole constructor. */ public FindSegmentsFile(Directory directory) { this.directory = directory; } - public Object run() throws CorruptIndexException, IOException { + /** Locate the most recent {@code segments} file and + * run {@link #doBody} on it. */ + public Object run() throws IOException { + return run(null); + } + + /** Run {@link #doBody} on the provided commit. */ + public Object run(IndexCommit commit) throws IOException { + if (commit != null) { + if (directory != commit.getDirectory()) + throw new IOException("the specified commit does not match the specified Directory"); + return doBody(commit.getSegmentsFileName()); + } + String segmentFileName = null; long lastGen = -1; long gen = 0; int genLookaheadCount = 0; IOException exc = null; - boolean retry = false; + int retryCount = 0; - int method = 0; + boolean useFirstMethod = true; // Loop until we succeed in calling doBody() without // hitting an IOException. 
An IOException most likely @@ -513,14 +781,15 @@ // it. // We have three methods for determining the current - // generation. We try the first two in parallel, and - // fall back to the third when necessary. + // generation. We try the first two in parallel (when + // useFirstMethod is true), and fall back to the third + // when necessary. while(true) { - if (0 == method) { + if (useFirstMethod) { - // Method 1: list the directory and use the highest + // List the directory and use the highest // segments_N file. This method works well as long // as there is no stale caching on the directory // contents (NOTE: NFS clients often have such stale @@ -529,118 +798,102 @@ long genA = -1; - if (directory != null) - files = directory.list(); - else - files = fileDirectory.list(); + files = directory.listAll(); - if (files != null) - genA = getCurrentSegmentGeneration(files); + if (files != null) { + genA = getLastCommitGeneration(files); + } + + if (infoStream != null) { + message("directory listing genA=" + genA); + } - message("directory listing genA=" + genA); - - // Method 2: open segments.gen and read its + // Also open segments.gen and read its // contents. Then we take the larger of the two - // gen's. This way, if either approach is hitting + // gens. This way, if either approach is hitting // a stale cache (NFS) we have a better chance of // getting the right generation. long genB = -1; - if (directory != null) { - for(int i=0;i= 2) { + // Give up on first method -- this is 3rd cycle on + // listing directory and checking gen file to + // attempt to locate the segments file. + useFirstMethod = false; + } + + // Second method: since both directory cache and // file contents cache seem to be stale, just // advance the generation. - if (1 == method || (0 == method && lastGen == gen && retry)) { - - method = 1; - + if (!useFirstMethod) { if (genLookaheadCount < defaultGenLookaheadCount) { gen++; genLookaheadCount++; - message("look ahead increment gen to " + gen); - } - } - - if (lastGen == gen) { - - // This means we're about to try the same - // segments_N last tried. This is allowed, - // exactly once, because writer could have been in - // the process of writing segments_N last time. - - if (retry) { - // OK, we've tried the same segments_N file - // twice in a row, so this must be a real - // error. We throw the original exception we - // got. - throw exc; + if (infoStream != null) { + message("look ahead increment gen to " + gen); + } } else { - retry = true; + // All attempts have failed -- throw first exc: + throw exc; } - - } else if (0 == method) { - // Segment file has advanced since our last loop, so - // reset retry: - retry = false; + } else if (lastGen == gen) { + // This means we're about to try the same + // segments_N last tried. + retryCount++; + } else { + // Segment file has advanced since our last loop + // (we made "progress"), so reset retryCount: + retryCount = 0; } lastGen = gen; @@ -651,46 +904,58 @@ try { Object v = doBody(segmentFileName); - if (exc != null) { + if (infoStream != null) { message("success on " + segmentFileName); } return v; } catch (IOException err) { + // TODO: we should use the new IO apis in Java7 to get better exceptions on why the open failed. E.g. 
we don't want to fall back + // if the open failed for a "different" reason (too many open files, access denied) than "the commit was in progress" + // Save the original root cause: if (exc == null) { exc = err; } - message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retry=" + retry + "; gen = " + gen); + if (infoStream != null) { + message("primary Exception on '" + segmentFileName + "': " + err + "'; will retry: retryCount=" + retryCount + "; gen = " + gen); + } - if (!retry && gen > 1) { + if (gen > 1 && useFirstMethod && retryCount == 1) { - // This is our first time trying this segments - // file (because retry is false), and, there is + // This is our second time trying this same segments + // file (because retryCount is 1), and, there is // possibly a segments_(N-1) (because gen > 1). // So, check if the segments_(N-1) exists and // try it if so: String prevSegmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, "", gen-1); - final boolean prevExists; - if (directory != null) - prevExists = directory.fileExists(prevSegmentFileName); - else - prevExists = new File(fileDirectory, prevSegmentFileName).exists(); + boolean prevExists; + try { + directory.openInput(prevSegmentFileName, IOContext.DEFAULT).close(); + prevExists = true; + } catch (IOException ioe) { + prevExists = false; + } + if (prevExists) { - message("fallback to prior segment file '" + prevSegmentFileName + "'"); + if (infoStream != null) { + message("fallback to prior segment file '" + prevSegmentFileName + "'"); + } try { Object v = doBody(prevSegmentFileName); - if (exc != null) { + if (infoStream != null) { message("success on fallback " + prevSegmentFileName); } return v; } catch (IOException err2) { - message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry"); + if (infoStream != null) { + message("secondary Exception on '" + prevSegmentFileName + "': " + err2 + "'; will retry"); + } } } } @@ -704,75 +969,108 @@ * during the processing that could have been caused by * a writer committing. */ - protected abstract Object doBody(String segmentFileName) throws CorruptIndexException, IOException; + protected abstract Object doBody(String segmentFileName) throws IOException; } - /** - * Returns a new SegmentInfos containg the SegmentInfo - * instances in the specified range first (inclusive) to - * last (exclusive), so total number of segments returned - * is last-first. 
- */ - public SegmentInfos range(int first, int last) { - SegmentInfos infos = new SegmentInfos(); - infos.addAll(super.subList(first, last)); - return infos; - } - // Carry over generation numbers from another SegmentInfos void updateGeneration(SegmentInfos other) { lastGeneration = other.lastGeneration; generation = other.generation; - version = other.version; } - public final void rollbackCommit(Directory dir) throws IOException { - if (pendingOutput != null) { - try { - pendingOutput.close(); - } catch (Throwable t) { - // Suppress so we keep throwing the original exception - // in our caller - } + void setGeneration(long generation) { + this.generation = generation; + this.lastGeneration = generation; + } + final void rollbackCommit(Directory dir) { + if (pendingSegnOutput != null) { + // Suppress so we keep throwing the original exception + // in our caller + IOUtils.closeWhileHandlingException(pendingSegnOutput); + pendingSegnOutput = null; + // Must carefully compute fileName from "generation" // since lastGeneration isn't incremented: - try { - final String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, - "", - generation); - dir.deleteFile(segmentFileName); - } catch (Throwable t) { - // Suppress so we keep throwing the original exception - // in our caller - } - pendingOutput = null; + final String segmentFileName = IndexFileNames.fileNameFromGeneration(IndexFileNames.SEGMENTS, + "", + generation); + // Suppress so we keep throwing the original exception + // in our caller + IOUtils.deleteFilesIgnoringExceptions(dir, segmentFileName); } } /** Call this to start a commit. This writes the new * segments file, but writes an invalid checksum at the * end, so that it is not visible to readers. Once this * is called you must call {@link #finishCommit} to complete - * the commit or {@link #rollbackCommit} to abort it. */ - public final void prepareCommit(Directory dir) throws IOException { - if (pendingOutput != null) + * the commit or {@link #rollbackCommit} to abort it. + *

    + * Note: {@link #changed()} should be called prior to this + * method if changes have been made to this {@link SegmentInfos} instance + *
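(Usage illustration only: prepareCommit, finishCommit and rollbackCommit are package-private, so this hedged sketch assumes calling code inside org.apache.lucene.index; it is not code from the patch.)

   // Two-phase commit flow for a SegmentInfos instance "sis".
   sis.changed();               // record that this SegmentInfos was modified
   sis.prepareCommit(dir);      // writes segments_N but leaves it invisible to readers
   boolean success = false;
   try {
     // ... any other work that must succeed before the commit becomes visible ...
     sis.finishCommit(dir);     // completes segments_N so readers can see the commit
     success = true;
   } finally {
     if (!success) {
       sis.rollbackCommit(dir); // deletes the partially written segments_N
     }
   }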

    + **/ + final void prepareCommit(Directory dir) throws IOException { + if (pendingSegnOutput != null) { throw new IllegalStateException("prepareCommit was already called"); + } write(dir); } - public final void finishCommit(Directory dir) throws IOException { - if (pendingOutput == null) + /** Returns all file names referenced by SegmentInfo + * instances matching the provided Directory (ie files + * associated with any "external" segments are skipped). + * The returned collection is recomputed on each + * invocation. */ + public Collection files(Directory dir, boolean includeSegmentsFile) throws IOException { + HashSet files = new HashSet<>(); + if (includeSegmentsFile) { + final String segmentFileName = getSegmentsFileName(); + if (segmentFileName != null) { + files.add(segmentFileName); + } + } + final int size = size(); + for(int i=0;i + * Note: {@link #changed()} should be called prior to this + * method if changes have been made to this {@link SegmentInfos} instance + *

    + **/ + final void commit(Directory dir) throws IOException { prepareCommit(dir); finishCommit(dir); } - synchronized String segString(Directory directory) { - StringBuffer buffer = new StringBuffer(); + /** Returns readable description of this segment. */ + public String toString(Directory directory) { + StringBuilder buffer = new StringBuilder(); + buffer.append(getSegmentsFileName()).append(": "); final int count = size(); for(int i = 0; i < count; i++) { if (i > 0) { buffer.append(' '); } - final SegmentInfo info = info(i); - buffer.append(info.segString(directory)); - if (info.dir != directory) - buffer.append("**"); + final SegmentCommitInfo info = info(i); + buffer.append(info.toString(directory, 0)); } return buffer.toString(); } + + /** Return {@code userData} saved with this commit. + * + * @see IndexWriter#commit() + */ + public Map getUserData() { + return userData; + } + + void setUserData(Map data) { + if (data == null) { + userData = Collections.emptyMap(); + } else { + userData = data; + } + } + + /** Replaces all segments in this instance, but keeps + * generation, version, counter so that future commits + * remain write once. + */ + void replace(SegmentInfos other) { + rollbackSegmentInfos(other.asList()); + lastGeneration = other.lastGeneration; + } + + /** Returns sum of all segment's docCounts. Note that + * this does not include deletions */ + public int totalDocCount() { + int count = 0; + for(SegmentCommitInfo info : this) { + count += info.info.getDocCount(); + } + return count; + } + + /** Call this before committing if changes have been made to the + * segments. */ + public void changed() { + version++; + } + + /** applies all changes caused by committing a merge to this SegmentInfos */ + void applyMergeChanges(MergePolicy.OneMerge merge, boolean dropSegment) { + final Set mergedAway = new HashSet<>(merge.segments); + boolean inserted = false; + int newSegIdx = 0; + for (int segIdx = 0, cnt = segments.size(); segIdx < cnt; segIdx++) { + assert segIdx >= newSegIdx; + final SegmentCommitInfo info = segments.get(segIdx); + if (mergedAway.contains(info)) { + if (!inserted && !dropSegment) { + segments.set(segIdx, merge.info); + inserted = true; + newSegIdx++; + } + } else { + segments.set(newSegIdx, info); + newSegIdx++; + } + } + + // the rest of the segments in list are duplicates, so don't remove from map, only list! + segments.subList(newSegIdx, segments.size()).clear(); + + // Either we found place to insert segment, or, we did + // not, but only because all segments we merged becamee + // deleted while we are merging, in which case it should + // be the case that the new segment is also all deleted, + // we insert it at the beginning if it should not be dropped: + if (!inserted && !dropSegment) { + segments.add(0, merge.info); + } + } + + List createBackupSegmentInfos() { + final List list = new ArrayList<>(size()); + for(final SegmentCommitInfo info : this) { + assert info.info.getCodec() != null; + list.add(info.clone()); + } + return list; + } + + void rollbackSegmentInfos(List infos) { + this.clear(); + this.addAll(infos); + } + + /** Returns an unmodifiable {@link Iterator} of contained segments in order. */ + // @Override (comment out until Java 6) + @Override + public Iterator iterator() { + return asList().iterator(); + } + + /** Returns all contained segments as an unmodifiable {@link List} view. */ + public List asList() { + return Collections.unmodifiableList(segments); + } + + /** Returns number of {@link SegmentCommitInfo}s. 
*/ + public int size() { + return segments.size(); + } + + /** Appends the provided {@link SegmentCommitInfo}. */ + public void add(SegmentCommitInfo si) { + segments.add(si); + } + + /** Appends the provided {@link SegmentCommitInfo}s. */ + public void addAll(Iterable sis) { + for (final SegmentCommitInfo si : sis) { + this.add(si); + } + } + + /** Clear all {@link SegmentCommitInfo}s. */ + public void clear() { + segments.clear(); + } + + /** Remove the provided {@link SegmentCommitInfo}. + * + *

    WARNING: O(N) cost */ + public void remove(SegmentCommitInfo si) { + segments.remove(si); + } + + /** Remove the {@link SegmentCommitInfo} at the + * provided index. + * + *

    WARNING: O(N) cost */ + void remove(int index) { + segments.remove(index); + } + + /** Return true if the provided {@link + * SegmentCommitInfo} is contained. + * + *

    WARNING: O(N) cost */ + boolean contains(SegmentCommitInfo si) { + return segments.contains(si); + } + + /** Returns index of the provided {@link + * SegmentCommitInfo}. + * + *

    WARNING: O(N) cost */ + int indexOf(SegmentCommitInfo si) { + return segments.indexOf(si); + } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentMergeInfo.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentMergeQueue.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/SegmentMerger.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/SegmentMerger.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/SegmentMerger.java 17 Aug 2012 14:54:58 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/SegmentMerger.java 16 Dec 2014 11:31:41 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,792 +19,393 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Collection; -import java.util.Iterator; import java.util.List; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.document.FieldSelectorResult; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.FieldInfosWriter; +import org.apache.lucene.codecs.FieldsConsumer; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.StoredFieldsWriter; +import org.apache.lucene.codecs.TermVectorsWriter; +import org.apache.lucene.index.FieldInfo.DocValuesType; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.InfoStream; /** - * The SegmentMerger class combines two or more Segments, represented by an IndexReader ({@link #add}, - * into a single Segment. After adding the appropriate readers, call the merge method to combine the + * The SegmentMerger class combines two or more Segments, represented by an + * IndexReader, into a single Segment. Call the merge method to combine the * segments. - *

    - * If the compoundFile flag is set, then the segments will be merged into a compound file. - * - * + * * @see #merge - * @see #add */ final class SegmentMerger { + private final Directory directory; + private final int termIndexInterval; + + private final Codec codec; - /** norms header placeholder */ - static final byte[] NORMS_HEADER = new byte[]{'N','R','M',-1}; + private final IOContext context; - private Directory directory; - private String segment; - private int termIndexInterval = IndexWriter.DEFAULT_TERM_INDEX_INTERVAL; + private final MergeState mergeState; + private final FieldInfos.Builder fieldInfosBuilder; - private List readers = new ArrayList(); - private FieldInfos fieldInfos; - - private int mergedDocs; - - private CheckAbort checkAbort; - - // Whether we should merge doc stores (stored fields and - // vectors files). When all segments we are merging - // already share the same doc store files, we don't need - // to merge the doc stores. - private boolean mergeDocStores; - - /** Maximum number of contiguous documents to bulk-copy - when merging stored fields */ - private final static int MAX_RAW_MERGE_DOCS = 4192; - - /** This ctor used only by test code. - * - * @param dir The Directory to merge the other segments into - * @param name The name of the new segment - */ - SegmentMerger(Directory dir, String name) { + // note, just like in codec apis Directory 'dir' is NOT the same as segmentInfo.dir!! + SegmentMerger(List readers, SegmentInfo segmentInfo, InfoStream infoStream, Directory dir, int termIndexInterval, + MergeState.CheckAbort checkAbort, FieldInfos.FieldNumbers fieldNumbers, IOContext context, boolean validate) throws IOException { + // validate incoming readers + if (validate) { + for (AtomicReader reader : readers) { + reader.checkIntegrity(); + } + } + mergeState = new MergeState(readers, segmentInfo, infoStream, checkAbort); directory = dir; - segment = name; + this.termIndexInterval = termIndexInterval; + this.codec = segmentInfo.getCodec(); + this.context = context; + this.fieldInfosBuilder = new FieldInfos.Builder(fieldNumbers); + mergeState.segmentInfo.setDocCount(setDocMaps()); } - - SegmentMerger(IndexWriter writer, String name, MergePolicy.OneMerge merge) { - directory = writer.getDirectory(); - segment = name; - if (merge != null) - checkAbort = new CheckAbort(merge, directory); - termIndexInterval = writer.getTermIndexInterval(); - } - boolean hasProx() { - return fieldInfos.hasProx(); + /** True if any merging should happen */ + boolean shouldMerge() { + return mergeState.segmentInfo.getDocCount() > 0; } /** - * Add an IndexReader to the collection of readers that are to be merged - * @param reader - */ - final void add(IndexReader reader) { - readers.add(reader); - } - - /** - * - * @param i The index of the reader to return - * @return The ith reader to be merged - */ - final IndexReader segmentReader(int i) { - return (IndexReader) readers.get(i); - } - - /** - * Merges the readers specified by the {@link #add} method into the directory passed to the constructor + * Merges the readers into the directory passed to the constructor * @return The number of documents that were merged * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - final int merge() throws CorruptIndexException, IOException { - return merge(true); - } - - /** - * Merges the readers specified by the {@link #add} method - * into the directory passed to the constructor. 
- * @param mergeDocStores if false, we will not merge the - * stored fields nor vectors files - * @return The number of documents that were merged - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - final int merge(boolean mergeDocStores) throws CorruptIndexException, IOException { - - this.mergeDocStores = mergeDocStores; - + MergeState merge() throws IOException { + if (!shouldMerge()) { + throw new IllegalStateException("Merge would result in 0 document segment"); + } // NOTE: it's important to add calls to // checkAbort.work(...) if you make any changes to this // method that will spend alot of time. The frequency // of this check impacts how long // IndexWriter.close(false) takes to actually stop the // threads. + mergeFieldInfos(); + setMatchingSegmentReaders(); + long t0 = 0; + if (mergeState.infoStream.isEnabled("SM")) { + t0 = System.nanoTime(); + } + int numMerged = mergeFields(); + if (mergeState.infoStream.isEnabled("SM")) { + long t1 = System.nanoTime(); + mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge stored fields [" + numMerged + " docs]"); + } + assert numMerged == mergeState.segmentInfo.getDocCount(); - mergedDocs = mergeFields(); - mergeTerms(); - mergeNorms(); - - if (mergeDocStores && fieldInfos.hasVectors()) - mergeVectors(); - - return mergedDocs; - } - - /** - * close all IndexReaders that have been added. - * Should not be called before merge(). - * @throws IOException - */ - final void closeReaders() throws IOException { - for (int i = 0; i < readers.size(); i++) { // close readers - IndexReader reader = (IndexReader) readers.get(i); - reader.close(); + final SegmentWriteState segmentWriteState = new SegmentWriteState(mergeState.infoStream, directory, mergeState.segmentInfo, + mergeState.fieldInfos, termIndexInterval, null, context); + if (mergeState.infoStream.isEnabled("SM")) { + t0 = System.nanoTime(); } - } + mergeTerms(segmentWriteState); + if (mergeState.infoStream.isEnabled("SM")) { + long t1 = System.nanoTime(); + mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge postings [" + numMerged + " docs]"); + } - final List createCompoundFile(String fileName) - throws IOException { - CompoundFileWriter cfsWriter = - new CompoundFileWriter(directory, fileName, checkAbort); - - List files = - new ArrayList(IndexFileNames.COMPOUND_EXTENSIONS.length + 1); + if (mergeState.infoStream.isEnabled("SM")) { + t0 = System.nanoTime(); + } + if (mergeState.fieldInfos.hasDocValues()) { + mergeDocValues(segmentWriteState); + } + if (mergeState.infoStream.isEnabled("SM")) { + long t1 = System.nanoTime(); + mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge doc values [" + numMerged + " docs]"); + } - // Basic files - for (int i = 0; i < IndexFileNames.COMPOUND_EXTENSIONS.length; i++) { - String ext = IndexFileNames.COMPOUND_EXTENSIONS[i]; - - if (ext.equals(IndexFileNames.PROX_EXTENSION) && !hasProx()) - continue; - - if (mergeDocStores || (!ext.equals(IndexFileNames.FIELDS_EXTENSION) && - !ext.equals(IndexFileNames.FIELDS_INDEX_EXTENSION))) - files.add(segment + "." 
+ ext); + if (mergeState.fieldInfos.hasNorms()) { + if (mergeState.infoStream.isEnabled("SM")) { + t0 = System.nanoTime(); + } + mergeNorms(segmentWriteState); + if (mergeState.infoStream.isEnabled("SM")) { + long t1 = System.nanoTime(); + mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge norms [" + numMerged + " docs]"); + } } - // Fieldable norm files - for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); - if (fi.isIndexed && !fi.omitNorms) { - files.add(segment + "." + IndexFileNames.NORMS_EXTENSION); - break; + if (mergeState.fieldInfos.hasVectors()) { + if (mergeState.infoStream.isEnabled("SM")) { + t0 = System.nanoTime(); } + numMerged = mergeVectors(); + if (mergeState.infoStream.isEnabled("SM")) { + long t1 = System.nanoTime(); + mergeState.infoStream.message("SM", ((t1-t0)/1000000) + " msec to merge vectors [" + numMerged + " docs]"); + } + assert numMerged == mergeState.segmentInfo.getDocCount(); } + + // write the merged infos + FieldInfosWriter fieldInfosWriter = codec.fieldInfosFormat().getFieldInfosWriter(); + fieldInfosWriter.write(directory, mergeState.segmentInfo.name, "", mergeState.fieldInfos, context); - // Vector files - if (fieldInfos.hasVectors() && mergeDocStores) { - for (int i = 0; i < IndexFileNames.VECTOR_EXTENSIONS.length; i++) { - files.add(segment + "." + IndexFileNames.VECTOR_EXTENSIONS[i]); + return mergeState; + } + + private void mergeDocValues(SegmentWriteState segmentWriteState) throws IOException { + DocValuesConsumer consumer = codec.docValuesFormat().fieldsConsumer(segmentWriteState); + boolean success = false; + try { + for (FieldInfo field : mergeState.fieldInfos) { + DocValuesType type = field.getDocValuesType(); + if (type != null) { + if (type == DocValuesType.NUMERIC) { + List toMerge = new ArrayList<>(); + List docsWithField = new ArrayList<>(); + for (AtomicReader reader : mergeState.readers) { + NumericDocValues values = reader.getNumericDocValues(field.name); + Bits bits = reader.getDocsWithField(field.name); + if (values == null) { + values = DocValues.emptyNumeric(); + bits = new Bits.MatchNoBits(reader.maxDoc()); + } + toMerge.add(values); + docsWithField.add(bits); + } + consumer.mergeNumericField(field, mergeState, toMerge, docsWithField); + } else if (type == DocValuesType.BINARY) { + List toMerge = new ArrayList<>(); + List docsWithField = new ArrayList<>(); + for (AtomicReader reader : mergeState.readers) { + BinaryDocValues values = reader.getBinaryDocValues(field.name); + Bits bits = reader.getDocsWithField(field.name); + if (values == null) { + values = DocValues.emptyBinary(); + bits = new Bits.MatchNoBits(reader.maxDoc()); + } + toMerge.add(values); + docsWithField.add(bits); + } + consumer.mergeBinaryField(field, mergeState, toMerge, docsWithField); + } else if (type == DocValuesType.SORTED) { + List toMerge = new ArrayList<>(); + for (AtomicReader reader : mergeState.readers) { + SortedDocValues values = reader.getSortedDocValues(field.name); + if (values == null) { + values = DocValues.emptySorted(); + } + toMerge.add(values); + } + consumer.mergeSortedField(field, mergeState, toMerge); + } else if (type == DocValuesType.SORTED_SET) { + List toMerge = new ArrayList<>(); + for (AtomicReader reader : mergeState.readers) { + SortedSetDocValues values = reader.getSortedSetDocValues(field.name); + if (values == null) { + values = DocValues.emptySortedSet(); + } + toMerge.add(values); + } + consumer.mergeSortedSetField(field, mergeState, toMerge); + } else if (type == 
DocValuesType.SORTED_NUMERIC) { + List toMerge = new ArrayList<>(); + for (AtomicReader reader : mergeState.readers) { + SortedNumericDocValues values = reader.getSortedNumericDocValues(field.name); + if (values == null) { + values = DocValues.emptySortedNumeric(reader.maxDoc()); + } + toMerge.add(values); + } + consumer.mergeSortedNumericField(field, mergeState, toMerge); + } else { + throw new AssertionError("type=" + type); + } + } } + success = true; + } finally { + if (success) { + IOUtils.close(consumer); + } else { + IOUtils.closeWhileHandlingException(consumer); + } } - - // Now merge all added files - Iterator it = files.iterator(); - while (it.hasNext()) { - cfsWriter.addFile((String) it.next()); - } - - // Perform the merge - cfsWriter.close(); - - return files; } - private void addIndexed(IndexReader reader, FieldInfos fieldInfos, Collection names, boolean storeTermVectors, boolean storePositionWithTermVector, - boolean storeOffsetWithTermVector, boolean storePayloads, boolean omitTf) throws IOException { - Iterator i = names.iterator(); - while (i.hasNext()) { - String field = (String)i.next(); - fieldInfos.add(field, true, storeTermVectors, storePositionWithTermVector, storeOffsetWithTermVector, !reader.hasNorms(field), storePayloads, omitTf); + private void mergeNorms(SegmentWriteState segmentWriteState) throws IOException { + DocValuesConsumer consumer = codec.normsFormat().normsConsumer(segmentWriteState); + boolean success = false; + try { + for (FieldInfo field : mergeState.fieldInfos) { + if (field.hasNorms()) { + List toMerge = new ArrayList<>(); + List docsWithField = new ArrayList<>(); + for (AtomicReader reader : mergeState.readers) { + NumericDocValues norms = reader.getNormValues(field.name); + if (norms == null) { + norms = DocValues.emptyNumeric(); + } + toMerge.add(norms); + docsWithField.add(new Bits.MatchAllBits(reader.maxDoc())); + } + consumer.mergeNumericField(field, mergeState, toMerge, docsWithField); + } + } + success = true; + } finally { + if (success) { + IOUtils.close(consumer); + } else { + IOUtils.closeWhileHandlingException(consumer); + } } } - private SegmentReader[] matchingSegmentReaders; - private int[] rawDocLengths; - private int[] rawDocLengths2; - private void setMatchingSegmentReaders() { // If the i'th reader is a SegmentReader and has // identical fieldName -> number mapping, then this // array will be non-null at position i: - matchingSegmentReaders = new SegmentReader[readers.size()]; + int numReaders = mergeState.readers.size(); + mergeState.matchingSegmentReaders = new SegmentReader[numReaders]; // If this reader is a SegmentReader, and all of its // field name -> number mappings match the "merged" // FieldInfos, then we can do a bulk copy of the // stored fields: - for (int i = 0; i < readers.size(); i++) { - IndexReader reader = (IndexReader) readers.get(i); + for (int i = 0; i < numReaders; i++) { + AtomicReader reader = mergeState.readers.get(i); + // TODO: we may be able to broaden this to + // non-SegmentReaders, since FieldInfos is now + // required? But... this'd also require exposing + // bulk-copy (TVs and stored fields) API in foreign + // readers.. 
if (reader instanceof SegmentReader) { SegmentReader segmentReader = (SegmentReader) reader; boolean same = true; FieldInfos segmentFieldInfos = segmentReader.getFieldInfos(); - for (int j = 0; same && j < segmentFieldInfos.size(); j++) - same = fieldInfos.fieldName(j).equals(segmentFieldInfos.fieldName(j)); - if (same) - matchingSegmentReaders[i] = segmentReader; + for (FieldInfo fi : segmentFieldInfos) { + FieldInfo other = mergeState.fieldInfos.fieldInfo(fi.number); + if (other == null || !other.name.equals(fi.name)) { + same = false; + break; + } + } + if (same) { + mergeState.matchingSegmentReaders[i] = segmentReader; + mergeState.matchedCount++; + } } } - // Used for bulk-reading raw bytes for stored fields - rawDocLengths = new int[MAX_RAW_MERGE_DOCS]; - rawDocLengths2 = new int[MAX_RAW_MERGE_DOCS]; + if (mergeState.infoStream.isEnabled("SM")) { + mergeState.infoStream.message("SM", "merge store matchedCount=" + mergeState.matchedCount + " vs " + mergeState.readers.size()); + if (mergeState.matchedCount != mergeState.readers.size()) { + mergeState.infoStream.message("SM", "" + (mergeState.readers.size() - mergeState.matchedCount) + " non-bulk merges"); + } + } } + + public void mergeFieldInfos() throws IOException { + for (AtomicReader reader : mergeState.readers) { + FieldInfos readerFieldInfos = reader.getFieldInfos(); + for (FieldInfo fi : readerFieldInfos) { + fieldInfosBuilder.add(fi); + } + } + mergeState.fieldInfos = fieldInfosBuilder.finish(); + } /** - * + * * @return The number of documents in all of the readers * @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - private final int mergeFields() throws CorruptIndexException, IOException { - - if (!mergeDocStores) { - // When we are not merging by doc stores, that means - // all segments were written as part of a single - // autoCommit=false IndexWriter session, so their field - // name -> number mapping are the same. So, we start - // with the fieldInfos of the last segment in this - // case, to keep that numbering. 
- final SegmentReader sr = (SegmentReader) readers.get(readers.size()-1); - fieldInfos = (FieldInfos) sr.fieldInfos.clone(); - } else { - fieldInfos = new FieldInfos(); // merge field names + private int mergeFields() throws IOException { + final StoredFieldsWriter fieldsWriter = codec.storedFieldsFormat().fieldsWriter(directory, mergeState.segmentInfo, context); + + try { + return fieldsWriter.merge(mergeState); + } finally { + fieldsWriter.close(); } - - for (int i = 0; i < readers.size(); i++) { - IndexReader reader = (IndexReader) readers.get(i); - if (reader instanceof SegmentReader) { - SegmentReader segmentReader = (SegmentReader) reader; - for (int j = 0; j < segmentReader.getFieldInfos().size(); j++) { - FieldInfo fi = segmentReader.getFieldInfos().fieldInfo(j); - fieldInfos.add(fi.name, fi.isIndexed, fi.storeTermVector, fi.storePositionWithTermVector, fi.storeOffsetWithTermVector, !reader.hasNorms(fi.name), fi.storePayloads, fi.omitTf); - } - } else { - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET), true, true, true, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_POSITION), true, true, false, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET), true, false, true, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.TERMVECTOR), true, false, false, false, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.OMIT_TF), false, false, false, false, true); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.STORES_PAYLOADS), false, false, false, true, false); - addIndexed(reader, fieldInfos, reader.getFieldNames(IndexReader.FieldOption.INDEXED), false, false, false, false, false); - fieldInfos.add(reader.getFieldNames(IndexReader.FieldOption.UNINDEXED), false); - } - } - fieldInfos.write(directory, segment + ".fnm"); - - int docCount = 0; - - setMatchingSegmentReaders(); - - if (mergeDocStores) { - - // for merging we don't want to compress/uncompress the data, so to tell the FieldsReader that we're - // in merge mode, we use this FieldSelector - FieldSelector fieldSelectorMerge = new FieldSelector() { - public FieldSelectorResult accept(String fieldName) { - return FieldSelectorResult.LOAD_FOR_MERGE; - } - }; - - // merge field values - final FieldsWriter fieldsWriter = new FieldsWriter(directory, segment, fieldInfos); - - try { - for (int i = 0; i < readers.size(); i++) { - final IndexReader reader = (IndexReader) readers.get(i); - final SegmentReader matchingSegmentReader = matchingSegmentReaders[i]; - final FieldsReader matchingFieldsReader; - final boolean hasMatchingReader; - if (matchingSegmentReader != null) { - final FieldsReader fieldsReader = matchingSegmentReader.getFieldsReader(); - if (fieldsReader != null && !fieldsReader.canReadRawDocs()) { - matchingFieldsReader = null; - hasMatchingReader = false; - } else { - matchingFieldsReader = fieldsReader; - hasMatchingReader = true; - } - } else { - hasMatchingReader = false; - matchingFieldsReader = null; - } - final int maxDoc = reader.maxDoc(); - final boolean hasDeletions = reader.hasDeletions(); - for (int j = 0; j < maxDoc;) { - if (!hasDeletions || !reader.isDeleted(j)) { // skip deleted docs - if (hasMatchingReader) { - // We can optimize this case (doing a bulk - // byte copy) since the field numbers are - // identical - int start = 
j; - int numDocs = 0; - do { - j++; - numDocs++; - if (j >= maxDoc) - break; - if (hasDeletions && matchingSegmentReader.isDeleted(j)) { - j++; - break; - } - } while(numDocs < MAX_RAW_MERGE_DOCS); - - IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs); - fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs); - docCount += numDocs; - if (checkAbort != null) - checkAbort.work(300*numDocs); - } else { - // NOTE: it's very important to first assign - // to doc then pass it to - // termVectorsWriter.addAllDocVectors; see - // LUCENE-1282 - Document doc = reader.document(j, fieldSelectorMerge); - fieldsWriter.addDocument(doc); - j++; - docCount++; - if (checkAbort != null) - checkAbort.work(300); - } - } else - j++; - } - } - } finally { - fieldsWriter.close(); - } - - final long fdxFileLength = directory.fileLength(segment + "." + IndexFileNames.FIELDS_INDEX_EXTENSION); - - if (4+docCount*8 != fdxFileLength) - // This is most likely a bug in Sun JRE 1.6.0_04/_05; - // we detect that the bug has struck, here, and - // throw an exception to prevent the corruption from - // entering the index. See LUCENE-1282 for - // details. - throw new RuntimeException("mergeFields produced an invalid result: docCount is " + docCount + " but fdx file size is " + fdxFileLength + "; now aborting this merge to prevent index corruption"); - - } else - // If we are skipping the doc stores, that means there - // are no deletions in any of these segments, so we - // just sum numDocs() of each segment to get total docCount - for (int i = 0; i < readers.size(); i++) - docCount += ((IndexReader) readers.get(i)).numDocs(); - - return docCount; } /** * Merge the TermVectors from each of the segments into the new one. - * @throws IOException + * @throws IOException if there is a low-level IO error */ - private final void mergeVectors() throws IOException { - TermVectorsWriter termVectorsWriter = - new TermVectorsWriter(directory, segment, fieldInfos); - + private int mergeVectors() throws IOException { + final TermVectorsWriter termVectorsWriter = codec.termVectorsFormat().vectorsWriter(directory, mergeState.segmentInfo, context); + try { - for (int r = 0; r < readers.size(); r++) { - final SegmentReader matchingSegmentReader = matchingSegmentReaders[r]; - TermVectorsReader matchingVectorsReader; - final boolean hasMatchingReader; - if (matchingSegmentReader != null) { - matchingVectorsReader = matchingSegmentReader.termVectorsReaderOrig; - - // If the TV* files are an older format then they - // cannot read raw docs: - if (matchingVectorsReader != null && !matchingVectorsReader.canReadRawDocs()) { - matchingVectorsReader = null; - hasMatchingReader = false; - } else - hasMatchingReader = matchingVectorsReader != null; - - } else { - hasMatchingReader = false; - matchingVectorsReader = null; - } - IndexReader reader = (IndexReader) readers.get(r); - final boolean hasDeletions = reader.hasDeletions(); - int maxDoc = reader.maxDoc(); - for (int docNum = 0; docNum < maxDoc;) { - // skip deleted docs - if (!hasDeletions || !reader.isDeleted(docNum)) { - if (hasMatchingReader) { - // We can optimize this case (doing a bulk - // byte copy) since the field numbers are - // identical - int start = docNum; - int numDocs = 0; - do { - docNum++; - numDocs++; - if (docNum >= maxDoc) - break; - if (hasDeletions && matchingSegmentReader.isDeleted(docNum)) { - docNum++; - break; - } - } while(numDocs < MAX_RAW_MERGE_DOCS); - - matchingVectorsReader.rawDocs(rawDocLengths, rawDocLengths2, start, 
numDocs); - termVectorsWriter.addRawDocuments(matchingVectorsReader, rawDocLengths, rawDocLengths2, numDocs); - if (checkAbort != null) - checkAbort.work(300*numDocs); - } else { - // NOTE: it's very important to first assign - // to vectors then pass it to - // termVectorsWriter.addAllDocVectors; see - // LUCENE-1282 - TermFreqVector[] vectors = reader.getTermFreqVectors(docNum); - termVectorsWriter.addAllDocVectors(vectors); - docNum++; - if (checkAbort != null) - checkAbort.work(300); - } - } else - docNum++; - } - } + return termVectorsWriter.merge(mergeState); } finally { termVectorsWriter.close(); } - - final long tvxSize = directory.fileLength(segment + "." + IndexFileNames.VECTORS_INDEX_EXTENSION); - - if (4+mergedDocs*16 != tvxSize) - // This is most likely a bug in Sun JRE 1.6.0_04/_05; - // we detect that the bug has struck, here, and - // throw an exception to prevent the corruption from - // entering the index. See LUCENE-1282 for - // details. - throw new RuntimeException("mergeVectors produced an invalid result: mergedDocs is " + mergedDocs + " but tvx size is " + tvxSize + "; now aborting this merge to prevent index corruption"); } - private IndexOutput freqOutput = null; - private IndexOutput proxOutput = null; - private TermInfosWriter termInfosWriter = null; - private int skipInterval; - private int maxSkipLevels; - private SegmentMergeQueue queue = null; - private DefaultSkipListWriter skipListWriter = null; + // NOTE: removes any "all deleted" readers from mergeState.readers + private int setDocMaps() throws IOException { + final int numReaders = mergeState.readers.size(); - private final void mergeTerms() throws CorruptIndexException, IOException { - try { - freqOutput = directory.createOutput(segment + ".frq"); - if (hasProx()) - proxOutput = directory.createOutput(segment + ".prx"); - termInfosWriter = - new TermInfosWriter(directory, segment, fieldInfos, - termIndexInterval); - skipInterval = termInfosWriter.skipInterval; - maxSkipLevels = termInfosWriter.maxSkipLevels; - skipListWriter = new DefaultSkipListWriter(skipInterval, maxSkipLevels, mergedDocs, freqOutput, proxOutput); - queue = new SegmentMergeQueue(readers.size()); + // Remap docIDs + mergeState.docMaps = new MergeState.DocMap[numReaders]; + mergeState.docBase = new int[numReaders]; - mergeTermInfos(); + int docBase = 0; - } finally { - if (freqOutput != null) freqOutput.close(); - if (proxOutput != null) proxOutput.close(); - if (termInfosWriter != null) termInfosWriter.close(); - if (queue != null) queue.close(); - } - } + int i = 0; + while(i < mergeState.readers.size()) { - private final void mergeTermInfos() throws CorruptIndexException, IOException { - int base = 0; - final int readerCount = readers.size(); - for (int i = 0; i < readerCount; i++) { - IndexReader reader = (IndexReader) readers.get(i); - TermEnum termEnum = reader.terms(); - SegmentMergeInfo smi = new SegmentMergeInfo(base, termEnum, reader); - int[] docMap = smi.getDocMap(); - if (docMap != null) { - if (docMaps == null) { - docMaps = new int[readerCount][]; - delCounts = new int[readerCount]; - } - docMaps[i] = docMap; - delCounts[i] = smi.reader.maxDoc() - smi.reader.numDocs(); - } + final AtomicReader reader = mergeState.readers.get(i); - base += reader.numDocs(); - if (smi.next()) - queue.put(smi); // initialize queue - else - smi.close(); - } + mergeState.docBase[i] = docBase; + final MergeState.DocMap docMap = MergeState.DocMap.build(reader); + mergeState.docMaps[i] = docMap; + docBase += docMap.numDocs(); - 
SegmentMergeInfo[] match = new SegmentMergeInfo[readers.size()]; - - while (queue.size() > 0) { - int matchSize = 0; // pop matching terms - match[matchSize++] = (SegmentMergeInfo) queue.pop(); - Term term = match[0].term; - SegmentMergeInfo top = (SegmentMergeInfo) queue.top(); - - while (top != null && term.compareTo(top.term) == 0) { - match[matchSize++] = (SegmentMergeInfo) queue.pop(); - top = (SegmentMergeInfo) queue.top(); - } - - final int df = mergeTermInfo(match, matchSize); // add new TermInfo - - if (checkAbort != null) - checkAbort.work(df/3.0); - - while (matchSize > 0) { - SegmentMergeInfo smi = match[--matchSize]; - if (smi.next()) - queue.put(smi); // restore queue - else - smi.close(); // done with a segment - } + i++; } + + return docBase; } - private final TermInfo termInfo = new TermInfo(); // minimize consing - - /** Merge one term found in one or more segments. The array smis - * contains segments that are positioned at the same term. N - * is the number of cells in the array actually occupied. - * - * @param smis array of segments - * @param n number of cells in the array actually occupied - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - private final int mergeTermInfo(SegmentMergeInfo[] smis, int n) - throws CorruptIndexException, IOException { - final long freqPointer = freqOutput.getFilePointer(); - final long proxPointer; - if (proxOutput != null) - proxPointer = proxOutput.getFilePointer(); - else - proxPointer = 0; - - int df; - if (fieldInfos.fieldInfo(smis[0].term.field).omitTf) { // append posting data - df = appendPostingsNoTf(smis, n); - } else{ - df = appendPostings(smis, n); - } + private void mergeTerms(SegmentWriteState segmentWriteState) throws IOException { - long skipPointer = skipListWriter.writeSkip(freqOutput); + final List fields = new ArrayList<>(); + final List slices = new ArrayList<>(); - if (df > 0) { - // add an entry to the dictionary with pointers to prox and freq files - termInfo.set(df, freqPointer, proxPointer, (int) (skipPointer - freqPointer)); - termInfosWriter.add(smis[0].term, termInfo); - } + int docBase = 0; - return df; - } - - private byte[] payloadBuffer; - private int[][] docMaps; - int[][] getDocMaps() { - return docMaps; - } - private int[] delCounts; - int[] getDelCounts() { - return delCounts; - } - - /** Process postings from multiple segments all positioned on the - * same term. Writes out merged entries into freqOutput and - * the proxOutput streams. 
- * - * @param smis array of segments - * @param n number of cells in the array actually occupied - * @return number of documents across all segments where this term was found - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - private final int appendPostings(SegmentMergeInfo[] smis, int n) - throws CorruptIndexException, IOException { - int lastDoc = 0; - int df = 0; // number of docs w/ term - skipListWriter.resetSkip(); - boolean storePayloads = fieldInfos.fieldInfo(smis[0].term.field).storePayloads; - int lastPayloadLength = -1; // ensures that we write the first length - for (int i = 0; i < n; i++) { - SegmentMergeInfo smi = smis[i]; - TermPositions postings = smi.getPositions(); - assert postings != null; - int base = smi.base; - int[] docMap = smi.getDocMap(); - postings.seek(smi.termEnum); - while (postings.next()) { - int doc = postings.doc(); - if (docMap != null) - doc = docMap[doc]; // map around deletions - doc += base; // convert to merged space - - if (doc < 0 || (df > 0 && doc <= lastDoc)) - throw new CorruptIndexException("docs out of order (" + doc + - " <= " + lastDoc + " )"); - - df++; - - if ((df % skipInterval) == 0) { - skipListWriter.setSkipData(lastDoc, storePayloads, lastPayloadLength); - skipListWriter.bufferSkip(df); - } - - int docCode = (doc - lastDoc) << 1; // use low bit to flag freq=1 - lastDoc = doc; - - int freq = postings.freq(); - if (freq == 1) { - freqOutput.writeVInt(docCode | 1); // write doc & freq=1 - } else { - freqOutput.writeVInt(docCode); // write doc - freqOutput.writeVInt(freq); // write frequency in doc - } - - /** See {@link DocumentWriter#writePostings(Posting[], String)} for - * documentation about the encoding of positions and payloads - */ - int lastPosition = 0; // write position deltas - for (int j = 0; j < freq; j++) { - int position = postings.nextPosition(); - int delta = position - lastPosition; - if (storePayloads) { - int payloadLength = postings.getPayloadLength(); - if (payloadLength == lastPayloadLength) { - proxOutput.writeVInt(delta * 2); - } else { - proxOutput.writeVInt(delta * 2 + 1); - proxOutput.writeVInt(payloadLength); - lastPayloadLength = payloadLength; - } - if (payloadLength > 0) { - if (payloadBuffer == null || payloadBuffer.length < payloadLength) { - payloadBuffer = new byte[payloadLength]; - } - postings.getPayload(payloadBuffer, 0); - proxOutput.writeBytes(payloadBuffer, 0, payloadLength); - } - } else { - proxOutput.writeVInt(delta); - } - lastPosition = position; - } + for(int readerIndex=0;readerIndex 0 && doc <= lastDoc)) - throw new CorruptIndexException("docs out of order (" + doc + - " <= " + lastDoc + " )"); - - df++; - - if ((df % skipInterval) == 0) { - skipListWriter.setSkipData(lastDoc, false, lastPayloadLength); - skipListWriter.bufferSkip(df); - } - - int docCode = (doc - lastDoc); - lastDoc = doc; - freqOutput.writeVInt(docCode); // write doc & freq=1 - } - } - return df; - } - - private void mergeNorms() throws IOException { - byte[] normBuffer = null; - IndexOutput output = null; + final FieldsConsumer consumer = codec.postingsFormat().fieldsConsumer(segmentWriteState); + boolean success = false; try { - for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); - if (fi.isIndexed && !fi.omitNorms) { - if (output == null) { - output = directory.createOutput(segment + "." 
+ IndexFileNames.NORMS_EXTENSION); - output.writeBytes(NORMS_HEADER,NORMS_HEADER.length); - } - for (int j = 0; j < readers.size(); j++) { - IndexReader reader = (IndexReader) readers.get(j); - int maxDoc = reader.maxDoc(); - if (normBuffer == null || normBuffer.length < maxDoc) { - // the buffer is too small for the current segment - normBuffer = new byte[maxDoc]; - } - reader.norms(fi.name, normBuffer, 0); - if (!reader.hasDeletions()) { - //optimized case for segments without deleted docs - output.writeBytes(normBuffer, maxDoc); - } else { - // this segment has deleted docs, so we have to - // check for every doc if it is deleted or not - for (int k = 0; k < maxDoc; k++) { - if (!reader.isDeleted(k)) { - output.writeByte(normBuffer[k]); - } - } - } - if (checkAbort != null) - checkAbort.work(maxDoc); - } - } - } + consumer.merge(mergeState, + new MultiFields(fields.toArray(Fields.EMPTY_ARRAY), + slices.toArray(ReaderSlice.EMPTY_ARRAY))); + success = true; } finally { - if (output != null) { - output.close(); + if (success) { + IOUtils.close(consumer); + } else { + IOUtils.closeWhileHandlingException(consumer); } } } - - final static class CheckAbort { - private double workCount; - private MergePolicy.OneMerge merge; - private Directory dir; - public CheckAbort(MergePolicy.OneMerge merge, Directory dir) { - this.merge = merge; - this.dir = dir; - } - - /** - * Records the fact that roughly units amount of work - * have been done since this method was last called. - * When adding time-consuming code into SegmentMerger, - * you should test different values for units to ensure - * that the time in between calls to merge.checkAborted - * is up to ~ 1 second. - */ - public void work(double units) throws MergePolicy.MergeAbortedException { - workCount += units; - if (workCount >= 10000.0) { - merge.checkAborted(dir); - workCount = 0; - } - } - } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentReadState.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/SegmentReader.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/SegmentReader.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/SegmentReader.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/SegmentReader.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -19,372 +19,113 @@ import java.io.IOException; import java.util.ArrayList; -import java.util.Arrays; -import java.util.Collection; +import java.util.Collections; import java.util.HashMap; -import java.util.HashSet; -import java.util.Iterator; +import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Set; -import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.search.DefaultSimilarity; -import org.apache.lucene.store.BufferedIndexInput; +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.DocValuesFormat; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.codecs.FieldInfosFormat; +import org.apache.lucene.codecs.StoredFieldsReader; +import org.apache.lucene.codecs.TermVectorsReader; +import org.apache.lucene.index.FieldInfo.DocValuesType; +import org.apache.lucene.store.CompoundFileDirectory; import org.apache.lucene.store.Directory; -import org.apache.lucene.store.IndexInput; -import org.apache.lucene.store.IndexOutput; -import org.apache.lucene.util.BitVector; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.CloseableThreadLocal; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.Version; /** - * @version $Id$ + * IndexReader implementation over a single segment. + *

    + * Instances pointing to the same segment (but with different deletes, etc) + * may share the same core data. + * @lucene.experimental */ -class SegmentReader extends DirectoryIndexReader { - private String segment; - private SegmentInfo si; - private int readBufferSize; +public final class SegmentReader extends AtomicReader implements Accountable { - FieldInfos fieldInfos; - private FieldsReader fieldsReader; + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(SegmentReader.class) + + RamUsageEstimator.shallowSizeOfInstance(SegmentDocValues.class); + private static final long LONG_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(Long.class); + + private final SegmentCommitInfo si; + private final Bits liveDocs; - TermInfosReader tis; - TermVectorsReader termVectorsReaderOrig = null; - CloseableThreadLocal termVectorsLocal = new CloseableThreadLocal(); + // Normally set to si.docCount - si.delDocCount, unless we + // were created as an NRT reader from IW, in which case IW + // tells us the docCount: + private final int numDocs; - BitVector deletedDocs = null; - private boolean deletedDocsDirty = false; - private boolean normsDirty = false; - private boolean undeleteAll = false; - private int pendingDeleteCount; - - private boolean rollbackDeletedDocsDirty = false; - private boolean rollbackNormsDirty = false; - private boolean rollbackUndeleteAll = false; - private int rollbackPendingDeleteCount; - private boolean readOnly; - - IndexInput freqStream; - IndexInput proxStream; - - // optionally used for the .nrm file shared by multiple norms - private IndexInput singleNormStream; - - // Compound File Reader when based on a compound file segment - CompoundFileReader cfsReader = null; - CompoundFileReader storeCFSReader = null; + final SegmentCoreReaders core; + final SegmentDocValues segDocValues; - // indicates the SegmentReader with which the resources are being shared, - // in case this is a re-opened reader - private SegmentReader referencedSegmentReader = null; - - private class Norm { - volatile int refCount; - boolean useSingleNormStream; - - public synchronized void incRef() { - assert refCount > 0; - refCount++; + final CloseableThreadLocal> docValuesLocal = new CloseableThreadLocal>() { + @Override + protected Map initialValue() { + return new HashMap<>(); } + }; - public synchronized void decRef() throws IOException { - assert refCount > 0; - if (refCount == 1) { - close(); - } - refCount--; - + final CloseableThreadLocal> docsWithFieldLocal = new CloseableThreadLocal>() { + @Override + protected Map initialValue() { + return new HashMap<>(); } - - public Norm(IndexInput in, boolean useSingleNormStream, int number, long normSeek) - { - refCount = 1; - this.in = in; - this.number = number; - this.normSeek = normSeek; - this.useSingleNormStream = useSingleNormStream; - } + }; - private IndexInput in; - private byte[] bytes; - private boolean dirty; - private int number; - private long normSeek; - private boolean rollbackDirty; - - private void reWrite(SegmentInfo si) throws IOException { - // NOTE: norms are re-written in regular directory, not cfs - si.advanceNormGen(this.number); - IndexOutput out = directory().createOutput(si.getNormFileName(this.number)); - try { - out.writeBytes(bytes, maxDoc()); - } finally { - out.close(); - } - this.dirty = false; - } - - /** Closes the underlying IndexInput for this norm. - * It is still valid to access all other norm properties after close is called. 
- * @throws IOException - */ - private synchronized void close() throws IOException { - if (in != null && !useSingleNormStream) { - in.close(); - } - in = null; - } - } + final Map dvProducersByField = new HashMap<>(); + final Set dvProducers = Collections.newSetFromMap(new IdentityHashMap()); - /** - * Increments the RC of this reader, as well as - * of all norms this reader is using - */ - public synchronized void incRef() { - super.incRef(); - Iterator it = norms.values().iterator(); - while (it.hasNext()) { - Norm norm = (Norm) it.next(); - norm.incRef(); - } - } - - /** - * only increments the RC of this reader, not tof - * he norms. This is important whenever a reopen() - * creates a new SegmentReader that doesn't share - * the norms with this one - */ - private synchronized void incRefReaderNotNorms() { - super.incRef(); - } + final FieldInfos fieldInfos; - public synchronized void decRef() throws IOException { - super.decRef(); - Iterator it = norms.values().iterator(); - while (it.hasNext()) { - Norm norm = (Norm) it.next(); - norm.decRef(); - } - } + private final List dvGens = new ArrayList<>(); - private synchronized void decRefReaderNotNorms() throws IOException { - super.decRef(); - } - - Map norms = new HashMap(); - - /** The class which implements SegmentReader. */ - private static Class IMPL; - static { - try { - String name = - System.getProperty("org.apache.lucene.SegmentReader.class", - SegmentReader.class.getName()); - IMPL = Class.forName(name); - } catch (ClassNotFoundException e) { - throw new RuntimeException("cannot load SegmentReader class: " + e, e); - } catch (SecurityException se) { - try { - IMPL = Class.forName(SegmentReader.class.getName()); - } catch (ClassNotFoundException e) { - throw new RuntimeException("cannot load default SegmentReader class: " + e, e); - } - } - } - - private static Class READONLY_IMPL; - static { - try { - String name = - System.getProperty("org.apache.lucene.ReadOnlySegmentReader.class", - ReadOnlySegmentReader.class.getName()); - READONLY_IMPL = Class.forName(name); - } catch (ClassNotFoundException e) { - throw new RuntimeException("cannot load ReadOnlySegmentReader class: " + e, e); - } catch (SecurityException se) { - try { - READONLY_IMPL = Class.forName(ReadOnlySegmentReader.class.getName()); - } catch (ClassNotFoundException e) { - throw new RuntimeException("cannot load default ReadOnlySegmentReader class: " + e, e); - } - } - } - /** + * Constructs a new SegmentReader with a new core. 
* @throws CorruptIndexException if the index is corrupt * @throws IOException if there is a low-level IO error */ - public static SegmentReader get(SegmentInfo si) throws CorruptIndexException, IOException { - return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static SegmentReader get(boolean readOnly, SegmentInfo si) throws CorruptIndexException, IOException { - return get(readOnly, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, true); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - static SegmentReader get(SegmentInfo si, boolean doOpenStores) throws CorruptIndexException, IOException { - return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, BufferedIndexInput.BUFFER_SIZE, doOpenStores); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static SegmentReader get(SegmentInfo si, int readBufferSize) throws CorruptIndexException, IOException { - return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, readBufferSize, true); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - static SegmentReader get(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException { - return get(READ_ONLY_DEFAULT, si.dir, si, null, false, false, readBufferSize, doOpenStores); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - static SegmentReader get(boolean readOnly, SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException { - return get(readOnly, si.dir, si, null, false, false, readBufferSize, doOpenStores); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static SegmentReader get(boolean readOnly, SegmentInfos sis, SegmentInfo si, - boolean closeDir) throws CorruptIndexException, IOException { - return get(readOnly, si.dir, si, sis, closeDir, true, BufferedIndexInput.BUFFER_SIZE, true); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static SegmentReader get(Directory dir, SegmentInfo si, - SegmentInfos sis, - boolean closeDir, boolean ownDir, - int readBufferSize) - throws CorruptIndexException, IOException { - return get(READ_ONLY_DEFAULT, dir, si, sis, closeDir, ownDir, readBufferSize, true); - } - - /** - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public static SegmentReader get(boolean readOnly, - Directory dir, - SegmentInfo si, - SegmentInfos sis, - boolean closeDir, boolean ownDir, - int readBufferSize, - boolean doOpenStores) - throws CorruptIndexException, IOException { - SegmentReader instance; - try { - if (readOnly) - instance = (SegmentReader)READONLY_IMPL.newInstance(); - else - instance = (SegmentReader)IMPL.newInstance(); - } catch (Exception e) { - throw new RuntimeException("cannot load SegmentReader class: " + e, e); - } - instance.init(dir, sis, closeDir, readOnly); - instance.initialize(si, 
readBufferSize, doOpenStores); - return instance; - } - - private void initialize(SegmentInfo si, int readBufferSize, boolean doOpenStores) throws CorruptIndexException, IOException { - segment = si.name; + // TODO: why is this public? + public SegmentReader(SegmentCommitInfo si, int termInfosIndexDivisor, IOContext context) throws IOException { this.si = si; - this.readBufferSize = readBufferSize; - + // TODO if the segment uses CFS, we may open the CFS file twice: once for + // reading the FieldInfos (if they are not gen'd) and second time by + // SegmentCoreReaders. We can open the CFS here and pass to SCR, but then it + // results in less readable code (resource not closed where it was opened). + // Best if we could somehow read FieldInfos in SCR but not keep it there, but + // constructors don't allow returning two things... + fieldInfos = readFieldInfos(si); + core = new SegmentCoreReaders(this, si.info.dir, si, context, termInfosIndexDivisor); + segDocValues = new SegmentDocValues(); + boolean success = false; - + final Codec codec = si.info.getCodec(); try { - // Use compound file directory for some files, if it exists - Directory cfsDir = directory(); - if (si.getUseCompoundFile()) { - cfsReader = new CompoundFileReader(directory(), segment + "." + IndexFileNames.COMPOUND_FILE_EXTENSION, readBufferSize); - cfsDir = cfsReader; + if (si.hasDeletions()) { + // NOTE: the bitvector is stored using the regular directory, not cfs + liveDocs = codec.liveDocsFormat().readLiveDocs(directory(), si, IOContext.READONCE); + } else { + assert si.getDelCount() == 0; + liveDocs = null; } + numDocs = si.info.getDocCount() - si.getDelCount(); - final Directory storeDir; - - if (doOpenStores) { - if (si.getDocStoreOffset() != -1) { - if (si.getDocStoreIsCompoundFile()) { - storeCFSReader = new CompoundFileReader(directory(), si.getDocStoreSegment() + "." + IndexFileNames.COMPOUND_FILE_STORE_EXTENSION, readBufferSize); - storeDir = storeCFSReader; - } else { - storeDir = directory(); - } - } else { - storeDir = cfsDir; - } - } else - storeDir = null; - - fieldInfos = new FieldInfos(cfsDir, segment + ".fnm"); - - boolean anyProx = false; - final int numFields = fieldInfos.size(); - for(int i=0;!anyProx && i= maxDoc()) { + throw new IndexOutOfBoundsException("docID must be >= 0 and < maxDoc=" + maxDoc() + " (got docID=" + docID + ")"); + } + } + @Override + public String toString() { + // SegmentInfo.toString takes dir and number of + // *pending* deletions; so we reverse compute that here: + return si.toString(si.info.dir, si.info.getDocCount() - numDocs - si.getDelCount()); + } + /** - * @see IndexReader#getFieldNames(IndexReader.FieldOption fldOption) + * Return the name of the segment this reader is reading. 
*/ - public Collection getFieldNames(IndexReader.FieldOption fieldOption) { - ensureOpen(); - - Set fieldSet = new HashSet(); - for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); - if (fieldOption == IndexReader.FieldOption.ALL) { - fieldSet.add(fi.name); - } - else if (!fi.isIndexed && fieldOption == IndexReader.FieldOption.UNINDEXED) { - fieldSet.add(fi.name); - } - else if (fi.omitTf && fieldOption == IndexReader.FieldOption.OMIT_TF) { - fieldSet.add(fi.name); - } - else if (fi.storePayloads && fieldOption == IndexReader.FieldOption.STORES_PAYLOADS) { - fieldSet.add(fi.name); - } - else if (fi.isIndexed && fieldOption == IndexReader.FieldOption.INDEXED) { - fieldSet.add(fi.name); - } - else if (fi.isIndexed && fi.storeTermVector == false && fieldOption == IndexReader.FieldOption.INDEXED_NO_TERMVECTOR) { - fieldSet.add(fi.name); - } - else if (fi.storeTermVector == true && - fi.storePositionWithTermVector == false && - fi.storeOffsetWithTermVector == false && - fieldOption == IndexReader.FieldOption.TERMVECTOR) { - fieldSet.add(fi.name); - } - else if (fi.isIndexed && fi.storeTermVector && fieldOption == IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR) { - fieldSet.add(fi.name); - } - else if (fi.storePositionWithTermVector && fi.storeOffsetWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION) { - fieldSet.add(fi.name); - } - else if (fi.storeOffsetWithTermVector && fi.storePositionWithTermVector == false && fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_OFFSET) { - fieldSet.add(fi.name); - } - else if ((fi.storeOffsetWithTermVector && fi.storePositionWithTermVector) && - fieldOption == IndexReader.FieldOption.TERMVECTOR_WITH_POSITION_OFFSET) { - fieldSet.add(fi.name); - } - } - return fieldSet; + public String getSegmentName() { + return si.info.name; } - - - public synchronized boolean hasNorms(String field) { - ensureOpen(); - return norms.containsKey(field); + + /** + * Return the SegmentInfoPerCommit of the segment this reader is reading. + */ + public SegmentCommitInfo getSegmentInfo() { + return si; } - static byte[] createFakeNorms(int size) { - byte[] ones = new byte[size]; - Arrays.fill(ones, DefaultSimilarity.encodeNorm(1.0f)); - return ones; + /** Returns the directory this index resides in. */ + public Directory directory() { + // Don't ensureOpen here -- in certain cases, when a + // cloned/reopened reader needs to commit, it may call + // this method on the closed original reader + return si.info.dir; } - private byte[] ones; - private byte[] fakeNorms() { - if (ones==null) ones=createFakeNorms(maxDoc()); - return ones; + // This is necessary so that cloned SegmentReaders (which + // share the underlying postings data) will map to the + // same entry in the FieldCache. See LUCENE-1579. + @Override + public Object getCoreCacheKey() { + // NOTE: if this ever changes, be sure to fix + // SegmentCoreReader.notifyCoreClosedListeners to match! 
+ // Today it passes "this" as its coreCacheKey: + return core; } - // can return null if norms aren't stored - protected synchronized byte[] getNorms(String field) throws IOException { - Norm norm = (Norm) norms.get(field); - if (norm == null) return null; // not indexed, or norms not stored - synchronized(norm) { - if (norm.bytes == null) { // value not yet read - byte[] bytes = new byte[maxDoc()]; - norms(field, bytes, 0); - norm.bytes = bytes; // cache it - // it's OK to close the underlying IndexInput as we have cached the - // norms and will never read them again. - norm.close(); - } - return norm.bytes; - } + @Override + public Object getCombinedCoreAndDeletesKey() { + return this; } - // returns fake norms if norms aren't available - public synchronized byte[] norms(String field) throws IOException { - ensureOpen(); - byte[] bytes = getNorms(field); - if (bytes==null) bytes=fakeNorms(); - return bytes; + /** Returns term infos index divisor originally passed to + * {@link #SegmentReader(SegmentCommitInfo, int, IOContext)}. */ + public int getTermInfosIndexDivisor() { + return core.termsIndexDivisor; } - protected void doSetNorm(int doc, String field, byte value) - throws IOException { - Norm norm = (Norm) norms.get(field); - if (norm == null) // not an indexed field - return; + // returns the FieldInfo that corresponds to the given field and type, or + // null if the field does not exist, or not indexed as the requested + // DovDocValuesType. + private FieldInfo getDVField(String field, DocValuesType type) { + FieldInfo fi = fieldInfos.fieldInfo(field); + if (fi == null) { + // Field does not exist + return null; + } + if (fi.getDocValuesType() == null) { + // Field was not indexed with doc values + return null; + } + if (fi.getDocValuesType() != type) { + // Field DocValues are different than requested type + return null; + } - norm.dirty = true; // mark it dirty - normsDirty = true; - - norms(field)[doc] = value; // set the value + return fi; } - - /** Read norms into a pre-allocated array. */ - public synchronized void norms(String field, byte[] bytes, int offset) - throws IOException { - + + @Override + public NumericDocValues getNumericDocValues(String field) throws IOException { ensureOpen(); - Norm norm = (Norm) norms.get(field); - if (norm == null) { - System.arraycopy(fakeNorms(), 0, bytes, offset, maxDoc()); - return; - } - - synchronized(norm) { - if (norm.bytes != null) { // can copy from cache - System.arraycopy(norm.bytes, 0, bytes, offset, maxDoc()); - return; - } + Map dvFields = docValuesLocal.get(); - // Read from disk. norm.in may be shared across multiple norms and - // should only be used in a synchronized context. 
- IndexInput normStream; - if (norm.useSingleNormStream) { - normStream = singleNormStream; - } else { - normStream = norm.in; + Object previous = dvFields.get(field); + if (previous != null && previous instanceof NumericDocValues) { + return (NumericDocValues) previous; + } else { + FieldInfo fi = getDVField(field, DocValuesType.NUMERIC); + if (fi == null) { + return null; } - normStream.seek(norm.normSeek); - normStream.readBytes(bytes, offset, maxDoc()); + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; + NumericDocValues dv = dvProducer.getNumeric(fi); + dvFields.put(field, dv); + return dv; } } + @Override + public Bits getDocsWithField(String field) throws IOException { + ensureOpen(); + Map dvFields = docsWithFieldLocal.get(); - private void openNorms(Directory cfsDir, int readBufferSize) throws IOException { - long nextNormSeek = SegmentMerger.NORMS_HEADER.length; //skip header (header unused for now) - int maxDoc = maxDoc(); - for (int i = 0; i < fieldInfos.size(); i++) { - FieldInfo fi = fieldInfos.fieldInfo(i); - if (norms.containsKey(fi.name)) { - // in case this SegmentReader is being re-opened, we might be able to - // reuse some norm instances and skip loading them here - continue; + Bits previous = dvFields.get(field); + if (previous != null) { + return previous; + } else { + FieldInfo fi = fieldInfos.fieldInfo(field); + if (fi == null) { + // Field does not exist + return null; } - if (fi.isIndexed && !fi.omitNorms) { - Directory d = directory(); - String fileName = si.getNormFileName(fi.number); - if (!si.hasSeparateNorms(fi.number)) { - d = cfsDir; - } - - // singleNormFile means multiple norms share this file - boolean singleNormFile = fileName.endsWith("." + IndexFileNames.NORMS_EXTENSION); - IndexInput normInput = null; - long normSeek; - - if (singleNormFile) { - normSeek = nextNormSeek; - if (singleNormStream==null) { - singleNormStream = d.openInput(fileName, readBufferSize); - } - // All norms in the .nrm file can share a single IndexInput since - // they are only used in a synchronized context. - // If this were to change in the future, a clone could be done here. 
- normInput = singleNormStream; - } else { - normSeek = 0; - normInput = d.openInput(fileName); - } - - norms.put(fi.name, new Norm(normInput, singleNormFile, fi.number, normSeek)); - nextNormSeek += maxDoc; // increment also if some norms are separate + if (fi.getDocValuesType() == null) { + // Field was not indexed with doc values + return null; } + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; + Bits dv = dvProducer.getDocsWithField(fi); + dvFields.put(field, dv); + return dv; } } - // for testing only - boolean normsClosed() { - if (singleNormStream != null) { - return false; + @Override + public BinaryDocValues getBinaryDocValues(String field) throws IOException { + ensureOpen(); + FieldInfo fi = getDVField(field, DocValuesType.BINARY); + if (fi == null) { + return null; } - Iterator it = norms.values().iterator(); - while (it.hasNext()) { - Norm norm = (Norm) it.next(); - if (norm.refCount > 0) { - return false; - } + + Map dvFields = docValuesLocal.get(); + + BinaryDocValues dvs = (BinaryDocValues) dvFields.get(field); + if (dvs == null) { + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; + dvs = dvProducer.getBinary(fi); + dvFields.put(field, dvs); } - return true; + + return dvs; } - - // for testing only - boolean normsClosed(String field) { - Norm norm = (Norm) norms.get(field); - return norm.refCount == 0; - } - /** - * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal. - * @return TermVectorsReader - */ - private TermVectorsReader getTermVectorsReader() { - assert termVectorsReaderOrig != null; - TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get(); - if (tvReader == null) { - try { - tvReader = (TermVectorsReader)termVectorsReaderOrig.clone(); - } catch (CloneNotSupportedException cnse) { + @Override + public SortedDocValues getSortedDocValues(String field) throws IOException { + ensureOpen(); + Map dvFields = docValuesLocal.get(); + + Object previous = dvFields.get(field); + if (previous != null && previous instanceof SortedDocValues) { + return (SortedDocValues) previous; + } else { + FieldInfo fi = getDVField(field, DocValuesType.SORTED); + if (fi == null) { return null; } - termVectorsLocal.set(tvReader); + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; + SortedDocValues dv = dvProducer.getSorted(fi); + dvFields.put(field, dv); + return dv; } - return tvReader; } - /** Return a term frequency vector for the specified document and field. The - * vector returned contains term numbers and frequencies for all terms in - * the specified field of this document, if the field had storeTermVector - * flag set. If the flag was not set, the method returns null. 
- * @throws IOException - */ - public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException { - // Check if this field is invalid or has no stored term vector + @Override + public SortedNumericDocValues getSortedNumericDocValues(String field) throws IOException { ensureOpen(); - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) - return null; - - TermVectorsReader termVectorsReader = getTermVectorsReader(); - if (termVectorsReader == null) - return null; - - return termVectorsReader.get(docNumber, field); - } + Map dvFields = docValuesLocal.get(); - - public void getTermFreqVector(int docNumber, String field, TermVectorMapper mapper) throws IOException { - ensureOpen(); - FieldInfo fi = fieldInfos.fieldInfo(field); - if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) - return; - - TermVectorsReader termVectorsReader = getTermVectorsReader(); - if (termVectorsReader == null) - { - return; + Object previous = dvFields.get(field); + if (previous != null && previous instanceof SortedNumericDocValues) { + return (SortedNumericDocValues) previous; + } else { + FieldInfo fi = getDVField(field, DocValuesType.SORTED_NUMERIC); + if (fi == null) { + return null; + } + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; + SortedNumericDocValues dv = dvProducer.getSortedNumeric(fi); + dvFields.put(field, dv); + return dv; } - - - termVectorsReader.get(docNumber, field, mapper); } - - public void getTermFreqVector(int docNumber, TermVectorMapper mapper) throws IOException { + @Override + public SortedSetDocValues getSortedSetDocValues(String field) throws IOException { ensureOpen(); - if (termVectorsReaderOrig == null) - return; - - TermVectorsReader termVectorsReader = getTermVectorsReader(); - if (termVectorsReader == null) - return; - - termVectorsReader.get(docNumber, mapper); + Map dvFields = docValuesLocal.get(); + + Object previous = dvFields.get(field); + if (previous != null && previous instanceof SortedSetDocValues) { + return (SortedSetDocValues) previous; + } else { + FieldInfo fi = getDVField(field, DocValuesType.SORTED_SET); + if (fi == null) { + return null; + } + DocValuesProducer dvProducer = dvProducersByField.get(field); + assert dvProducer != null; + SortedSetDocValues dv = dvProducer.getSortedSet(fi); + dvFields.put(field, dv); + return dv; + } } - /** Return an array of term frequency vectors for the specified document. - * The array contains a vector for each vectorized field in the document. - * Each vector vector contains term numbers and frequencies for all terms - * in a given vectorized field. - * If no such fields existed, the method returns null. - * @throws IOException - */ - public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException { + @Override + public NumericDocValues getNormValues(String field) throws IOException { ensureOpen(); - if (termVectorsReaderOrig == null) - return null; - - TermVectorsReader termVectorsReader = getTermVectorsReader(); - if (termVectorsReader == null) - return null; - - return termVectorsReader.get(docNumber); + return core.getNormValues(fieldInfos, field); } - /** Returns the field infos of this segment */ - FieldInfos fieldInfos() { - return fieldInfos; + @Override + public void addCoreClosedListener(CoreClosedListener listener) { + ensureOpen(); + core.addCoreClosedListener(listener); } - /** - * Return the name of the segment this reader is reading. 
- */ - String getSegmentName() { - return segment; + @Override + public void removeCoreClosedListener(CoreClosedListener listener) { + ensureOpen(); + core.removeCoreClosedListener(listener); } - /** - * Return the SegmentInfo of the segment this reader is reading. - */ - SegmentInfo getSegmentInfo() { - return si; - } - - void setSegmentInfo(SegmentInfo info) { - si = info; - } - - void startCommit() { - super.startCommit(); - rollbackDeletedDocsDirty = deletedDocsDirty; - rollbackNormsDirty = normsDirty; - rollbackUndeleteAll = undeleteAll; - rollbackPendingDeleteCount = pendingDeleteCount; - Iterator it = norms.values().iterator(); - while (it.hasNext()) { - Norm norm = (Norm) it.next(); - norm.rollbackDirty = norm.dirty; + @Override + public long ramBytesUsed() { + ensureOpen(); + long ramBytesUsed = BASE_RAM_BYTES_USED; + ramBytesUsed += dvGens.size() * LONG_RAM_BYTES_USED; + ramBytesUsed += dvProducers.size() * RamUsageEstimator.NUM_BYTES_OBJECT_REF; + ramBytesUsed += dvProducersByField.size() * 2 * RamUsageEstimator.NUM_BYTES_OBJECT_REF; + if (dvProducers != null) { + for (DocValuesProducer producer : dvProducers) { + ramBytesUsed += producer.ramBytesUsed(); + } } + if (core != null) { + ramBytesUsed += core.ramBytesUsed(); + } + return ramBytesUsed; } - - void rollbackCommit() { - super.rollbackCommit(); - deletedDocsDirty = rollbackDeletedDocsDirty; - normsDirty = rollbackNormsDirty; - undeleteAll = rollbackUndeleteAll; - pendingDeleteCount = rollbackPendingDeleteCount; - Iterator it = norms.values().iterator(); - while (it.hasNext()) { - Norm norm = (Norm) it.next(); - norm.dirty = norm.rollbackDirty; + + @Override + public void checkIntegrity() throws IOException { + ensureOpen(); + + // stored fields + getFieldsReader().checkIntegrity(); + + // term vectors + TermVectorsReader termVectorsReader = getTermVectorsReader(); + if (termVectorsReader != null) { + termVectorsReader.checkIntegrity(); } + + // terms/postings + if (core.fields != null) { + core.fields.checkIntegrity(); + } + + // norms + if (core.normsProducer != null) { + core.normsProducer.checkIntegrity(); + } + + // docvalues + if (dvProducers != null) { + for (DocValuesProducer producer : dvProducers) { + producer.checkIntegrity(); + } + } } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentTermDocs.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentTermEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentTermPositionVector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentTermPositions.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentTermVector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SegmentWriteState.java'. Fisheye: No comparison available. Pass `N' to diff? 
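The SegmentReader hunks above route doc-values lookups through a per-thread cache (docValuesLocal) plus per-field DocValuesProducers, and add ramBytesUsed() accounting and checkIntegrity() checks. Below is a minimal sketch of how application code would read such values through the public AtomicReader API of this 4.x line; the getNumericDocValues call belongs to that public API, but the index path and the "price" field name are assumptions made only for illustration.

import java.io.File;
import java.io.IOException;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class DocValuesReadSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = FSDirectory.open(new File("/path/to/index"));    // hypothetical index location
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      for (AtomicReaderContext ctx : reader.leaves()) {
        AtomicReader leaf = ctx.reader();
        // Delegates to the per-segment producer caching shown in the SegmentReader hunk;
        // returns null when the field was not indexed with numeric doc values.
        NumericDocValues values = leaf.getNumericDocValues("price"); // "price" is an assumed field
        if (values == null) {
          continue;
        }
        for (int docID = 0; docID < leaf.maxDoc(); docID++) {
          long v = values.get(docID);                                // per-document value
          // ... consume v ...
        }
      }
    }
  }
}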
Index: 3rdParty_sources/lucene/org/apache/lucene/index/SerialMergeScheduler.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/SerialMergeScheduler.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/SerialMergeScheduler.java 17 Aug 2012 14:55:01 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/SerialMergeScheduler.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -23,11 +23,15 @@ * sequentially, using the current thread. */ public class SerialMergeScheduler extends MergeScheduler { + /** Sole constructor. */ + public SerialMergeScheduler() { + } + /** Just do the merges in sequence. We do this * "synchronized" so that even if the application is using * multiple threads, only one merge may run at a time. */ - synchronized public void merge(IndexWriter writer) - throws CorruptIndexException, IOException { + @Override + synchronized public void merge(IndexWriter writer, MergeTrigger trigger, boolean newMergesFound) throws IOException { while(true) { MergePolicy.OneMerge merge = writer.getNextMerge(); @@ -37,5 +41,6 @@ } } + @Override public void close() {} } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SimpleMergedSegmentWarmer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SingleTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SingletonSortedNumericDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SingletonSortedSetDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SlowCompositeReaderWrapper.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/SnapshotDeletionPolicy.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/SnapshotDeletionPolicy.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/SnapshotDeletionPolicy.java 17 Aug 2012 14:54:59 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/SnapshotDeletionPolicy.java 16 Dec 2014 11:31:42 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -18,113 +18,247 @@ */ import java.util.Collection; +import java.util.HashMap; import java.util.List; import java.util.ArrayList; +import java.util.Map; import java.io.IOException; + import org.apache.lucene.store.Directory; -/** A {@link IndexDeletionPolicy} that wraps around any other - * {@link IndexDeletionPolicy} and adds the ability to hold and - * later release a single "snapshot" of an index. While - * the snapshot is held, the {@link IndexWriter} will not - * remove any files associated with it even if the index is - * otherwise being actively, arbitrarily changed. Because - * we wrap another arbitrary {@link IndexDeletionPolicy}, this - * gives you the freedom to continue using whatever {@link - * IndexDeletionPolicy} you would normally want to use with your - * index. Note that you can re-use a single instance of - * SnapshotDeletionPolicy across multiple writers as long - * as they are against the same index Directory. Any - * snapshot held when a writer is closed will "survive" - * when the next writer is opened. - * - *

- * WARNING: This API is a new and experimental and
- * may suddenly change.

    */ +/** + * An {@link IndexDeletionPolicy} that wraps any other + * {@link IndexDeletionPolicy} and adds the ability to hold and later release + * snapshots of an index. While a snapshot is held, the {@link IndexWriter} will + * not remove any files associated with it even if the index is otherwise being + * actively, arbitrarily changed. Because we wrap another arbitrary + * {@link IndexDeletionPolicy}, this gives you the freedom to continue using + * whatever {@link IndexDeletionPolicy} you would normally want to use with your + * index. + * + *

    + * This class maintains all snapshots in-memory, and so the information is not + * persisted and not protected against system failures. If persistence is + * important, you can use {@link PersistentSnapshotDeletionPolicy}. + * + * @lucene.experimental + */ +public class SnapshotDeletionPolicy extends IndexDeletionPolicy { -public class SnapshotDeletionPolicy implements IndexDeletionPolicy { + /** Records how many snapshots are held against each + * commit generation */ + protected final Map refCounts = new HashMap<>(); - private IndexCommit lastCommit; - private IndexDeletionPolicy primary; - private String snapshot; + /** Used to map gen to IndexCommit. */ + protected final Map indexCommits = new HashMap<>(); + /** Wrapped {@link IndexDeletionPolicy} */ + private final IndexDeletionPolicy primary; + + /** Most recently committed {@link IndexCommit}. */ + protected IndexCommit lastCommit; + + /** Used to detect misuse */ + private boolean initCalled; + + /** Sole constructor, taking the incoming {@link + * IndexDeletionPolicy} to wrap. */ public SnapshotDeletionPolicy(IndexDeletionPolicy primary) { this.primary = primary; } - public synchronized void onInit(List commits) throws IOException { + @Override + public synchronized void onCommit(List commits) + throws IOException { + primary.onCommit(wrapCommits(commits)); + lastCommit = commits.get(commits.size() - 1); + } + + @Override + public synchronized void onInit(List commits) + throws IOException { + initCalled = true; primary.onInit(wrapCommits(commits)); - lastCommit = (IndexCommit) commits.get(commits.size()-1); + for(IndexCommit commit : commits) { + if (refCounts.containsKey(commit.getGeneration())) { + indexCommits.put(commit.getGeneration(), commit); + } + } + if (!commits.isEmpty()) { + lastCommit = commits.get(commits.size() - 1); + } } - public synchronized void onCommit(List commits) throws IOException { - primary.onCommit(wrapCommits(commits)); - lastCommit = (IndexCommit) commits.get(commits.size()-1); + /** + * Release a snapshotted commit. + * + * @param commit + * the commit previously returned by {@link #snapshot} + */ + public synchronized void release(IndexCommit commit) throws IOException { + long gen = commit.getGeneration(); + releaseGen(gen); } - /** Take a snapshot of the most recent commit to the - * index. You must call release() to free this snapshot. - * Note that while the snapshot is held, the files it - * references will not be deleted, which will consume - * additional disk space in your index. If you take a - * snapshot at a particularly bad time (say just before - * you call optimize()) then in the worst case this could - * consume an extra 1X of your total index size, until - * you release the snapshot. */ - // TODO 3.0: change this to return IndexCommit instead - public synchronized IndexCommitPoint snapshot() { - if (snapshot == null) - snapshot = lastCommit.getSegmentsFileName(); - else - throw new IllegalStateException("snapshot is already set; please call release() first"); + /** Release a snapshot by generation. 
*/ + protected void releaseGen(long gen) throws IOException { + if (!initCalled) { + throw new IllegalStateException("this instance is not being used by IndexWriter; be sure to use the instance returned from writer.getConfig().getIndexDeletionPolicy()"); + } + Integer refCount = refCounts.get(gen); + if (refCount == null) { + throw new IllegalArgumentException("commit gen=" + gen + " is not currently snapshotted"); + } + int refCountInt = refCount.intValue(); + assert refCountInt > 0; + refCountInt--; + if (refCountInt == 0) { + refCounts.remove(gen); + indexCommits.remove(gen); + } else { + refCounts.put(gen, refCountInt); + } + } + + /** Increments the refCount for this {@link IndexCommit}. */ + protected synchronized void incRef(IndexCommit ic) { + long gen = ic.getGeneration(); + Integer refCount = refCounts.get(gen); + int refCountInt; + if (refCount == null) { + indexCommits.put(gen, lastCommit); + refCountInt = 0; + } else { + refCountInt = refCount.intValue(); + } + refCounts.put(gen, refCountInt+1); + } + + /** + * Snapshots the last commit and returns it. Once a commit is 'snapshotted,' it is protected + * from deletion (as long as this {@link IndexDeletionPolicy} is used). The + * snapshot can be removed by calling {@link #release(IndexCommit)} followed + * by a call to {@link IndexWriter#deleteUnusedFiles()}. + * + *

    + * NOTE: while the snapshot is held, the files it references will not + * be deleted, which will consume additional disk space in your index. If you + * take a snapshot at a particularly bad time (say just before you call + * forceMerge) then in the worst case this could consume an extra 1X of your + * total index size, until you release the snapshot. + * + * @throws IllegalStateException + * if this index does not have any commits yet + * @return the {@link IndexCommit} that was snapshotted. + */ + public synchronized IndexCommit snapshot() throws IOException { + if (!initCalled) { + throw new IllegalStateException("this instance is not being used by IndexWriter; be sure to use the instance returned from writer.getConfig().getIndexDeletionPolicy()"); + } + if (lastCommit == null) { + // No commit yet, eg this is a new IndexWriter: + throw new IllegalStateException("No index commit to snapshot"); + } + + incRef(lastCommit); + return lastCommit; } - /** Release the currently held snapshot. */ - public synchronized void release() { - if (snapshot != null) - snapshot = null; - else - throw new IllegalStateException("snapshot was not set; please call snapshot() first"); + /** Returns all IndexCommits held by at least one snapshot. */ + public synchronized List getSnapshots() { + return new ArrayList<>(indexCommits.values()); } - private class MyCommitPoint extends IndexCommit { - IndexCommit cp; - MyCommitPoint(IndexCommit cp) { - this.cp = cp; + /** Returns the total number of snapshots currently held. */ + public synchronized int getSnapshotCount() { + int total = 0; + for(Integer refCount : refCounts.values()) { + total += refCount.intValue(); } - public String getSegmentsFileName() { - return cp.getSegmentsFileName(); + + return total; + } + + /** Retrieve an {@link IndexCommit} from its generation; + * returns null if this IndexCommit is not currently + * snapshotted */ + public synchronized IndexCommit getIndexCommit(long gen) { + return indexCommits.get(gen); + } + + /** Wraps each {@link IndexCommit} as a {@link + * SnapshotCommitPoint}. */ + private List wrapCommits(List commits) { + List wrappedCommits = new ArrayList<>(commits.size()); + for (IndexCommit ic : commits) { + wrappedCommits.add(new SnapshotCommitPoint(ic)); } - public Collection getFileNames() throws IOException { - return cp.getFileNames(); + return wrappedCommits; + } + + /** Wraps a provided {@link IndexCommit} and prevents it + * from being deleted. */ + private class SnapshotCommitPoint extends IndexCommit { + + /** The {@link IndexCommit} we are preventing from deletion. */ + protected IndexCommit cp; + + /** Creates a {@code SnapshotCommitPoint} wrapping the provided + * {@link IndexCommit}. */ + protected SnapshotCommitPoint(IndexCommit cp) { + this.cp = cp; } - public Directory getDirectory() { - return cp.getDirectory(); + + @Override + public String toString() { + return "SnapshotDeletionPolicy.SnapshotCommitPoint(" + cp + ")"; } + + @Override public void delete() { - synchronized(SnapshotDeletionPolicy.this) { + synchronized (SnapshotDeletionPolicy.this) { // Suppress the delete request if this commit point is - // our current snapshot. - if (snapshot == null || !snapshot.equals(getSegmentsFileName())) + // currently snapshotted. 
+ if (!refCounts.containsKey(cp.getGeneration())) { cp.delete(); + } } } - public boolean isDeleted() { - return cp.isDeleted(); + + @Override + public Directory getDirectory() { + return cp.getDirectory(); } - public long getVersion() { - return cp.getVersion(); + + @Override + public Collection getFileNames() throws IOException { + return cp.getFileNames(); } + + @Override public long getGeneration() { return cp.getGeneration(); } - } - private List wrapCommits(List commits) { - final int count = commits.size(); - List myCommits = new ArrayList(count); - for(int i=0;i getUserData() throws IOException { + return cp.getUserData(); + } + + @Override + public boolean isDeleted() { + return cp.isDeleted(); + } + + @Override + public int getSegmentCount() { + return cp.getSegmentCount(); + } } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedDocValuesTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedDocValuesWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedNumericDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedNumericDocValuesWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedSetDocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedSetDocValuesTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedSetDocValuesWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/SortedTermVectorMapper.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/StaleReaderException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/StandardDirectoryReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/StoredFieldVisitor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/StoredFieldsWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/StoredFieldsWriterPerField.java'. Fisheye: No comparison available. Pass `N' to diff? 
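The rewritten SnapshotDeletionPolicy above ref-counts snapshots per commit generation and exposes snapshot(), release(IndexCommit), getSnapshots() and getIndexCommit(long). The following sketch assembles the usage pattern implied by the new javadoc and by the "use the instance returned from writer.getConfig().getIndexDeletionPolicy()" error message; the directory, analyzer and Version constant are placeholders, not part of the patch.

import java.io.IOException;
import java.util.Collection;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.KeepOnlyLastCommitDeletionPolicy;
import org.apache.lucene.index.SnapshotDeletionPolicy;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class SnapshotBackupSketch {
  public static void main(String[] args) throws IOException {
    Directory dir = new RAMDirectory();                        // placeholder directory
    IndexWriterConfig conf = new IndexWriterConfig(Version.LUCENE_CURRENT,
        new StandardAnalyzer(Version.LUCENE_CURRENT));         // placeholder analyzer/version
    conf.setIndexDeletionPolicy(
        new SnapshotDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()));
    IndexWriter writer = new IndexWriter(dir, conf);

    // ... add documents ...
    writer.commit();

    // Fetch the policy back from the live config; snapshot()/release() throw
    // IllegalStateException otherwise (the initCalled guard in the hunk above).
    SnapshotDeletionPolicy sdp =
        (SnapshotDeletionPolicy) writer.getConfig().getIndexDeletionPolicy();

    IndexCommit snapshot = sdp.snapshot();                     // protects the last commit
    try {
      Collection<String> files = snapshot.getFileNames();      // e.g. copy these for a backup
      System.out.println("snapshotted files: " + files);
    } finally {
      sdp.release(snapshot);                                   // drop the ref-count ...
      writer.deleteUnusedFiles();                              // ... and let the writer clean up
    }
    writer.close();
  }
}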
Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/StoredFieldsWriterPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/Term.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/Term.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/Term.java 17 Aug 2012 14:55:02 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/Term.java 16 Dec 2014 11:31:41 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,106 +17,142 @@ * limitations under the License. */ +import java.nio.ByteBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.CharsetDecoder; +import java.nio.charset.CodingErrorAction; +import java.nio.charset.StandardCharsets; + +import org.apache.lucene.util.BytesRef; + /** A Term represents a word from text. This is the unit of search. It is composed of two elements, the text of the word, as a string, and the name of - the field that the text occured in, an interned string. + the field that the text occurred in. Note that terms may represent more than words from text fields, but also things like dates, email addresses, urls, etc. */ -public final class Term implements Comparable, java.io.Serializable { +public final class Term implements Comparable { String field; - String text; + BytesRef bytes; + /** Constructs a Term with the given field and bytes. + *

    Note that a null field or null bytes value results in undefined + * behavior for most Lucene APIs that accept a Term parameter. + * + *

    WARNING: the provided BytesRef is not copied, but used directly. + * Therefore the bytes should not be modified after construction, for + * example, you should clone a copy by {@link BytesRef#deepCopyOf} + * rather than pass reused bytes from a TermsEnum. + */ + public Term(String fld, BytesRef bytes) { + field = fld; + this.bytes = bytes; + } + /** Constructs a Term with the given field and text. *

    Note that a null field or null text value results in undefined * behavior for most Lucene APIs that accept a Term parameter. */ - public Term(String fld, String txt) { - this(fld, txt, true); + public Term(String fld, String text) { + this(fld, new BytesRef(text)); } /** Constructs a Term with the given field and empty text. * This serves two purposes: 1) reuse of a Term with the same field. * 2) pattern for a query. * - * @param fld + * @param fld field's name */ public Term(String fld) { - this(fld, "", true); + this(fld, new BytesRef()); } - Term(String fld, String txt, boolean intern) { - field = intern ? fld.intern() : fld; // field names are interned - text = txt; // unless already known to be - } - - /** Returns the field of this term, an interned string. The field indicates + /** Returns the field of this term. The field indicates the part of a document which this term came from. */ public final String field() { return field; } /** Returns the text of this term. In the case of words, this is simply the text of the word. In the case of dates and other types, this is an encoding of the object as a string. */ - public final String text() { return text; } + public final String text() { + return toString(bytes); + } - /** - * Optimized construction of new Terms by reusing same field as this Term - * - avoids field.intern() overhead - * @param text The text of the new term (field is implicitly same as this Term instance) - * @return A new Term - */ - public Term createTerm(String text) - { - return new Term(field,text,false); + /** Returns human-readable form of the term text. If the term is not unicode, + * the raw bytes will be printed instead. */ + public static final String toString(BytesRef termText) { + // the term might not be text, but usually is. so we make a best effort + CharsetDecoder decoder = StandardCharsets.UTF_8.newDecoder() + .onMalformedInput(CodingErrorAction.REPORT) + .onUnmappableCharacter(CodingErrorAction.REPORT); + try { + return decoder.decode(ByteBuffer.wrap(termText.bytes, termText.offset, termText.length)).toString(); + } catch (CharacterCodingException e) { + return termText.toString(); + } } - /** Compares two terms, returning true iff they have the same - field and text. */ - public final boolean equals(Object o) { - if (o == this) + /** Returns the bytes of this term. */ + public final BytesRef bytes() { return bytes; } + + @Override + public boolean equals(Object obj) { + if (this == obj) return true; - if (o == null) + if (obj == null) return false; - if (!(o instanceof Term)) + if (getClass() != obj.getClass()) return false; - Term other = (Term)o; - return field == other.field && text.equals(other.text); + Term other = (Term) obj; + if (field == null) { + if (other.field != null) + return false; + } else if (!field.equals(other.field)) + return false; + if (bytes == null) { + if (other.bytes != null) + return false; + } else if (!bytes.equals(other.bytes)) + return false; + return true; } - /** Combines the hashCode() of the field and the text. */ - public final int hashCode() { - return field.hashCode() + text.hashCode(); + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + ((field == null) ? 0 : field.hashCode()); + result = prime * result + ((bytes == null) ? 
0 : bytes.hashCode()); + return result; } - public int compareTo(Object other) { - return compareTo((Term)other); - } - /** Compares two terms, returning a negative integer if this term belongs before the argument, zero if this term is equal to the argument, and a positive integer if this term belongs after the argument. The ordering of terms is first by field, then by text.*/ + @Override public final int compareTo(Term other) { - if (field == other.field) // fields are interned - return text.compareTo(other.text); - else + if (field.equals(other.field)) { + return bytes.compareTo(other.bytes); + } else { return field.compareTo(other.field); + } } - /** Resets the field and text of a Term. */ - final void set(String fld, String txt) { + /** + * Resets the field and text of a Term. + *

    WARNING: the provided BytesRef is not copied, but used directly. + * Therefore the bytes should not be modified after construction, for + * example, you should clone a copy rather than pass reused bytes from + * a TermsEnum. + */ + final void set(String fld, BytesRef bytes) { field = fld; - text = txt; + this.bytes = bytes; } - public final String toString() { return field + ":" + text; } - - private void readObject(java.io.ObjectInputStream in) - throws java.io.IOException, ClassNotFoundException - { - in.defaultReadObject(); - field = field.intern(); - } + @Override + public final String toString() { return field + ":" + text(); } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermBuffer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermContext.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermDocs.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermFreqVector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermInfo.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermInfosReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermInfosWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermPositionVector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermPositions.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermState.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorEntry.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorEntryFreqSortedComparator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorMapper.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorOffsetInfo.java'. Fisheye: No comparison available. Pass `N' to diff? 
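In the Term diff above, the term text is now held as a BytesRef rather than an interned String, and the new javadoc warns that the provided bytes are used directly. Below is a small sketch of both constructors and of the BytesRef.deepCopyOf idiom that the warning points at; the field name and the TermsEnum parameter are illustrative only.

import java.io.IOException;

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.BytesRef;

public class TermSketch {

  // Plain construction: the String text is converted to UTF-8 bytes internally.
  static Term fromText() {
    return new Term("title", "lucene");           // "title" is an assumed field name
  }

  // Bytes returned by a TermsEnum are reused on the next call to next(), so they
  // must be deep-copied before being wrapped in a Term, as the WARNING above says.
  static Term fromEnum(TermsEnum termsEnum) throws IOException {
    BytesRef current = termsEnum.next();          // null once the enum is exhausted
    if (current == null) {
      return null;
    }
    return new Term("title", BytesRef.deepCopyOf(current));
  }

  // A Term remains the unit of search, e.g. as the argument of a TermQuery.
  static TermQuery asQuery(Term t) {
    return new TermQuery(t);
  }
}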
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorsConsumer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorsConsumerPerField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorsReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorsTermsWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorsTermsWriterPerField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorsTermsWriterPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermVectorsWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/Terms.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/TermsHash.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/TermsHash.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/TermsHash.java 17 Aug 2012 14:54:59 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/TermsHash.java 16 Dec 2014 11:31:44 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,217 +17,86 @@ * limitations under the License. */ -import java.util.Collection; -import java.util.Map; -import java.util.HashMap; -import java.util.Iterator; -import java.util.HashSet; -import java.util.Arrays; import java.io.IOException; +import java.util.HashMap; +import java.util.Map; -import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.Counter; +import org.apache.lucene.util.IntBlockPool; -/** This class implements {@link InvertedDocConsumer}, which - * is passed each token produced by the analyzer on each - * field. It stores these tokens in a hash table, and - * allocates separate byte streams per token. Consumers of - * this class, eg {@link FreqProxTermsWriter} and {@link - * TermVectorsTermsWriter}, write their own byte streams - * under each term. - */ +/** This class is passed each token produced by the analyzer + * on each field during indexing, and it stores these + * tokens in a hash table, and allocates separate byte + * streams per token. 
Consumers of this class, eg {@link + * FreqProxTermsWriter} and {@link TermVectorsConsumer}, + * write their own byte streams under each term. */ +abstract class TermsHash { -final class TermsHash extends InvertedDocConsumer { - - final TermsHashConsumer consumer; final TermsHash nextTermsHash; - final int bytesPerPosting; - final int postingsFreeChunk; - final DocumentsWriter docWriter; - - private TermsHash primaryTermsHash; - private RawPostingList[] postingsFreeList = new RawPostingList[1]; - private int postingsFreeCount; - private int postingsAllocCount; - boolean trackAllocations; + final IntBlockPool intPool; + final ByteBlockPool bytePool; + ByteBlockPool termBytePool; + final Counter bytesUsed; - public TermsHash(final DocumentsWriter docWriter, boolean trackAllocations, final TermsHashConsumer consumer, final TermsHash nextTermsHash) { - this.docWriter = docWriter; - this.consumer = consumer; - this.nextTermsHash = nextTermsHash; - this.trackAllocations = trackAllocations; + final DocumentsWriterPerThread.DocState docState; - // Why + 4*POINTER_NUM_BYTE below? - // +1: Posting is referenced by postingsFreeList array - // +3: Posting is referenced by hash, which - // targets 25-50% fill factor; approximate this - // as 3X # pointers - bytesPerPosting = consumer.bytesPerPosting() + 4*DocumentsWriter.POINTER_NUM_BYTE; - postingsFreeChunk = (int) (DocumentsWriter.BYTE_BLOCK_SIZE / bytesPerPosting); - } + final boolean trackAllocations; - InvertedDocConsumerPerThread addThread(DocInverterPerThread docInverterPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, null); - } + TermsHash(final DocumentsWriterPerThread docWriter, boolean trackAllocations, TermsHash nextTermsHash) { + this.docState = docWriter.docState; + this.trackAllocations = trackAllocations; + this.nextTermsHash = nextTermsHash; + this.bytesUsed = trackAllocations ? 
docWriter.bytesUsed : Counter.newCounter(); + intPool = new IntBlockPool(docWriter.intBlockAllocator); + bytePool = new ByteBlockPool(docWriter.byteBlockAllocator); - TermsHashPerThread addThread(DocInverterPerThread docInverterPerThread, TermsHashPerThread primaryPerThread) { - return new TermsHashPerThread(docInverterPerThread, this, nextTermsHash, primaryPerThread); + if (nextTermsHash != null) { + // We are primary + termBytePool = bytePool; + nextTermsHash.termBytePool = bytePool; + } } - void setFieldInfos(FieldInfos fieldInfos) { - this.fieldInfos = fieldInfos; - consumer.setFieldInfos(fieldInfos); - } - - synchronized public void abort() { - consumer.abort(); - if (nextTermsHash != null) - nextTermsHash.abort(); - } - - void shrinkFreePostings(Map threadsAndFields, DocumentsWriter.FlushState state) { - - assert postingsFreeCount == postingsAllocCount: Thread.currentThread().getName() + ": postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount + " consumer=" + consumer; - - final int newSize = ArrayUtil.getShrinkSize(postingsFreeList.length, postingsAllocCount); - if (newSize != postingsFreeList.length) { - RawPostingList[] newArray = new RawPostingList[newSize]; - System.arraycopy(postingsFreeList, 0, newArray, 0, postingsFreeCount); - postingsFreeList = newArray; + public void abort() { + try { + reset(); + } finally { + if (nextTermsHash != null) { + nextTermsHash.abort(); + } } } - synchronized void closeDocStore(DocumentsWriter.FlushState state) throws IOException { - consumer.closeDocStore(state); - if (nextTermsHash != null) - nextTermsHash.closeDocStore(state); + // Clear all state + void reset() { + // we don't reuse so we drop everything and don't fill with 0 + intPool.reset(false, false); + bytePool.reset(false, false); } - synchronized void flush(Map threadsAndFields, final DocumentsWriter.FlushState state) throws IOException { - Map childThreadsAndFields = new HashMap(); - Map nextThreadsAndFields; - - if (nextTermsHash != null) - nextThreadsAndFields = new HashMap(); - else - nextThreadsAndFields = null; - - Iterator it = threadsAndFields.entrySet().iterator(); - while(it.hasNext()) { - - Map.Entry entry = (Map.Entry) it.next(); - - TermsHashPerThread perThread = (TermsHashPerThread) entry.getKey(); - - Collection fields = (Collection) entry.getValue(); - - Iterator fieldsIt = fields.iterator(); - Collection childFields = new HashSet(); - Collection nextChildFields; - - if (nextTermsHash != null) - nextChildFields = new HashSet(); - else - nextChildFields = null; - - while(fieldsIt.hasNext()) { - TermsHashPerField perField = (TermsHashPerField) fieldsIt.next(); - childFields.add(perField.consumer); - if (nextTermsHash != null) - nextChildFields.add(perField.nextPerField); + void flush(Map fieldsToFlush, final SegmentWriteState state) throws IOException { + if (nextTermsHash != null) { + Map nextChildFields = new HashMap<>(); + for (final Map.Entry entry : fieldsToFlush.entrySet()) { + nextChildFields.put(entry.getKey(), entry.getValue().nextPerField); } - - childThreadsAndFields.put(perThread.consumer, childFields); - if (nextTermsHash != null) - nextThreadsAndFields.put(perThread.nextPerThread, nextChildFields); + nextTermsHash.flush(nextChildFields, state); } - - consumer.flush(childThreadsAndFields, state); - - shrinkFreePostings(threadsAndFields, state); - - if (nextTermsHash != null) - nextTermsHash.flush(nextThreadsAndFields, state); } - synchronized public boolean freeRAM() { + abstract TermsHashPerField addField(FieldInvertState 
fieldInvertState, final FieldInfo fieldInfo); - if (!trackAllocations) - return false; - - boolean any; - final int numToFree; - if (postingsFreeCount >= postingsFreeChunk) - numToFree = postingsFreeChunk; - else - numToFree = postingsFreeCount; - any = numToFree > 0; - if (any) { - Arrays.fill(postingsFreeList, postingsFreeCount-numToFree, postingsFreeCount, null); - postingsFreeCount -= numToFree; - postingsAllocCount -= numToFree; - docWriter.bytesAllocated(-numToFree * bytesPerPosting); - any = true; + void finishDocument() throws IOException { + if (nextTermsHash != null) { + nextTermsHash.finishDocument(); } - - if (nextTermsHash != null) - any |= nextTermsHash.freeRAM(); - - return any; } - synchronized public void recyclePostings(final RawPostingList[] postings, final int numPostings) { - - assert postings.length >= numPostings; - - // Move all Postings from this ThreadState back to our - // free list. We pre-allocated this array while we were - // creating Postings to make sure it's large enough - assert postingsFreeCount + numPostings <= postingsFreeList.length; - System.arraycopy(postings, 0, postingsFreeList, postingsFreeCount, numPostings); - postingsFreeCount += numPostings; - } - - synchronized public void getPostings(final RawPostingList[] postings) { - - assert docWriter.writer.testPoint("TermsHash.getPostings start"); - - assert postingsFreeCount <= postingsFreeList.length; - assert postingsFreeCount <= postingsAllocCount: "postingsFreeCount=" + postingsFreeCount + " postingsAllocCount=" + postingsAllocCount; - - final int numToCopy; - if (postingsFreeCount < postings.length) - numToCopy = postingsFreeCount; - else - numToCopy = postings.length; - final int start = postingsFreeCount-numToCopy; - assert start >= 0; - assert start + numToCopy <= postingsFreeList.length; - assert numToCopy <= postings.length; - System.arraycopy(postingsFreeList, start, - postings, 0, numToCopy); - - // Directly allocate the remainder if any - if (numToCopy != postings.length) { - final int extra = postings.length - numToCopy; - final int newPostingsAllocCount = postingsAllocCount + extra; - - consumer.createPostings(postings, numToCopy, extra); - assert docWriter.writer.testPoint("TermsHash.getPostings after create"); - postingsAllocCount += extra; - - if (trackAllocations) - docWriter.bytesAllocated(extra * bytesPerPosting); - - if (newPostingsAllocCount > postingsFreeList.length) - // Pre-allocate the postingsFreeList so it's large - // enough to hold all postings we've given out - postingsFreeList = new RawPostingList[ArrayUtil.getNextSize(newPostingsAllocCount)]; + void startDocument() throws IOException { + if (nextTermsHash != null) { + nextTermsHash.startDocument(); } - - postingsFreeCount -= numToCopy; - - if (trackAllocations) - docWriter.bytesUsed(postings.length * bytesPerPosting); } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermsHashConsumer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermsHashConsumerPerField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermsHashConsumerPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? 
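The rewritten TermsHash above now mainly wires up the int/byte block pools and the memory Counter, while the TermsHashPerField diff that follows de-duplicates terms through a shared BytesRefHash. Here is a standalone sketch of the BytesRefHash.add() contract those hunks rely on (a non-negative id for a new term, (-id)-1 for one already seen); the sample tokens are arbitrary.

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefHash;

public class BytesRefHashSketch {
  public static void main(String[] args) {
    BytesRefHash hash = new BytesRefHash();      // default pool and start array

    String[] tokens = { "foo", "bar", "foo" };   // arbitrary sample terms
    for (String token : tokens) {
      int id = hash.add(new BytesRef(token));
      if (id >= 0) {
        System.out.println("new term '" + token + "' -> id " + id);
      } else {
        // Same convention as TermsHashPerField.add() below: negative means already present.
        System.out.println("seen term '" + token + "' -> id " + ((-id) - 1));
      }
    }
    System.out.println("unique terms: " + hash.size());
  }
}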
Index: 3rdParty_sources/lucene/org/apache/lucene/index/TermsHashPerField.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/TermsHashPerField.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/TermsHashPerField.java 17 Aug 2012 14:54:58 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/TermsHashPerField.java 16 Dec 2014 11:31:43 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.index; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,458 +18,186 @@ */ import java.io.IOException; -import java.util.Arrays; +import java.util.Comparator; -import org.apache.lucene.document.Fieldable; -import org.apache.lucene.analysis.Token; -import org.apache.lucene.util.UnicodeUtil; +import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute; +import org.apache.lucene.util.ByteBlockPool; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefHash; +import org.apache.lucene.util.Counter; +import org.apache.lucene.util.IntBlockPool; +import org.apache.lucene.util.BytesRefHash.BytesStartArray; -final class TermsHashPerField extends InvertedDocConsumerPerField { +abstract class TermsHashPerField implements Comparable { + private static final int HASH_INIT_SIZE = 4; - final TermsHashConsumerPerField consumer; + final TermsHash termsHash; + final TermsHashPerField nextPerField; - final TermsHashPerThread perThread; - final DocumentsWriter.DocState docState; - final DocInverter.FieldInvertState fieldState; + protected final DocumentsWriterPerThread.DocState docState; + protected final FieldInvertState fieldState; + TermToBytesRefAttribute termAtt; + BytesRef termBytesRef; // Copied from our perThread - final CharBlockPool charPool; final IntBlockPool intPool; final ByteBlockPool bytePool; + final ByteBlockPool termBytePool; final int streamCount; final int numPostingInt; - final FieldInfo fieldInfo; + protected final FieldInfo fieldInfo; - boolean postingsCompacted; - int numPostings; - private int postingsHashSize = 4; - private int postingsHashHalfSize = postingsHashSize/2; - private int postingsHashMask = postingsHashSize-1; - private RawPostingList[] postingsHash = new RawPostingList[postingsHashSize]; - private RawPostingList p; + final BytesRefHash bytesHash; - public TermsHashPerField(DocInverterPerField docInverterPerField, final TermsHashPerThread perThread, final TermsHashPerThread nextPerThread, final FieldInfo fieldInfo) { - this.perThread = perThread; - intPool = perThread.intPool; - charPool = perThread.charPool; - bytePool = perThread.bytePool; - docState = perThread.docState; - fieldState = docInverterPerField.fieldState; - this.consumer = perThread.consumer.addField(this, fieldInfo); - streamCount = consumer.getStreamCount(); + ParallelPostingsArray postingsArray; + private final Counter bytesUsed; + + /** streamCount: how many streams this field stores per term. + * E.g. doc(+freq) is 1 stream, prox+offset is a second. 
*/ + + public TermsHashPerField(int streamCount, FieldInvertState fieldState, TermsHash termsHash, TermsHashPerField nextPerField, FieldInfo fieldInfo) { + intPool = termsHash.intPool; + bytePool = termsHash.bytePool; + termBytePool = termsHash.termBytePool; + docState = termsHash.docState; + this.termsHash = termsHash; + bytesUsed = termsHash.bytesUsed; + this.fieldState = fieldState; + this.streamCount = streamCount; numPostingInt = 2*streamCount; this.fieldInfo = fieldInfo; - if (nextPerThread != null) - nextPerField = (TermsHashPerField) nextPerThread.addField(docInverterPerField, fieldInfo); - else - nextPerField = null; + this.nextPerField = nextPerField; + PostingsBytesStartArray byteStarts = new PostingsBytesStartArray(this, bytesUsed); + bytesHash = new BytesRefHash(termBytePool, HASH_INIT_SIZE, byteStarts); } - void shrinkHash(int targetSize) { - assert postingsCompacted || numPostings == 0; - - // Cannot use ArrayUtil.shrink because we require power - // of 2: - int newSize = postingsHash.length; - while(newSize >= 8 && newSize/4 > targetSize) { - newSize /= 2; - } - - if (newSize != postingsHash.length) { - postingsHash = new RawPostingList[newSize]; - postingsHashSize = newSize; - postingsHashHalfSize = newSize/2; - postingsHashMask = newSize-1; - } - } - public void reset() { - if (!postingsCompacted) - compactPostings(); - assert numPostings <= postingsHash.length; - if (numPostings > 0) { - perThread.termsHash.recyclePostings(postingsHash, numPostings); - Arrays.fill(postingsHash, 0, numPostings, null); - numPostings = 0; - } - postingsCompacted = false; - if (nextPerField != null) + bytesHash.clear(false); + if (nextPerField != null) { nextPerField.reset(); + } } - synchronized public void abort() { - reset(); - if (nextPerField != null) - nextPerField.abort(); - } - - public void initReader(ByteSliceReader reader, RawPostingList p, int stream) { + public void initReader(ByteSliceReader reader, int termID, int stream) { assert stream < streamCount; - final int[] ints = intPool.buffers[p.intStart >> DocumentsWriter.INT_BLOCK_SHIFT]; - final int upto = p.intStart & DocumentsWriter.INT_BLOCK_MASK; + int intStart = postingsArray.intStarts[termID]; + final int[] ints = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT]; + final int upto = intStart & IntBlockPool.INT_BLOCK_MASK; reader.init(bytePool, - p.byteStart+stream*ByteBlockPool.FIRST_LEVEL_SIZE, + postingsArray.byteStarts[termID]+stream*ByteBlockPool.FIRST_LEVEL_SIZE, ints[upto+stream]); } - private synchronized void compactPostings() { - int upto = 0; - for(int i=0;i termComp) { + return bytesHash.sort(termComp); } - /** Collapse the hash table & sort in-place. 
*/ - public RawPostingList[] sortPostings() { - compactPostings(); - quickSort(postingsHash, 0, numPostings-1); - return postingsHash; - } - - void quickSort(RawPostingList[] postings, int lo, int hi) { - if (lo >= hi) - return; - else if (hi == 1+lo) { - if (comparePostings(postings[lo], postings[hi]) > 0) { - final RawPostingList tmp = postings[lo]; - postings[lo] = postings[hi]; - postings[hi] = tmp; - } - return; - } - - int mid = (lo + hi) >>> 1; - - if (comparePostings(postings[lo], postings[mid]) > 0) { - RawPostingList tmp = postings[lo]; - postings[lo] = postings[mid]; - postings[mid] = tmp; - } - - if (comparePostings(postings[mid], postings[hi]) > 0) { - RawPostingList tmp = postings[mid]; - postings[mid] = postings[hi]; - postings[hi] = tmp; - - if (comparePostings(postings[lo], postings[mid]) > 0) { - RawPostingList tmp2 = postings[lo]; - postings[lo] = postings[mid]; - postings[mid] = tmp2; - } - } - - int left = lo + 1; - int right = hi - 1; - - if (left >= right) - return; - - RawPostingList partition = postings[mid]; - - for (; ;) { - while (comparePostings(postings[right], partition) > 0) - --right; - - while (left < right && comparePostings(postings[left], partition) <= 0) - ++left; - - if (left < right) { - RawPostingList tmp = postings[left]; - postings[left] = postings[right]; - postings[right] = tmp; - --right; - } else { - break; - } - } - - quickSort(postings, lo, left); - quickSort(postings, left + 1, hi); - } - - /** Compares term text for two Posting instance and - * returns -1 if p1 < p2; 1 if p1 > p2; else 0. */ - int comparePostings(RawPostingList p1, RawPostingList p2) { - - if (p1 == p2) - return 0; - - final char[] text1 = charPool.buffers[p1.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; - int pos1 = p1.textStart & DocumentsWriter.CHAR_BLOCK_MASK; - final char[] text2 = charPool.buffers[p2.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; - int pos2 = p2.textStart & DocumentsWriter.CHAR_BLOCK_MASK; - - assert text1 != text2 || pos1 != pos2; - - while(true) { - final char c1 = text1[pos1++]; - final char c2 = text2[pos2++]; - if (c1 != c2) { - if (0xffff == c2) - return 1; - else if (0xffff == c1) - return -1; - else - return c1-c2; - } else - // This method should never compare equal postings - // unless p1==p2 - assert c1 != 0xffff; - } - } - - /** Test whether the text for current RawPostingList p equals - * current tokenText. */ - private boolean postingEquals(final char[] tokenText, final int tokenTextLen) { - - final char[] text = perThread.charPool.buffers[p.textStart >> DocumentsWriter.CHAR_BLOCK_SHIFT]; - assert text != null; - int pos = p.textStart & DocumentsWriter.CHAR_BLOCK_MASK; - - int tokenPos = 0; - for(;tokenPos>8)+code)|1; - do { - code += inc; - hashPos = code & postingsHashMask; - p = postingsHash[hashPos]; - } while (p != null && p.textStart != textStart); - } - - if (p == null) { - + public void add(int textStart) throws IOException { + int termID = bytesHash.addByPoolOffset(textStart); + if (termID >= 0) { // New posting // First time we are seeing this token since we last // flushed the hash. - - // Refill? 
- if (0 == perThread.freePostingsCount) - perThread.morePostings(); - - // Pull next free RawPostingList from free list - p = perThread.freePostings[--perThread.freePostingsCount]; - assert p != null; - - p.textStart = textStart; - - assert postingsHash[hashPos] == null; - postingsHash[hashPos] = p; - numPostings++; - - if (numPostings == postingsHashHalfSize) - rehashPostings(2*postingsHashSize); - // Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) { intPool.nextBuffer(); + } - if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) + if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) { bytePool.nextBuffer(); + } intUptos = intPool.buffer; intUptoStart = intPool.intUpto; intPool.intUpto += streamCount; - p.intStart = intUptoStart + intPool.intOffset; + postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset; for(int i=0;i> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK; - consumer.addTerm(token, p); + termID = (-termID)-1; + int intStart = postingsArray.intStarts[termID]; + intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT]; + intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK; + addTerm(termID); } } - // Primary entry point (for first TermsHash) - void add(Token token) throws IOException { + /** Called once per inverted token. This is the primary + * entry point (for first TermsHash); postings use this + * API. */ + void add() throws IOException { - assert !postingsCompacted; + termAtt.fillBytesRef(); // We are first in the chain so we must "intern" the // term text into textStart address + // Get the text & hash of this term. + int termID = bytesHash.add(termBytesRef); + + //System.out.println("add term=" + termBytesRef.utf8ToString() + " doc=" + docState.docID + " termID=" + termID); - // Get the text of this term. - final char[] tokenText = token.termBuffer(); - final int tokenTextLen = token.termLength(); - - // Compute hashcode & replace any invalid UTF16 sequences - int downto = tokenTextLen; - int code = 0; - while (downto > 0) { - char ch = tokenText[--downto]; - - if (ch >= UnicodeUtil.UNI_SUR_LOW_START && ch <= UnicodeUtil.UNI_SUR_LOW_END) { - if (0 == downto) { - // Unpaired - ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; - } else { - final char ch2 = tokenText[downto-1]; - if (ch2 >= UnicodeUtil.UNI_SUR_HIGH_START && ch2 <= UnicodeUtil.UNI_SUR_HIGH_END) { - // OK: high followed by low. This is a valid - // surrogate pair. - code = ((code*31) + ch)*31+ch2; - downto--; - continue; - } else { - // Unpaired - ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; - } - } - } else if (ch >= UnicodeUtil.UNI_SUR_HIGH_START && ch <= UnicodeUtil.UNI_SUR_HIGH_END) - // Unpaired - ch = tokenText[downto] = UnicodeUtil.UNI_REPLACEMENT_CHAR; - - code = (code*31) + ch; - } - - int hashPos = code & postingsHashMask; - - // Locate RawPostingList in hash - p = postingsHash[hashPos]; - - if (p != null && !postingEquals(tokenText, tokenTextLen)) { - // Conflict: keep searching different locations in - // the hash table. 
- final int inc = ((code>>8)+code)|1; - do { - code += inc; - hashPos = code & postingsHashMask; - p = postingsHash[hashPos]; - } while (p != null && !postingEquals(tokenText, tokenTextLen)); - } - - if (p == null) { - - // First time we are seeing this token since we last - // flushed the hash. - final int textLen1 = 1+tokenTextLen; - if (textLen1 + charPool.charUpto > DocumentsWriter.CHAR_BLOCK_SIZE) { - if (textLen1 > DocumentsWriter.CHAR_BLOCK_SIZE) { - // Just skip this term, to remain as robust as - // possible during indexing. A TokenFilter - // can be inserted into the analyzer chain if - // other behavior is wanted (pruning the term - // to a prefix, throwing an exception, etc). - - if (docState.maxTermPrefix == null) - docState.maxTermPrefix = new String(tokenText, 0, 30); - - consumer.skippingLongTerm(token); - return; - } - charPool.nextBuffer(); - } - - // Refill? - if (0 == perThread.freePostingsCount) - perThread.morePostings(); - - // Pull next free RawPostingList from free list - p = perThread.freePostings[--perThread.freePostingsCount]; - assert p != null; - - final char[] text = charPool.buffer; - final int textUpto = charPool.charUpto; - p.textStart = textUpto + charPool.charOffset; - charPool.charUpto += textLen1; - System.arraycopy(tokenText, 0, text, textUpto, tokenTextLen); - text[textUpto+tokenTextLen] = 0xffff; - - assert postingsHash[hashPos] == null; - postingsHash[hashPos] = p; - numPostings++; - - if (numPostings == postingsHashHalfSize) - rehashPostings(2*postingsHashSize); - + if (termID >= 0) {// New posting + bytesHash.byteStart(termID); // Init stream slices - if (numPostingInt + intPool.intUpto > DocumentsWriter.INT_BLOCK_SIZE) + if (numPostingInt + intPool.intUpto > IntBlockPool.INT_BLOCK_SIZE) { intPool.nextBuffer(); + } - if (DocumentsWriter.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) + if (ByteBlockPool.BYTE_BLOCK_SIZE - bytePool.byteUpto < numPostingInt*ByteBlockPool.FIRST_LEVEL_SIZE) { bytePool.nextBuffer(); + } intUptos = intPool.buffer; intUptoStart = intPool.intUpto; intPool.intUpto += streamCount; - p.intStart = intUptoStart + intPool.intOffset; + postingsArray.intStarts[termID] = intUptoStart + intPool.intOffset; for(int i=0;i> DocumentsWriter.INT_BLOCK_SHIFT]; - intUptoStart = p.intStart & DocumentsWriter.INT_BLOCK_MASK; - consumer.addTerm(token, p); + termID = (-termID)-1; + int intStart = postingsArray.intStarts[termID]; + intUptos = intPool.buffers[intStart >> IntBlockPool.INT_BLOCK_SHIFT]; + intUptoStart = intStart & IntBlockPool.INT_BLOCK_MASK; + addTerm(termID); } - if (doNextCall) - nextPerField.add(token, p.textStart); + if (doNextCall) { + nextPerField.add(postingsArray.textStarts[termID]); + } } int[] intUptos; int intUptoStart; void writeByte(int stream, byte b) { int upto = intUptos[intUptoStart+stream]; - byte[] bytes = bytePool.buffers[upto >> DocumentsWriter.BYTE_BLOCK_SHIFT]; + byte[] bytes = bytePool.buffers[upto >> ByteBlockPool.BYTE_BLOCK_SHIFT]; assert bytes != null; - int offset = upto & DocumentsWriter.BYTE_BLOCK_MASK; + int offset = upto & ByteBlockPool.BYTE_BLOCK_MASK; if (bytes[offset] != 0) { // End of slice; allocate a new one offset = bytePool.allocSlice(bytes, offset); @@ -496,51 +224,92 @@ writeByte(stream, (byte) i); } - void finish() throws IOException { - consumer.finish(); - if (nextPerField != null) - nextPerField.finish(); - } + private static final class PostingsBytesStartArray extends BytesStartArray { - /** Called when postings hash is too small (> 50% - * 
occupied) or too large (< 20% occupied). */ - void rehashPostings(final int newSize) { + private final TermsHashPerField perField; + private final Counter bytesUsed; - final int newMask = newSize-1; + private PostingsBytesStartArray( + TermsHashPerField perField, Counter bytesUsed) { + this.perField = perField; + this.bytesUsed = bytesUsed; + } - RawPostingList[] newHash = new RawPostingList[newSize]; - for(int i=0;i> DocumentsWriter.CHAR_BLOCK_SHIFT]; - int pos = start; - while(text[pos] != 0xffff) - pos++; - code = 0; - while (pos > start) - code = (code*31) + text[--pos]; - } else - code = p0.textStart; + @Override + public int[] init() { + if (perField.postingsArray == null) { + perField.postingsArray = perField.createPostingsArray(2); + perField.newPostingsArray(); + bytesUsed.addAndGet(perField.postingsArray.size * perField.postingsArray.bytesPerPosting()); + } + return perField.postingsArray.textStarts; + } - int hashPos = code & newMask; - assert hashPos >= 0; - if (newHash[hashPos] != null) { - final int inc = ((code>>8)+code)|1; - do { - code += inc; - hashPos = code & newMask; - } while (newHash[hashPos] != null); - } - newHash[hashPos] = p0; + @Override + public int[] grow() { + ParallelPostingsArray postingsArray = perField.postingsArray; + final int oldSize = perField.postingsArray.size; + postingsArray = perField.postingsArray = postingsArray.grow(); + perField.newPostingsArray(); + bytesUsed.addAndGet((postingsArray.bytesPerPosting() * (postingsArray.size - oldSize))); + return postingsArray.textStarts; + } + + @Override + public int[] clear() { + if (perField.postingsArray != null) { + bytesUsed.addAndGet(-(perField.postingsArray.size * perField.postingsArray.bytesPerPosting())); + perField.postingsArray = null; + perField.newPostingsArray(); } + return null; } - postingsHashMask = newMask; - postingsHash = newHash; - postingsHashSize = newSize; - postingsHashHalfSize = newSize >> 1; + @Override + public Counter bytesUsed() { + return bytesUsed; + } } + + @Override + public int compareTo(TermsHashPerField other) { + return fieldInfo.name.compareTo(other.fieldInfo.name); + } + + /** Finish adding all instances of this field to the + * current document. */ + void finish() throws IOException { + if (nextPerField != null) { + nextPerField.finish(); + } + } + + /** Start adding a new field instance; first is true if + * this is the first time this field name was seen in the + * document. */ + boolean start(IndexableField field, boolean first) { + termAtt = fieldState.termAttribute; + // EmptyTokenStream can have null term att + if (termAtt != null) { + termBytesRef = termAtt.getBytesRef(); + } + if (nextPerField != null) { + doNextCall = nextPerField.start(field, first); + } + + return true; + } + + /** Called when a term is seen for the first time. */ + abstract void newTerm(int termID) throws IOException; + + /** Called when a previously seen term is seen again. */ + abstract void addTerm(int termID) throws IOException; + + /** Called when the postings array is initialized or + * resized. */ + abstract void newPostingsArray(); + + /** Creates a new postings array of the specified size. */ + abstract ParallelPostingsArray createPostingsArray(int size); } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TermsHashPerThread.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TieredMergePolicy.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TrackingIndexWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TwoPhaseCommit.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/TwoPhaseCommitTool.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/index/UpgradeIndexMergePolicy.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/index/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/index/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/index/package.html 17 Aug 2012 14:55:03 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/index/package.html 16 Dec 2014 11:31:41 -0000 1.1.2.1 @@ -18,9 +18,247 @@ - Code to maintain and access indices. + +

    Table Of Contents

    1. Postings APIs
    2.
    3. Index Statistics
    4.

    Postings APIs

    Fields


+{@link org.apache.lucene.index.Fields} is the initial entry point into the
+postings APIs; it can be obtained in several ways:

    +// access indexed fields for an index segment
    +Fields fields = reader.fields();
    +// access term vector fields for a specified document
    +Fields fields = reader.getTermVectors(docid);
    +
+Fields implements Java's Iterable interface, so it's easy to enumerate the
+list of fields:
+
    +// enumerate list of fields
    +for (String field : fields) {
    +  // access the terms for this field
    +  Terms terms = fields.terms(field);
    +}
    +
    +

    Terms

+{@link org.apache.lucene.index.Terms} represents the collection of terms
+within a field. It exposes some metadata and statistics,
+and an API for enumeration.

    +// metadata about the field
    +System.out.println("positions? " + terms.hasPositions());
    +System.out.println("offsets? " + terms.hasOffsets());
    +System.out.println("payloads? " + terms.hasPayloads());
    +// iterate through terms
    +TermsEnum termsEnum = terms.iterator(null);
    +BytesRef term = null;
    +while ((term = termsEnum.next()) != null) {
    +  doSomethingWith(termsEnum.term());
    +}
    +
    +{@link org.apache.lucene.index.TermsEnum} provides an iterator over the list +of terms within a field, some statistics about the term, +and methods to access the term's documents and +positions. +
    +// seek to a specific term
    +boolean found = termsEnum.seekExact(new BytesRef("foobar"));
    +if (found) {
    +  // get the document frequency
    +  System.out.println(termsEnum.docFreq());
    +  // enumerate through documents
    +  DocsEnum docs = termsEnum.docs(null, null);
    +  // enumerate through documents and positions
    +  DocsAndPositionsEnum docsAndPositions = termsEnum.docsAndPositions(null, null);
    +}
    +
    +

    Documents

+{@link org.apache.lucene.index.DocsEnum} is an extension of
+{@link org.apache.lucene.search.DocIdSetIterator} that iterates over the list of
+documents for a term, along with the term frequency within that document.

    +int docid;
    +while ((docid = docsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    +  System.out.println(docid);
    +  System.out.println(docsEnum.freq());
    +}
    +
    +

    Positions

+{@link org.apache.lucene.index.DocsAndPositionsEnum} is an extension of
+{@link org.apache.lucene.index.DocsEnum} that additionally allows iteration
+over the positions at which a term occurred within the document, and any additional
+per-position information (offsets and payload).

    +int docid;
    +while ((docid = docsAndPositionsEnum.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
    +  System.out.println(docid);
    +  int freq = docsAndPositionsEnum.freq();
    +  for (int i = 0; i < freq; i++) {
    +     System.out.println(docsAndPositionsEnum.nextPosition());
    +     System.out.println(docsAndPositionsEnum.startOffset());
    +     System.out.println(docsAndPositionsEnum.endOffset());
    +     System.out.println(docsAndPositionsEnum.getPayload());
    +  }
    +}
    +
    +

    Index Statistics

    Term statistics


      +
• {@link org.apache.lucene.index.TermsEnum#docFreq}: Returns the number of
  documents that contain at least one occurrence of the term. This statistic
  is always available for an indexed term. Note that it will also count
  deleted documents; when segments are merged, the statistic is updated as
  those deleted documents are merged away (see the sketch after this list).
    • {@link org.apache.lucene.index.TermsEnum#totalTermFreq}: Returns the number + of occurrences of this term across all documents. Note that this statistic + is unavailable (returns -1) if term frequencies were omitted + from the index + ({@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_ONLY DOCS_ONLY}) + for the field. Like docFreq(), it will also count occurrences that appear in + deleted documents. +
    +
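Both term statistics above can be read from a TermsEnum positioned on a term. A minimal
sketch, assuming an AtomicReader named reader, an indexed field "body", and a term
"lucene" (all hypothetical names):

// look up the term in the field's terms dictionary
Terms terms = reader.terms("body");
TermsEnum termsEnum = terms.iterator(null);
if (termsEnum.seekExact(new BytesRef("lucene"))) {
  // number of documents containing the term (deleted documents included)
  int docFreq = termsEnum.docFreq();
  // total occurrences across all documents, or -1 if frequencies were omitted
  long totalTermFreq = termsEnum.totalTermFreq();
}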

    Field statistics

    • {@link org.apache.lucene.index.Terms#size}: Returns the number of + unique terms in the field. This statistic may be unavailable + (returns -1) for some Terms implementations such as + {@link org.apache.lucene.index.MultiTerms}, where it cannot be efficiently + computed. Note that this count also includes terms that appear only + in deleted documents: when segments are merged such terms are also merged + away and the statistic is then updated. +
    • {@link org.apache.lucene.index.Terms#getDocCount}: Returns the number of + documents that contain at least one occurrence of any term for this field. + This can be thought of as a Field-level docFreq(). Like docFreq() it will + also count deleted documents. +
    • {@link org.apache.lucene.index.Terms#getSumDocFreq}: Returns the number of + postings (term-document mappings in the inverted index) for the field. This + can be thought of as the sum of {@link org.apache.lucene.index.TermsEnum#docFreq} + across all terms in the field, and like docFreq() it will also count postings + that appear in deleted documents. +
    • {@link org.apache.lucene.index.Terms#getSumTotalTermFreq}: Returns the number + of tokens for the field. This can be thought of as the sum of + {@link org.apache.lucene.index.TermsEnum#totalTermFreq} across all terms in the + field, and like totalTermFreq() it will also count occurrences that appear in + deleted documents, and will be unavailable (returns -1) if term + frequencies were omitted from the index + ({@link org.apache.lucene.index.FieldInfo.IndexOptions#DOCS_ONLY DOCS_ONLY}) + for the field. +
    +
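A minimal sketch of reading the field-level statistics above, assuming an AtomicReader
named reader and an indexed field "body" (both hypothetical):

Terms terms = reader.terms("body");
if (terms != null) {
  // number of unique terms, or -1 if not available
  long termCount = terms.size();
  // number of documents with at least one term for this field
  int docCount = terms.getDocCount();
  // number of postings (term-document mappings)
  long sumDocFreq = terms.getSumDocFreq();
  // number of tokens, or -1 if frequencies were omitted
  long sumTotalTermFreq = terms.getSumTotalTermFreq();
}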

    Segment statistics

    • {@link org.apache.lucene.index.IndexReader#maxDoc}: Returns the number of + documents (including deleted documents) in the index. +
    • {@link org.apache.lucene.index.IndexReader#numDocs}: Returns the number + of live documents (excluding deleted documents) in the index. +
    • {@link org.apache.lucene.index.IndexReader#numDeletedDocs}: Returns the + number of deleted documents in the index. +
    • {@link org.apache.lucene.index.Fields#size}: Returns the number of indexed + fields. +
    • {@link org.apache.lucene.index.Fields#getUniqueTermCount}: Returns the number + of indexed terms, the sum of {@link org.apache.lucene.index.Terms#size} + across all fields. +
    +
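As an illustration, the segment-level statistics above can be read directly from a
reader; a minimal sketch, assuming an AtomicReader named reader (hypothetical):

// document counts
int maxDoc = reader.maxDoc();                 // includes deleted documents
int numDocs = reader.numDocs();               // live documents only
int numDeletedDocs = reader.numDeletedDocs(); // deleted documents
// field and term counts
Fields fields = reader.fields();
if (fields != null) {
  int fieldCount = fields.size();
  long uniqueTermCount = fields.getUniqueTermCount(); // -1 if not available
}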

    Document statistics

+Document statistics are available during the indexing process for an indexed field: typically
+a {@link org.apache.lucene.search.similarities.Similarity} implementation will store some
+of these values (possibly in a lossy way) into the normalization value for the document in
+its {@link org.apache.lucene.search.similarities.Similarity#computeNorm} method (a short
+sketch follows the list below).

    +

    +

      +
    • {@link org.apache.lucene.index.FieldInvertState#getLength}: Returns the number of + tokens for this field in the document. Note that this is just the number + of times that {@link org.apache.lucene.analysis.TokenStream#incrementToken} returned + true, and is unrelated to the values in + {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute}. +
    • {@link org.apache.lucene.index.FieldInvertState#getNumOverlap}: Returns the number + of tokens for this field in the document that had a position increment of zero. This + can be used to compute a document length that discounts artificial tokens + such as synonyms. +
    • {@link org.apache.lucene.index.FieldInvertState#getPosition}: Returns the accumulated + position value for this field in the document: computed from the values of + {@link org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute} and including + {@link org.apache.lucene.analysis.Analyzer#getPositionIncrementGap}s across multivalued + fields. +
    • {@link org.apache.lucene.index.FieldInvertState#getOffset}: Returns the total + character offset value for this field in the document: computed from the values of + {@link org.apache.lucene.analysis.tokenattributes.OffsetAttribute} returned by + {@link org.apache.lucene.analysis.TokenStream#end}, and including + {@link org.apache.lucene.analysis.Analyzer#getOffsetGap}s across multivalued + fields. +
    • {@link org.apache.lucene.index.FieldInvertState#getUniqueTermCount}: Returns the number + of unique terms encountered for this field in the document. +
    • {@link org.apache.lucene.index.FieldInvertState#getMaxTermFrequency}: Returns the maximum + frequency across all unique terms encountered for this field in the document. +
    +
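As one example of how these values are typically consumed, a Similarity can discount
overlap tokens in its length normalization. A minimal sketch that extends
DefaultSimilarity; the class name is hypothetical and the exact norm encoding is left
to the superclass:

public class OverlapDiscountSimilarity extends DefaultSimilarity {
  @Override
  public float lengthNorm(FieldInvertState state) {
    // count only tokens with a non-zero position increment (e.g. skip injected synonyms)
    int numTerms = state.getLength() - state.getNumOverlap();
    return state.getBoost() * (float) (1.0 / Math.sqrt(numTerms));
  }
}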

    +

    +Additional user-supplied statistics can be added to the document as DocValues fields and +accessed via {@link org.apache.lucene.index.AtomicReader#getNumericDocValues}. +
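For example, a per-document value indexed as a numeric DocValues field (here a
hypothetical "popularity" field) can be read back per document:

NumericDocValues popularity = reader.getNumericDocValues("popularity");
if (popularity != null) {
  long value = popularity.get(docid);
}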

    +

    Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/CharStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/FastCharStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/MultiFieldQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/ParseException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/QueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/QueryParser.jj'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/QueryParserConstants.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/QueryParserTokenManager.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/Token.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/TokenMgrError.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryParser/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/analyzing/AnalyzingQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/analyzing/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/CharStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/FastCharStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/MultiFieldQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/ParseException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/QueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/QueryParser.jj'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/QueryParserBase.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/QueryParserConstants.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/QueryParserTokenManager.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/Token.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/TokenMgrError.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/classic/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/complexPhrase/ComplexPhraseQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/complexPhrase/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/ext/ExtendableQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/ext/ExtensionQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/ext/Extensions.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/ext/ParserExtension.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/ext/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/QueryNodeError.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/QueryNodeException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/QueryNodeParseException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/QueryParserHelper.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/builders/QueryBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/builders/QueryTreeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/builders/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/config/AbstractQueryConfig.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/config/ConfigurationKey.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/config/FieldConfig.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/config/FieldConfigListener.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/config/QueryConfigHandler.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/config/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/messages/QueryParserMessages.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/messages/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/AndQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/AnyQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/BooleanQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/BoostQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/DeletedQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/FieldQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/FieldValuePairQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/FieldableNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/FuzzyQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/GroupQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/MatchAllDocsQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/MatchNoDocsQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/ModifierQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/NoTokenFoundQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/OpaqueQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/OrQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/PathQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/PhraseSlopQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/ProximityQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/QueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/QueryNodeImpl.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/QuotedFieldQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/RangeQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/SlopQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/TextableQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/TokenizedPhraseQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/ValueQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/nodes/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/parser/EscapeQuerySyntax.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/parser/SyntaxParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/parser/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/processors/NoChildOptimizationQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/processors/QueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/processors/QueryNodeProcessorImpl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/processors/QueryNodeProcessorPipeline.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/processors/RemoveDeletedQueryNodesProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/processors/package.html'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/util/QueryNodeOperation.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/util/StringUtils.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/util/UnescapedCharSequence.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/core/util/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/messages/Message.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/messages/MessageImpl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/messages/NLS.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/messages/NLSException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/messages/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/precedence/PrecedenceQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/precedence/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/precedence/processors/BooleanModifiersQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/precedence/processors/PrecedenceQueryNodeProcessorPipeline.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/precedence/processors/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/CommonQueryParserConfiguration.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/QueryParserUtil.java'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/StandardQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/AnyQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/BooleanQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/BoostQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/DummyQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/FieldQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/FuzzyQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/GroupQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/MatchAllDocsQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/MatchNoDocsQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/ModifierQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/MultiPhraseQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/NumericRangeQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/PhraseQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/PrefixWildcardQueryNodeBuilder.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/RegexpQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/SlopQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/StandardBooleanQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/StandardQueryBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/StandardQueryTreeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/TermRangeQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/WildcardQueryNodeBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/builders/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/FieldBoostMapFCListener.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/FieldDateResolutionFCListener.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/FuzzyConfig.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/NumberDateFormat.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/NumericConfig.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/NumericFieldConfigListener.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/StandardQueryConfigHandler.java'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/config/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/AbstractRangeQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/BooleanModifierNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/MultiPhraseQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/NumericQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/NumericRangeQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/PrefixWildcardQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/RegexpQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/StandardBooleanQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/TermRangeQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/WildcardQueryNode.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/nodes/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/CharStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/EscapeQuerySyntaxImpl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/FastCharStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/ParseException.java'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParser.jj'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserConstants.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/StandardSyntaxParserTokenManager.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/Token.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/TokenMgrError.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/parser/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/AllowLeadingWildcardProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/AnalyzerQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/BooleanQuery2ModifierNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/BooleanSingleChildOptimizationQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/BoostQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/DefaultPhraseSlopQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/FuzzyQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/GroupQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/LowercaseExpandedTermsQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/MatchAllDocsQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/MultiFieldQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/MultiTermRewriteMethodProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/NumericQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/NumericRangeQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/OpenRangeQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/PhraseSlopQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/RemoveEmptyNonLeafQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/StandardQueryNodeProcessorPipeline.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/TermRangeQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/WildcardQueryNodeProcessor.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/flexible/standard/processors/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/simple/SimpleQueryParser.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/simple/package.html'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/queryparser/xml/builders/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/AutomatonQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/BitsFilteredDocIdSet.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/BooleanClause.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/BooleanClause.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/BooleanClause.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/BooleanClause.java 16 Dec 2014 11:31:48 -0000 1.1.2.1 @@ -1,8 +1,6 @@ package org.apache.lucene.search; -import org.apache.lucene.util.Parameter; - -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,36 +18,27 @@ */ /** A clause in a BooleanQuery. */ -public class BooleanClause implements java.io.Serializable { +public class BooleanClause { /** Specifies how clauses are to occur in matching documents. */ - public static final class Occur extends Parameter implements java.io.Serializable { - - private Occur(String name) { - // typesafe enum pattern, no public constructor - super(name); - } + public static enum Occur { - public String toString() { - if (this == MUST) return "+"; - if (this == MUST_NOT) return "-"; - return ""; - } - /** Use this operator for clauses that must appear in the matching documents. */ - public static final Occur MUST = new Occur("MUST"); + MUST { @Override public String toString() { return "+"; } }, + /** Use this operator for clauses that should appear in the * matching documents. For a BooleanQuery with no MUST * clauses one or more SHOULD clauses must match a document * for the BooleanQuery to match. * @see BooleanQuery#setMinimumNumberShouldMatch */ - public static final Occur SHOULD = new Occur("SHOULD"); + SHOULD { @Override public String toString() { return ""; } }, + /** Use this operator for clauses that must not appear in the matching documents. * Note that it is not possible to search for queries that only consist * of a MUST_NOT clause. */ - public static final Occur MUST_NOT = new Occur("MUST_NOT"); - + MUST_NOT { @Override public String toString() { return "-"; } }; + } /** The query whose matching documents are combined by the boolean query. @@ -85,30 +74,33 @@ } public boolean isProhibited() { - return Occur.MUST_NOT.equals(occur); + return Occur.MUST_NOT == occur; } public boolean isRequired() { - return Occur.MUST.equals(occur); + return Occur.MUST == occur; } - /** Returns true iff o is equal to this. */ + /** Returns true if o is equal to this. 
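The enum-based Occur above replaces the old Parameter-based typesafe enum; its overridden toString() values ("+", "", "-") are what BooleanQuery.toString() prints in front of each clause. A minimal sketch of combining the three operators when building a query; the field and term values are made up for illustration, and the BooleanClause(Query, Occur) constructor and add(BooleanClause) call are the ones shown in this diff.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class OccurExample {
  public static void main(String[] args) {
    BooleanQuery bq = new BooleanQuery();
    // MUST: has to match in every hit ("+" prefix in toString())
    bq.add(new BooleanClause(new TermQuery(new Term("body", "lucene")), Occur.MUST));
    // SHOULD: optional, contributes to the score when it matches (no prefix)
    bq.add(new BooleanClause(new TermQuery(new Term("body", "search")), Occur.SHOULD));
    // MUST_NOT: excludes matching documents ("-" prefix)
    bq.add(new BooleanClause(new TermQuery(new Term("body", "deprecated")), Occur.MUST_NOT));
    // Prints something like: +body:lucene body:search -body:deprecated
    System.out.println(bq.toString(""));
  }
}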
*/ + @Override public boolean equals(Object o) { - if (!(o instanceof BooleanClause)) + if (o == null || !(o instanceof BooleanClause)) return false; BooleanClause other = (BooleanClause)o; return this.query.equals(other.query) - && this.occur.equals(other.occur); + && this.occur == other.occur; } /** Returns a hash code value for this object.*/ + @Override public int hashCode() { - return query.hashCode() ^ (Occur.MUST.equals(occur)?1:0) ^ (Occur.MUST_NOT.equals(occur)?2:0); + return query.hashCode() ^ (Occur.MUST == occur?1:0) ^ (Occur.MUST_NOT == occur?2:0); } + @Override public String toString() { return occur.toString() + query.toString(); } Index: 3rdParty_sources/lucene/org/apache/lucene/search/BooleanQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/BooleanQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/BooleanQuery.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/BooleanQuery.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,31 +17,37 @@ * limitations under the License. */ +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.Set; + +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.index.Term; import org.apache.lucene.search.BooleanClause.Occur; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.ToStringUtils; -import java.io.IOException; -import java.util.*; - /** A Query that matches documents matching boolean combinations of other * queries, e.g. {@link TermQuery}s, {@link PhraseQuery}s or other * BooleanQuerys. */ -public class BooleanQuery extends Query { +public class BooleanQuery extends Query implements Iterable { - private static int maxClauseCount = 1024; /** Thrown when an attempt is made to add more than {@link * #getMaxClauseCount()} clauses. This typically happens if - * a PrefixQuery, FuzzyQuery, WildcardQuery, or RangeQuery + * a PrefixQuery, FuzzyQuery, WildcardQuery, or TermRangeQuery * is expanded to many terms during search. */ public static class TooManyClauses extends RuntimeException { - public TooManyClauses() {} - public String getMessage() { - return "maxClauseCount is set to " + maxClauseCount; + public TooManyClauses() { + super("maxClauseCount is set to " + maxClauseCount); } } @@ -52,30 +58,24 @@ */ public static int getMaxClauseCount() { return maxClauseCount; } - /** Set the maximum number of clauses permitted per BooleanQuery. + /** + * Set the maximum number of clauses permitted per BooleanQuery. * Default value is 1024. - *

    TermQuery clauses are generated from for example prefix queries and - * fuzzy queries. Each TermQuery needs some buffer space during search, - * so this parameter indirectly controls the maximum buffer requirements for - * query search. - *

    When this parameter becomes a bottleneck for a Query one can use a - * Filter. For example instead of a {@link RangeQuery} one can use a - * {@link RangeFilter}. - *

    Normally the buffers are allocated by the JVM. When using for example - * {@link org.apache.lucene.store.MMapDirectory} the buffering is left to - * the operating system. */ public static void setMaxClauseCount(int maxClauseCount) { - if (maxClauseCount < 1) + if (maxClauseCount < 1) { throw new IllegalArgumentException("maxClauseCount must be >= 1"); + } BooleanQuery.maxClauseCount = maxClauseCount; } - private ArrayList clauses = new ArrayList(); - private boolean disableCoord; + private ArrayList clauses = new ArrayList<>(); + private final boolean disableCoord; /** Constructs an empty boolean query. */ - public BooleanQuery() {} + public BooleanQuery() { + disableCoord = false; + } /** Constructs an empty boolean query. * @@ -96,20 +96,6 @@ */ public boolean isCoordDisabled() { return disableCoord; } - // Implement coord disabling. - // Inherit javadoc. - public Similarity getSimilarity(Searcher searcher) { - Similarity result = super.getSimilarity(searcher); - if (disableCoord) { // disable coord as requested - result = new SimilarityDelegator(result) { - public float coord(int overlap, int maxOverlap) { - return 1.0f; - } - }; - } - return result; - } - /** * Specifies a minimum number of the optional BooleanClauses * which must be satisfied. @@ -124,13 +110,8 @@ * any specific clauses are required (or prohibited). This number will * only be compared against the number of matching optional clauses. *
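As the setMaxClauseCount javadoc above notes, multi-term queries (prefix, fuzzy, wildcard, term-range) can expand into more clauses than the limit allows, which surfaces as BooleanQuery.TooManyClauses at search time (whether a given query actually expands this way depends on its rewrite method). A hedged sketch of raising the limit when that happens; the field name and the doubling policy are illustrative, not taken from the Lucene sources.

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.TopDocs;

public class MaxClauseCountExample {
  public static TopDocs searchPrefix(IndexSearcher searcher, String prefix) throws IOException {
    try {
      return searcher.search(new PrefixQuery(new Term("body", prefix)), 10);
    } catch (BooleanQuery.TooManyClauses e) {
      // The expansion produced more than getMaxClauseCount() clauses. The limit is
      // static (JVM-wide); raising it trades heap for permissiveness.
      BooleanQuery.setMaxClauseCount(BooleanQuery.getMaxClauseCount() * 2);
      return searcher.search(new PrefixQuery(new Term("body", prefix)), 10);
    }
  }
}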

    - *

    - * EXPERT NOTE: Using this method may force collecting docs in order, - * regardless of whether setAllowDocsOutOfOrder(true) has been called. - *

    * * @param min the number of optional clauses that must match - * @see #setAllowDocsOutOfOrder */ public void setMinimumNumberShouldMatch(int min) { this.minNrShouldMatch = min; @@ -139,7 +120,7 @@ /** * Gets the minimum number of the optional BooleanClauses - * which must be satisifed. + * which must be satisfied. */ public int getMinimumNumberShouldMatch() { return minNrShouldMatch; @@ -159,100 +140,117 @@ * @see #getMaxClauseCount() */ public void add(BooleanClause clause) { - if (clauses.size() >= maxClauseCount) + if (clauses.size() >= maxClauseCount) { throw new TooManyClauses(); + } clauses.add(clause); } /** Returns the set of clauses in this query. */ public BooleanClause[] getClauses() { - return (BooleanClause[])clauses.toArray(new BooleanClause[clauses.size()]); + return clauses.toArray(new BooleanClause[clauses.size()]); } /** Returns the list of clauses in this query. */ - public List clauses() { return clauses; } + public List clauses() { return clauses; } - private class BooleanWeight implements Weight { + /** Returns an iterator on the clauses in this query. It implements the {@link Iterable} interface to + * make it possible to do: + *
    for (BooleanClause clause : booleanQuery) {}
    + */ + @Override + public final Iterator iterator() { return clauses().iterator(); } + + /** + * Expert: the Weight for BooleanQuery, used to + * normalize, score and explain these queries. + * + * @lucene.experimental + */ + protected class BooleanWeight extends Weight { + /** The Similarity implementation. */ protected Similarity similarity; - protected ArrayList weights = new ArrayList(); + protected ArrayList weights; + protected int maxCoord; // num optional + num required + private final boolean disableCoord; - public BooleanWeight(Searcher searcher) + public BooleanWeight(IndexSearcher searcher, boolean disableCoord) throws IOException { - this.similarity = getSimilarity(searcher); + this.similarity = searcher.getSimilarity(); + this.disableCoord = disableCoord; + weights = new ArrayList<>(clauses.size()); for (int i = 0 ; i < clauses.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - weights.add(c.getQuery().createWeight(searcher)); + BooleanClause c = clauses.get(i); + Weight w = c.getQuery().createWeight(searcher); + weights.add(w); + if (!c.isProhibited()) { + maxCoord++; + } } } + @Override public Query getQuery() { return BooleanQuery.this; } - public float getValue() { return getBoost(); } - public float sumOfSquaredWeights() throws IOException { + @Override + public float getValueForNormalization() throws IOException { float sum = 0.0f; for (int i = 0 ; i < weights.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - Weight w = (Weight)weights.get(i); // call sumOfSquaredWeights for all clauses in case of side effects - float s = w.sumOfSquaredWeights(); // sum sub weights - if (!c.isProhibited()) + float s = weights.get(i).getValueForNormalization(); // sum sub weights + if (!clauses.get(i).isProhibited()) { // only add to sum for non-prohibited clauses sum += s; + } } sum *= getBoost() * getBoost(); // boost each sub-weight return sum ; } + public float coord(int overlap, int maxOverlap) { + // LUCENE-4300: in most cases of maxOverlap=1, BQ rewrites itself away, + // so coord() is not applied. But when BQ cannot optimize itself away + // for a single clause (minNrShouldMatch, prohibited clauses, etc), its + // important not to apply coord(1,1) for consistency, it might not be 1.0F + return maxOverlap == 1 ? 1F : similarity.coord(overlap, maxOverlap); + } - public void normalize(float norm) { - norm *= getBoost(); // incorporate boost - for (int i = 0 ; i < weights.size(); i++) { - Weight w = (Weight)weights.get(i); + @Override + public void normalize(float norm, float topLevelBoost) { + topLevelBoost *= getBoost(); // incorporate boost + for (Weight w : weights) { // normalize all clauses, (even if prohibited in case of side affects) - w.normalize(norm); + w.normalize(norm, topLevelBoost); } } - /** @return Returns BooleanScorer2 that uses and provides skipTo(), - * and scores documents in document number order. 
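setMinimumNumberShouldMatch, described a few hunks above, counts only the optional (SHOULD) clauses; required and prohibited clauses are ignored by the minimum. A small sketch, assuming one required term plus three optional terms of which at least two must also match; field and term values are illustrative, and add(Query, Occur) is the standard two-argument convenience method.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.TermQuery;

public class MinShouldMatchExample {
  public static BooleanQuery build() {
    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term("body", "lucene")), Occur.MUST);    // always required, not counted
    bq.add(new TermQuery(new Term("body", "scoring")), Occur.SHOULD);
    bq.add(new TermQuery(new Term("body", "boolean")), Occur.SHOULD);
    bq.add(new TermQuery(new Term("body", "filter")), Occur.SHOULD);
    // At least two of the three SHOULD clauses must match, in addition to the MUST clause.
    bq.setMinimumNumberShouldMatch(2);
    return bq;
  }
}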
- */ - public Scorer scorer(IndexReader reader) throws IOException { - BooleanScorer2 result = new BooleanScorer2(similarity, - minNrShouldMatch, - allowDocsOutOfOrder); - - for (int i = 0 ; i < weights.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - Weight w = (Weight)weights.get(i); - Scorer subScorer = w.scorer(reader); - if (subScorer != null) - result.add(subScorer, c.isRequired(), c.isProhibited()); - else if (c.isRequired()) - return null; - } - - return result; - } - - public Explanation explain(IndexReader reader, int doc) + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { final int minShouldMatch = BooleanQuery.this.getMinimumNumberShouldMatch(); ComplexExplanation sumExpl = new ComplexExplanation(); sumExpl.setDescription("sum of:"); int coord = 0; - int maxCoord = 0; float sum = 0.0f; boolean fail = false; int shouldMatchCount = 0; - for (int i = 0 ; i < weights.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - Weight w = (Weight)weights.get(i); - Explanation e = w.explain(reader, doc); - if (!c.isProhibited()) maxCoord++; + Iterator cIter = clauses.iterator(); + for (Iterator wIter = weights.iterator(); wIter.hasNext();) { + Weight w = wIter.next(); + BooleanClause c = cIter.next(); + if (w.scorer(context, context.reader().getLiveDocs()) == null) { + if (c.isRequired()) { + fail = true; + Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); + sumExpl.addDetail(r); + } + continue; + } + Explanation e = w.explain(context, doc); if (e.isMatch()) { if (!c.isProhibited()) { sumExpl.addDetail(e); @@ -265,8 +263,9 @@ sumExpl.addDetail(r); fail = true; } - if (c.getOccur().equals(Occur.SHOULD)) + if (c.getOccur() == Occur.SHOULD) { shouldMatchCount++; + } } else if (c.isRequired()) { Explanation r = new Explanation(0.0f, "no match on required clause (" + c.getQuery().toString() + ")"); r.addDetail(e); @@ -291,10 +290,10 @@ sumExpl.setMatch(0 < coord ? Boolean.TRUE : Boolean.FALSE); sumExpl.setValue(sum); - float coordFactor = similarity.coord(coord, maxCoord); - if (coordFactor == 1.0f) // coord is no-op + final float coordFactor = disableCoord ? 1.0f : coord(coord, maxCoord); + if (coordFactor == 1.0f) { return sumExpl; // eliminate wrapper - else { + } else { ComplexExplanation result = new ComplexExplanation(sumExpl.isMatch(), sum*coordFactor, "product of:"); @@ -304,72 +303,244 @@ return result; } } - } - /** Whether hit docs may be collected out of docid order. */ - private static boolean allowDocsOutOfOrder = false; + @Override + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, + Bits acceptDocs) throws IOException { - /** - * Expert: Indicates whether hit docs may be collected out of docid - * order. - * - *

    - * Background: although the contract of the Scorer class requires that - * documents be iterated in order of doc id, this was not true in early - * versions of Lucene. Many pieces of functionality in the current - * Lucene code base have undefined behavior if this contract is not - * upheld, but in some specific simple cases may be faster. (For - * example: disjunction queries with less than 32 prohibited clauses; - * This setting has no effect for other queries.) - *

    - * - *

    - * Specifics: By setting this option to true, calls to - * {@link HitCollector#collect(int,float)} might be - * invoked first for docid N and only later for docid N-1. - * Being static, this setting is system wide. - *

    - */ - public static void setAllowDocsOutOfOrder(boolean allow) { - allowDocsOutOfOrder = allow; - } - - /** - * Whether hit docs may be collected out of docid order. - * @see #setAllowDocsOutOfOrder(boolean) - */ - public static boolean getAllowDocsOutOfOrder() { - return allowDocsOutOfOrder; - } - - /** - * @deprecated Use {@link #setAllowDocsOutOfOrder(boolean)} instead. - */ - public static void setUseScorer14(boolean use14) { - setAllowDocsOutOfOrder(use14); + if (scoreDocsInOrder || minNrShouldMatch > 1) { + // TODO: (LUCENE-4872) in some cases BooleanScorer may be faster for minNrShouldMatch + // but the same is even true of pure conjunctions... + return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + } + + List prohibited = new ArrayList<>(); + List optional = new ArrayList<>(); + Iterator cIter = clauses.iterator(); + for (Weight w : weights) { + BooleanClause c = cIter.next(); + BulkScorer subScorer = w.bulkScorer(context, false, acceptDocs); + if (subScorer == null) { + if (c.isRequired()) { + return null; + } + } else if (c.isRequired()) { + // TODO: there are some cases where BooleanScorer + // would handle conjunctions faster than + // BooleanScorer2... + return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + } else if (c.isProhibited()) { + prohibited.add(subScorer); + } else { + optional.add(subScorer); + } + } + + return new BooleanScorer(this, disableCoord, minNrShouldMatch, optional, prohibited, maxCoord); + } + + @Override + public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) + throws IOException { + // initially the user provided value, + // but if minNrShouldMatch == optional.size(), + // we will optimize and move these to required, making this 0 + int minShouldMatch = minNrShouldMatch; + + List required = new ArrayList<>(); + List prohibited = new ArrayList<>(); + List optional = new ArrayList<>(); + Iterator cIter = clauses.iterator(); + for (Weight w : weights) { + BooleanClause c = cIter.next(); + Scorer subScorer = w.scorer(context, acceptDocs); + if (subScorer == null) { + if (c.isRequired()) { + return null; + } + } else if (c.isRequired()) { + required.add(subScorer); + } else if (c.isProhibited()) { + prohibited.add(subScorer); + } else { + optional.add(subScorer); + } + } + + // scorer simplifications: + + if (optional.size() == minShouldMatch) { + // any optional clauses are in fact required + required.addAll(optional); + optional.clear(); + minShouldMatch = 0; + } + + if (required.isEmpty() && optional.isEmpty()) { + // no required and optional clauses. + return null; + } else if (optional.size() < minShouldMatch) { + // either >1 req scorer, or there are 0 req scorers and at least 1 + // optional scorer. Therefore if there are not enough optional scorers + // no documents will be matched by the query + return null; + } + + // three cases: conjunction, disjunction, or mix + + // pure conjunction + if (optional.isEmpty()) { + return excl(req(required, disableCoord), prohibited); + } + + // pure disjunction + if (required.isEmpty()) { + return excl(opt(optional, minShouldMatch, disableCoord), prohibited); + } + + // conjunction-disjunction mix: + // we create the required and optional pieces with coord disabled, and then + // combine the two: if minNrShouldMatch > 0, then its a conjunction: because the + // optional side must match. 
otherwise its required + optional, factoring the + // number of optional terms into the coord calculation + + Scorer req = excl(req(required, true), prohibited); + Scorer opt = opt(optional, minShouldMatch, true); + + // TODO: clean this up: its horrible + if (disableCoord) { + if (minShouldMatch > 0) { + return new ConjunctionScorer(this, new Scorer[] { req, opt }, 1F); + } else { + return new ReqOptSumScorer(req, opt); + } + } else if (optional.size() == 1) { + if (minShouldMatch > 0) { + return new ConjunctionScorer(this, new Scorer[] { req, opt }, coord(required.size()+1, maxCoord)); + } else { + float coordReq = coord(required.size(), maxCoord); + float coordBoth = coord(required.size() + 1, maxCoord); + return new BooleanTopLevelScorers.ReqSingleOptScorer(req, opt, coordReq, coordBoth); + } + } else { + if (minShouldMatch > 0) { + return new BooleanTopLevelScorers.CoordinatingConjunctionScorer(this, coords(), req, required.size(), opt); + } else { + return new BooleanTopLevelScorers.ReqMultiOptScorer(req, opt, required.size(), coords()); + } + } + } + + @Override + public boolean scoresDocsOutOfOrder() { + if (minNrShouldMatch > 1) { + // BS2 (in-order) will be used by scorer() + return false; + } + int optionalCount = 0; + for (BooleanClause c : clauses) { + if (c.isRequired()) { + // BS2 (in-order) will be used by scorer() + return false; + } else if (!c.isProhibited()) { + optionalCount++; + } + } + + if (optionalCount == minNrShouldMatch) { + return false; // BS2 (in-order) will be used, as this means conjunction + } + + // scorer() will return an out-of-order scorer if requested. + return true; + } + + private Scorer req(List required, boolean disableCoord) { + if (required.size() == 1) { + Scorer req = required.get(0); + if (!disableCoord && maxCoord > 1) { + return new BooleanTopLevelScorers.BoostedScorer(req, coord(1, maxCoord)); + } else { + return req; + } + } else { + return new ConjunctionScorer(this, + required.toArray(new Scorer[required.size()]), + disableCoord ? 1.0F : coord(required.size(), maxCoord)); + } + } + + private Scorer excl(Scorer main, List prohibited) throws IOException { + if (prohibited.isEmpty()) { + return main; + } else if (prohibited.size() == 1) { + return new ReqExclScorer(main, prohibited.get(0)); + } else { + float coords[] = new float[prohibited.size()+1]; + Arrays.fill(coords, 1F); + return new ReqExclScorer(main, + new DisjunctionSumScorer(this, + prohibited.toArray(new Scorer[prohibited.size()]), + coords)); + } + } + + private Scorer opt(List optional, int minShouldMatch, boolean disableCoord) throws IOException { + if (optional.size() == 1) { + Scorer opt = optional.get(0); + if (!disableCoord && maxCoord > 1) { + return new BooleanTopLevelScorers.BoostedScorer(opt, coord(1, maxCoord)); + } else { + return opt; + } + } else { + float coords[]; + if (disableCoord) { + coords = new float[optional.size()+1]; + Arrays.fill(coords, 1F); + } else { + coords = coords(); + } + if (minShouldMatch > 1) { + return new MinShouldMatchSumScorer(this, optional, minShouldMatch, coords); + } else { + return new DisjunctionSumScorer(this, + optional.toArray(new Scorer[optional.size()]), + coords); + } + } + } + + private float[] coords() { + float[] coords = new float[maxCoord+1]; + coords[0] = 0F; + for (int i = 1; i < coords.length; i++) { + coords[i] = coord(i, maxCoord); + } + return coords; + } } - - /** - * @deprecated Use {@link #getAllowDocsOutOfOrder()} instead. 
- */ - public static boolean getUseScorer14() { - return getAllowDocsOutOfOrder(); - } - protected Weight createWeight(Searcher searcher) throws IOException { - return new BooleanWeight(searcher); + @Override + public Weight createWeight(IndexSearcher searcher) throws IOException { + return new BooleanWeight(searcher, disableCoord); } + @Override public Query rewrite(IndexReader reader) throws IOException { if (minNrShouldMatch == 0 && clauses.size() == 1) { // optimize 1-clause queries - BooleanClause c = (BooleanClause)clauses.get(0); - if (!c.isProhibited()) { // just return clause + BooleanClause c = clauses.get(0); + if (!c.isProhibited()) { // just return clause Query query = c.getQuery().rewrite(reader); // rewrite first if (getBoost() != 1.0f) { // incorporate boost - if (query == c.getQuery()) // if rewrite was no-op - query = (Query)query.clone(); // then clone before boost + if (query == c.getQuery()) { // if rewrite was no-op + query = query.clone(); // then clone before boost + } + // Since the BooleanQuery only has 1 clause, the BooleanQuery will be + // written out. Therefore the rewritten Query's boost must incorporate both + // the clause's boost, and the boost of the BooleanQuery itself query.setBoost(getBoost() * query.getBoost()); } @@ -379,59 +550,75 @@ BooleanQuery clone = null; // recursively rewrite for (int i = 0 ; i < clauses.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); + BooleanClause c = clauses.get(i); Query query = c.getQuery().rewrite(reader); if (query != c.getQuery()) { // clause rewrote: must clone - if (clone == null) - clone = (BooleanQuery)this.clone(); + if (clone == null) { + // The BooleanQuery clone is lazily initialized so only initialize + // it if a rewritten clause differs from the original clause (and hasn't been + // initialized already). If nothing differs, the clone isn't needlessly created + clone = this.clone(); + } clone.clauses.set(i, new BooleanClause(query, c.getOccur())); } } if (clone != null) { return clone; // some clauses rewrote - } else + } else { return this; // no clauses rewrote + } } // inherit javadoc - public void extractTerms(Set terms) { - for (Iterator i = clauses.iterator(); i.hasNext();) { - BooleanClause clause = (BooleanClause) i.next(); - clause.getQuery().extractTerms(terms); - } + @Override + public void extractTerms(Set terms) { + for (BooleanClause clause : clauses) { + if (clause.getOccur() != Occur.MUST_NOT) { + clause.getQuery().extractTerms(terms); + } + } } - public Object clone() { + @Override @SuppressWarnings("unchecked") + public BooleanQuery clone() { BooleanQuery clone = (BooleanQuery)super.clone(); - clone.clauses = (ArrayList)this.clauses.clone(); + clone.clauses = (ArrayList) this.clauses.clone(); return clone; } /** Prints a user-readable version of this query. 
*/ + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); - boolean needParens=(getBoost() != 1.0) || (getMinimumNumberShouldMatch()>0) ; + StringBuilder buffer = new StringBuilder(); + boolean needParens= getBoost() != 1.0 || getMinimumNumberShouldMatch() > 0; if (needParens) { buffer.append("("); } for (int i = 0 ; i < clauses.size(); i++) { - BooleanClause c = (BooleanClause)clauses.get(i); - if (c.isProhibited()) + BooleanClause c = clauses.get(i); + if (c.isProhibited()) { buffer.append("-"); - else if (c.isRequired()) + } else if (c.isRequired()) { buffer.append("+"); + } Query subQuery = c.getQuery(); - if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens - buffer.append("("); - buffer.append(c.getQuery().toString(field)); - buffer.append(")"); - } else - buffer.append(c.getQuery().toString(field)); + if (subQuery != null) { + if (subQuery instanceof BooleanQuery) { // wrap sub-bools in parens + buffer.append("("); + buffer.append(subQuery.toString(field)); + buffer.append(")"); + } else { + buffer.append(subQuery.toString(field)); + } + } else { + buffer.append("null"); + } - if (i != clauses.size()-1) + if (i != clauses.size()-1) { buffer.append(" "); + } } if (needParens) { @@ -443,28 +630,31 @@ buffer.append(getMinimumNumberShouldMatch()); } - if (getBoost() != 1.0f) - { + if (getBoost() != 1.0f) { buffer.append(ToStringUtils.boost(getBoost())); } return buffer.toString(); } /** Returns true iff o is equal to this. */ + @Override public boolean equals(Object o) { - if (!(o instanceof BooleanQuery)) + if (!(o instanceof BooleanQuery)) { return false; + } BooleanQuery other = (BooleanQuery)o; - return (this.getBoost() == other.getBoost()) + return this.getBoost() == other.getBoost() && this.clauses.equals(other.clauses) - && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch(); + && this.getMinimumNumberShouldMatch() == other.getMinimumNumberShouldMatch() + && this.disableCoord == other.disableCoord; } /** Returns a hash code value for this object.*/ + @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ clauses.hashCode() - + getMinimumNumberShouldMatch(); + + getMinimumNumberShouldMatch() + (disableCoord ? 17:0); } - + } Index: 3rdParty_sources/lucene/org/apache/lucene/search/BooleanScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/BooleanScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/BooleanScorer.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/BooleanScorer.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -18,104 +18,215 @@ */ import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; -final class BooleanScorer extends Scorer { - private SubScorer scorers = null; - private BucketTable bucketTable = new BucketTable(); +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.BooleanQuery.BooleanWeight; - private int maxCoord = 1; - private float[] coordFactors = null; +/* Description from Doug Cutting (excerpted from + * LUCENE-1483): + * + * BooleanScorer uses an array to score windows of + * 2K docs. So it scores docs 0-2K first, then docs 2K-4K, + * etc. For each window it iterates through all query terms + * and accumulates a score in table[doc%2K]. It also stores + * in the table a bitmask representing which terms + * contributed to the score. Non-zero scores are chained in + * a linked list. At the end of scoring each window it then + * iterates through the linked list and, if the bitmask + * matches the boolean constraints, collects a hit. For + * boolean queries with lots of frequent terms this can be + * much faster, since it does not need to update a priority + * queue for each posting, instead performing constant-time + * operations per posting. The only downside is that it + * results in hits being delivered out-of-order within the + * window, which means it cannot be nested within other + * scorers. But it works well as a top-level scorer. + * + * The new BooleanScorer2 implementation instead works by + * merging priority queues of postings, albeit with some + * clever tricks. For example, a pure conjunction (all terms + * required) does not require a priority queue. Instead it + * sorts the posting streams at the start, then repeatedly + * skips the first to to the last. If the first ever equals + * the last, then there's a hit. When some terms are + * required and some terms are optional, the conjunction can + * be evaluated first, then the optional terms can all skip + * to the match and be added to the score. Thus the + * conjunction can reduce the number of priority queue + * updates for the optional terms. 
*/ - private int requiredMask = 0; - private int prohibitedMask = 0; - private int nextMask = 1; +final class BooleanScorer extends BulkScorer { + + private static final class BooleanScorerCollector extends Collector { + private BucketTable bucketTable; + private int mask; + private Scorer scorer; + + public BooleanScorerCollector(int mask, BucketTable bucketTable) { + this.mask = mask; + this.bucketTable = bucketTable; + } + + @Override + public void collect(final int doc) throws IOException { + final BucketTable table = bucketTable; + final int i = doc & BucketTable.MASK; + final Bucket bucket = table.buckets[i]; + + if (bucket.doc != doc) { // invalid bucket + bucket.doc = doc; // set doc + bucket.score = scorer.score(); // initialize score + bucket.bits = mask; // initialize mask + bucket.coord = 1; // initialize coord - private final int minNrShouldMatch; + bucket.next = table.first; // push onto valid list + table.first = bucket; + } else { // valid bucket + bucket.score += scorer.score(); // increment score + bucket.bits |= mask; // add bits in mask + bucket.coord++; // increment coord + } + } + + @Override + public void setNextReader(AtomicReaderContext context) { + // not needed by this implementation + } + + @Override + public void setScorer(Scorer scorer) { + this.scorer = scorer; + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return true; + } - BooleanScorer(Similarity similarity) { - this(similarity, 1); } - BooleanScorer(Similarity similarity, int minNrShouldMatch) { - super(similarity); - this.minNrShouldMatch = minNrShouldMatch; + static final class Bucket { + int doc = -1; // tells if bucket is valid + double score; // incremental score + // TODO: break out bool anyProhibited, int + // numRequiredMatched; then we can remove 32 limit on + // required clauses + int bits; // used for bool constraints + int coord; // count of terms in score + Bucket next; // next valid bucket } + /** A simple hash table of document scores within a range. */ + static final class BucketTable { + public static final int SIZE = 1 << 11; + public static final int MASK = SIZE - 1; + + final Bucket[] buckets = new Bucket[SIZE]; + Bucket first = null; // head of valid list + + public BucketTable() { + // Pre-fill to save the lazy init when collecting + // each sub: + for(int idx=0;idx optionalScorers, List prohibitedScorers, int maxCoord) throws IOException { + this.minNrShouldMatch = minNrShouldMatch; + this.weight = weight; - if (prohibited) - prohibitedMask |= mask; // update prohibited mask - else if (required) - requiredMask |= mask; // update required mask + for (BulkScorer scorer : optionalScorers) { + scorers = new SubScorer(scorer, false, false, bucketTable.newCollector(0), scorers); + } + + for (BulkScorer scorer : prohibitedScorers) { + scorers = new SubScorer(scorer, false, true, bucketTable.newCollector(PROHIBITED_MASK), scorers); + } - scorers = new SubScorer(scorer, required, prohibited, - bucketTable.newCollector(mask), scorers); + coordFactors = new float[optionalScorers.size() + 1]; + for (int i = 0; i < coordFactors.length; i++) { + coordFactors[i] = disableCoord ? 
1.0f : weight.coord(i, maxCoord); + } } - private final void computeCoordFactors() { - coordFactors = new float[maxCoord]; - for (int i = 0; i < maxCoord; i++) - coordFactors[i] = getSimilarity().coord(i, maxCoord-1); - } + @Override + public boolean score(Collector collector, int max) throws IOException { - private int end; - private Bucket current; - - public void score(HitCollector hc) throws IOException { - next(); - score(hc, Integer.MAX_VALUE); - } - - protected boolean score(HitCollector hc, int max) throws IOException { - if (coordFactors == null) - computeCoordFactors(); - boolean more; Bucket tmp; - + FakeScorer fs = new FakeScorer(); + + // The internal loop will set the score and doc before calling collect. + collector.setScorer(fs); do { bucketTable.first = null; while (current != null) { // more queued // check prohibited & required - if ((current.bits & prohibitedMask) == 0 && - (current.bits & requiredMask) == requiredMask) { + if ((current.bits & PROHIBITED_MASK) == 0) { + + // TODO: re-enable this if BQ ever sends us required + // clauses + //&& (current.bits & requiredMask) == requiredMask) { - if (current.doc >= max){ + // NOTE: Lucene always passes max = + // Integer.MAX_VALUE today, because we never embed + // a BooleanScorer inside another (even though + // that should work)... but in theory an outside + // app could pass a different max so we must check + // it: + if (current.doc >= max) { tmp = current; current = current.next; tmp.next = bucketTable.first; @@ -124,7 +235,10 @@ } if (current.coord >= minNrShouldMatch) { - hc.collect(current.doc, current.score * coordFactors[current.coord]); + fs.score = (float) (current.score * coordFactors[current.coord]); + fs.doc = current.doc; + fs.freq = current.coord; + collector.collect(current.doc); } } @@ -141,10 +255,9 @@ more = false; end += BucketTable.SIZE; for (SubScorer sub = scorers; sub != null; sub = sub.next) { - if (!sub.done) { - sub.done = !sub.scorer.score(sub.collector, end); - if (!sub.done) - more = true; + if (sub.more) { + sub.more = sub.scorer.score(sub.collector, end); + more |= sub.more; } } current = bucketTable.first; @@ -154,112 +267,9 @@ return false; } - public int doc() { return current.doc; } - - public boolean next() throws IOException { - boolean more; - do { - while (bucketTable.first != null) { // more queued - current = bucketTable.first; - bucketTable.first = current.next; // pop the queue - - // check prohibited & required, and minNrShouldMatch - if ((current.bits & prohibitedMask) == 0 && - (current.bits & requiredMask) == requiredMask && - current.coord >= minNrShouldMatch) { - return true; - } - } - - // refill the queue - more = false; - end += BucketTable.SIZE; - for (SubScorer sub = scorers; sub != null; sub = sub.next) { - Scorer scorer = sub.scorer; - while (!sub.done && scorer.doc() < end) { - sub.collector.collect(scorer.doc(), scorer.score()); - sub.done = !scorer.next(); - } - if (!sub.done) { - more = true; - } - } - } while (bucketTable.first != null || more); - - return false; - } - - public float score() { - if (coordFactors == null) - computeCoordFactors(); - return current.score * coordFactors[current.coord]; - } - - static final class Bucket { - int doc = -1; // tells if bucket is valid - float score; // incremental score - int bits; // used for bool constraints - int coord; // count of terms in score - Bucket next; // next valid bucket - } - - /** A simple hash table of document scores within a range. 
*/ - static final class BucketTable { - public static final int SIZE = 1 << 11; - public static final int MASK = SIZE - 1; - - final Bucket[] buckets = new Bucket[SIZE]; - Bucket first = null; // head of valid list - - public BucketTable() {} - - public final int size() { return SIZE; } - - public HitCollector newCollector(int mask) { - return new Collector(mask, this); - } - } - - static final class Collector extends HitCollector { - private BucketTable bucketTable; - private int mask; - public Collector(int mask, BucketTable bucketTable) { - this.mask = mask; - this.bucketTable = bucketTable; - } - public final void collect(final int doc, final float score) { - final BucketTable table = bucketTable; - final int i = doc & BucketTable.MASK; - Bucket bucket = table.buckets[i]; - if (bucket == null) - table.buckets[i] = bucket = new Bucket(); - - if (bucket.doc != doc) { // invalid bucket - bucket.doc = doc; // set doc - bucket.score = score; // initialize score - bucket.bits = mask; // initialize mask - bucket.coord = 1; // initialize coord - - bucket.next = table.first; // push onto valid list - table.first = bucket; - } else { // valid bucket - bucket.score += score; // increment score - bucket.bits |= mask; // add bits in mask - bucket.coord++; // increment coord - } - } - } - - public boolean skipTo(int target) { - throw new UnsupportedOperationException(); - } - - public Explanation explain(int doc) { - throw new UnsupportedOperationException(); - } - + @Override public String toString() { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("boolean("); for (SubScorer sub = scorers; sub != null; sub = sub.next) { buffer.append(sub.scorer.toString()); @@ -268,5 +278,4 @@ buffer.append(")"); return buffer.toString(); } - } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/BooleanScorer2.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/BooleanTopLevelScorers.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/BoostAttribute.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/BoostAttributeImpl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/BulkScorer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/CachingCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/CachingSpanFilter.java'. Fisheye: No comparison available. Pass `N' to diff? 
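The windowed scoring strategy excerpted from Doug Cutting's LUCENE-1483 description above can be illustrated outside Lucene with plain sorted postings arrays. The sketch below is not the BooleanScorer class itself (it omits coord factors, minNrShouldMatch and the linked bucket list) but shows the core idea: accumulate a score and a prohibited-term bitmask per slot of a 2K-doc window, then sweep the window and emit the surviving docs. Class and method names are hypothetical.

import java.util.Arrays;

public class WindowedScoringSketch {
  static final int SIZE = 1 << 11;   // 2K-doc window, as in BucketTable above
  static final int MASK = SIZE - 1;

  /** optional: sorted doc ids of SHOULD terms; prohibited: sorted doc ids of MUST_NOT terms. */
  public static void score(int[][] optional, int[][] prohibited, int maxDoc) {
    float[] score = new float[SIZE];
    int[] bits = new int[SIZE];
    int[] doc = new int[SIZE];
    Arrays.fill(doc, -1);
    int[] optPos = new int[optional.length];
    int[] proPos = new int[prohibited.length];
    final int prohibitedBit = 1;     // marks "a prohibited term matched this doc"

    for (int end = SIZE; end - SIZE < maxDoc; end += SIZE) {
      // accumulate every posting that falls inside the current window, one pass per term
      for (int i = 0; i < optional.length; i++) {
        while (optPos[i] < optional[i].length && optional[i][optPos[i]] < end) {
          int d = optional[i][optPos[i]++], slot = d & MASK;
          if (doc[slot] != d) { doc[slot] = d; score[slot] = 0f; bits[slot] = 0; }
          score[slot] += 1.0f;       // stand-in for the term's real score contribution
        }
      }
      for (int i = 0; i < prohibited.length; i++) {
        while (proPos[i] < prohibited[i].length && prohibited[i][proPos[i]] < end) {
          int d = prohibited[i][proPos[i]++], slot = d & MASK;
          if (doc[slot] != d) { doc[slot] = d; score[slot] = 0f; bits[slot] = 0; }
          bits[slot] |= prohibitedBit;
        }
      }
      // sweep the window and emit docs that scored and were not prohibited
      for (int slot = 0; slot < SIZE; slot++) {
        int d = doc[slot];
        if (d >= end - SIZE && d < end && (bits[slot] & prohibitedBit) == 0 && score[slot] > 0f) {
          System.out.println("doc " + d + " score " + score[slot]);
        }
      }
    }
  }
}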
Index: 3rdParty_sources/lucene/org/apache/lucene/search/CachingWrapperFilter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/CachingWrapperFilter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/CachingWrapperFilter.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/CachingWrapperFilter.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,88 +17,134 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; -import java.util.BitSet; -import java.util.WeakHashMap; -import java.util.Map; +import static org.apache.lucene.search.DocIdSet.EMPTY; + import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.WeakHashMap; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.WAH8DocIdSet; + /** - * Wraps another filter's result and caches it. The purpose is to allow - * filters to simply filter, and then wrap with this class to add caching. + * Wraps another {@link Filter}'s result and caches it. The purpose is to allow + * filters to simply filter, and then wrap with this class + * to add caching. */ -public class CachingWrapperFilter extends Filter { - protected Filter filter; +public class CachingWrapperFilter extends Filter implements Accountable { + private final Filter filter; + private final Map cache = Collections.synchronizedMap(new WeakHashMap()); - /** - * A transient Filter cache. To cache Filters even when using {@link RemoteSearchable} use - * {@link RemoteCachingWrapperFilter} instead. - */ - protected transient Map cache; - - /** + /** Wraps another filter's result and caches it. * @param filter Filter to cache results of */ public CachingWrapperFilter(Filter filter) { this.filter = filter; } /** - * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. + * Gets the contained filter. + * @return the contained filter. */ - public BitSet bits(IndexReader reader) throws IOException { - if (cache == null) { - cache = new WeakHashMap(); - } + public Filter getFilter() { + return filter; + } - synchronized (cache) { // check cache - BitSet cached = (BitSet) cache.get(reader); - if (cached != null) { - return cached; + /** + * Provide the DocIdSet to be cached, using the DocIdSet provided + * by the wrapped Filter.

    This implementation returns the given {@link DocIdSet}, + * if {@link DocIdSet#isCacheable} returns true, else it calls + * {@link #cacheImpl(DocIdSetIterator,AtomicReader)} + *

    Note: This method returns {@linkplain DocIdSet#EMPTY} if the given docIdSet + * is null or if {@link DocIdSet#iterator()} return null. The empty + * instance is use as a placeholder in the cache instead of the null value. + */ + protected DocIdSet docIdSetToCache(DocIdSet docIdSet, AtomicReader reader) throws IOException { + if (docIdSet == null) { + // this is better than returning null, as the nonnull result can be cached + return EMPTY; + } else if (docIdSet.isCacheable()) { + return docIdSet; + } else { + final DocIdSetIterator it = docIdSet.iterator(); + // null is allowed to be returned by iterator(), + // in this case we wrap with the sentinel set, + // which is cacheable. + if (it == null) { + return EMPTY; + } else { + return cacheImpl(it, reader); } } - - final BitSet bits = filter.bits(reader); - - synchronized (cache) { // update cache - cache.put(reader, bits); - } - - return bits; } - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - if (cache == null) { - cache = new WeakHashMap(); - } + /** + * Default cache implementation: uses {@link WAH8DocIdSet}. + */ + protected DocIdSet cacheImpl(DocIdSetIterator iterator, AtomicReader reader) throws IOException { + WAH8DocIdSet.Builder builder = new WAH8DocIdSet.Builder(); + builder.add(iterator); + return builder.build(); + } - synchronized (cache) { // check cache - DocIdSet cached = (DocIdSet) cache.get(reader); - if (cached != null) { - return cached; - } - } + // for testing + int hitCount, missCount; - final DocIdSet docIdSet = filter.getDocIdSet(reader); + @Override + public DocIdSet getDocIdSet(AtomicReaderContext context, final Bits acceptDocs) throws IOException { + final AtomicReader reader = context.reader(); + final Object key = reader.getCoreCacheKey(); - synchronized (cache) { // update cache - cache.put(reader, docIdSet); + DocIdSet docIdSet = cache.get(key); + if (docIdSet != null) { + hitCount++; + } else { + missCount++; + docIdSet = docIdSetToCache(filter.getDocIdSet(context, null), reader); + assert docIdSet.isCacheable(); + cache.put(key, docIdSet); } - return docIdSet; - + return docIdSet == EMPTY ? null : BitsFilteredDocIdSet.wrap(docIdSet, acceptDocs); } - + + @Override public String toString() { - return "CachingWrapperFilter("+filter+")"; + return getClass().getSimpleName() + "("+filter+")"; } + @Override public boolean equals(Object o) { - if (!(o instanceof CachingWrapperFilter)) return false; - return this.filter.equals(((CachingWrapperFilter)o).filter); + if (o == null || !getClass().equals(o.getClass())) return false; + final CachingWrapperFilter other = (CachingWrapperFilter) o; + return this.filter.equals(other.filter); } + @Override public int hashCode() { - return filter.hashCode() ^ 0x1117BF25; + return (filter.hashCode() ^ getClass().hashCode()); } + + @Override + public long ramBytesUsed() { + + // Sync only to pull the current set of values: + List docIdSets; + synchronized(cache) { + docIdSets = new ArrayList<>(cache.values()); + } + + long total = 0; + for(DocIdSet dis : docIdSets) { + total += dis.ramBytesUsed(); + } + + return total; + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/CollectionStatistics.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/CollectionTerminatedException.java'. Fisheye: No comparison available. Pass `N' to diff? 
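The rewritten CachingWrapperFilter caches one DocIdSet per segment core (keyed on the reader's core cache key), so the wrapped filter is evaluated only the first time a given segment is searched. A minimal usage sketch, assuming an existing IndexSearcher; the "status" field and the wrapped QueryWrapperFilter are illustrative.

import java.io.IOException;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.CachingWrapperFilter;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;

public class CachedFilterExample {
  // Build the wrapper once and reuse it: the cache lives inside the wrapper and is
  // keyed per segment core, so reopened readers that share segments still hit it.
  private static final Filter PUBLISHED_ONLY =
      new CachingWrapperFilter(
          new QueryWrapperFilter(new TermQuery(new Term("status", "published"))));

  public static TopDocs searchPublished(IndexSearcher searcher) throws IOException {
    // The first search against a new segment computes and caches its DocIdSet;
    // later searches against the same segment reuse the cached set.
    return searcher.search(new MatchAllDocsQuery(), PUBLISHED_ONLY, 10);
  }
}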
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/Collector.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/ComplexExplanation.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/ComplexExplanation.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/ComplexExplanation.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/ComplexExplanation.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -27,7 +27,7 @@ } public ComplexExplanation(boolean match, float value, String description) { - // NOTE: use of "boolean" instead of "Boolean" in params is concious + // NOTE: use of "boolean" instead of "Boolean" in params is conscious // choice to encourage clients to be specific. super(value, description); this.match = Boolean.valueOf(match); @@ -52,11 +52,13 @@ *

    * @see #getMatch */ + @Override public boolean isMatch() { Boolean m = getMatch(); return (null != m ? m.booleanValue() : super.isMatch()); } + @Override protected String getSummary() { if (null == getMatch()) return super.getSummary(); Index: 3rdParty_sources/lucene/org/apache/lucene/search/ConjunctionScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/ConjunctionScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/ConjunctionScorer.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/ConjunctionScorer.java 16 Dec 2014 11:31:48 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,109 +18,125 @@ */ import java.io.IOException; +import java.util.ArrayList; import java.util.Collection; -import java.util.Arrays; import java.util.Comparator; +import org.apache.lucene.util.ArrayUtil; + /** Scorer for conjunctions, sets of queries, all of which are required. */ class ConjunctionScorer extends Scorer { - private final Scorer[] scorers; - - private boolean firstTime=true; - private boolean more; + protected int lastDoc = -1; + protected final DocsAndFreqs[] docsAndFreqs; + private final DocsAndFreqs lead; private final float coord; - private int lastDoc=-1; - public ConjunctionScorer(Similarity similarity, Collection scorers) throws IOException { - this(similarity, (Scorer[])scorers.toArray(new Scorer[scorers.size()])); + ConjunctionScorer(Weight weight, Scorer[] scorers) { + this(weight, scorers, 1f); } + + ConjunctionScorer(Weight weight, Scorer[] scorers, float coord) { + super(weight); + this.coord = coord; + this.docsAndFreqs = new DocsAndFreqs[scorers.length]; + for (int i = 0; i < scorers.length; i++) { + docsAndFreqs[i] = new DocsAndFreqs(scorers[i]); + } + // Sort the array the first time to allow the least frequent DocsEnum to + // lead the matching. + ArrayUtil.timSort(docsAndFreqs, new Comparator() { + @Override + public int compare(DocsAndFreqs o1, DocsAndFreqs o2) { + return Long.compare(o1.cost, o2.cost); + } + }); - public ConjunctionScorer(Similarity similarity, Scorer[] scorers) throws IOException { - super(similarity); - this.scorers = scorers; - coord = getSimilarity().coord(this.scorers.length, this.scorers.length); + lead = docsAndFreqs[0]; // least frequent DocsEnum leads the intersection } - public int doc() { return lastDoc; } + private int doNext(int doc) throws IOException { + for(;;) { + // doc may already be NO_MORE_DOCS here, but we don't check explicitly + // since all scorers should advance to NO_MORE_DOCS, match, then + // return that value. + advanceHead: for(;;) { + for (int i = 1; i < docsAndFreqs.length; i++) { + // invariant: docsAndFreqs[i].doc <= doc at this point. - public boolean next() throws IOException { - if (firstTime) - return init(0); - else if (more) - more = scorers[(scorers.length-1)].next(); - return doNext(); - } + // docsAndFreqs[i].doc may already be equal to doc if we "broke advanceHead" + // on the previous iteration and the advance on the lead scorer exactly matched. 
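The doNext loop above is the classic leapfrog intersection: the cheapest scorer leads, every other scorer advances to the lead's document, and any overshoot becomes the new candidate that the lead is then advanced to. The standalone sketch below shows the same idea over sorted int[] postings lists; it is not the Lucene class itself and the names are hypothetical.

import java.util.Arrays;
import java.util.Comparator;

public class LeapfrogIntersectSketch {

  /** Returns the smallest index i >= from with postings[i] >= target, or postings.length. */
  static int advance(int[] postings, int from, int target) {
    int i = from;
    while (i < postings.length && postings[i] < target) {
      i++;
    }
    return i;
  }

  /** Prints every doc id present in all of the sorted postings lists. */
  public static void intersect(int[][] postings) {
    if (postings.length == 0) {
      return;
    }
    // Cheapest (shortest) list leads, mirroring the cost-based sort in the constructor above.
    Arrays.sort(postings, Comparator.comparingInt((int[] p) -> p.length));
    int[] pos = new int[postings.length];

    int doc = postings[0].length == 0 ? Integer.MAX_VALUE : postings[0][0];
    while (doc != Integer.MAX_VALUE) {
      boolean allMatch = true;
      for (int i = 1; i < postings.length; i++) {
        pos[i] = advance(postings[i], pos[i], doc);
        if (pos[i] == postings[i].length) {
          return;                     // one list is exhausted: no further matches possible
        }
        if (postings[i][pos[i]] > doc) {
          doc = postings[i][pos[i]];  // overshot: this doc becomes the new candidate
          allMatch = false;
          break;
        }
      }
      if (allMatch) {
        System.out.println("match: doc " + doc);  // every list is positioned on doc
        doc++;                                    // look for the next match past it
      }
      // advance the lead to the current candidate and read off the next lead doc
      pos[0] = advance(postings[0], pos[0], doc);
      doc = pos[0] == postings[0].length ? Integer.MAX_VALUE : postings[0][pos[0]];
    }
  }
}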
+ if (docsAndFreqs[i].doc < doc) { + docsAndFreqs[i].doc = docsAndFreqs[i].scorer.advance(doc); - private boolean doNext() throws IOException { - int first=0; - Scorer lastScorer = scorers[scorers.length-1]; - Scorer firstScorer; - while (more && (firstScorer=scorers[first]).doc() < (lastDoc=lastScorer.doc())) { - more = firstScorer.skipTo(lastDoc); - lastScorer = firstScorer; - first = (first == (scorers.length-1)) ? 0 : first+1; + if (docsAndFreqs[i].doc > doc) { + // DocsEnum beyond the current doc - break and advance lead to the new highest doc. + doc = docsAndFreqs[i].doc; + break advanceHead; + } + } + } + // success - all DocsEnums are on the same doc + return doc; + } + // advance head for next iteration + doc = lead.doc = lead.scorer.advance(doc); } - return more; } - public boolean skipTo(int target) throws IOException { - if (firstTime) - return init(target); - else if (more) - more = scorers[(scorers.length-1)].skipTo(target); - return doNext(); + @Override + public int advance(int target) throws IOException { + lead.doc = lead.scorer.advance(target); + return lastDoc = doNext(lead.doc); } - // Note... most of this could be done in the constructor - // thus skipping a check for firstTime per call to next() and skipTo() - private boolean init(int target) throws IOException { - firstTime=false; - more = scorers.length>1; - for (int i=0; i>1); i++) { - Scorer tmp = scorers[i]; - scorers[i] = scorers[end-i-1]; - scorers[end-i-1] = tmp; - } - - return more; + @Override + public int nextDoc() throws IOException { + lead.doc = lead.scorer.nextDoc(); + return lastDoc = doNext(lead.doc); } + @Override public float score() throws IOException { + // TODO: sum into a double and cast to float if we ever send required clauses to BS1 float sum = 0.0f; - for (int i = 0; i < scorers.length; i++) { - sum += scorers[i].score(); + for (DocsAndFreqs docs : docsAndFreqs) { + sum += docs.scorer.score(); } return sum * coord; } + + @Override + public int freq() { + return docsAndFreqs.length; + } - public Explanation explain(int doc) { - throw new UnsupportedOperationException(); + @Override + public long cost() { + return lead.scorer.cost(); } + @Override + public Collection getChildren() { + ArrayList children = new ArrayList<>(docsAndFreqs.length); + for (DocsAndFreqs docs : docsAndFreqs) { + children.add(new ChildScorer(docs.scorer, "MUST")); + } + return children; + } + + static final class DocsAndFreqs { + final long cost; + final Scorer scorer; + int doc = -1; + + DocsAndFreqs(Scorer scorer) { + this.scorer = scorer; + this.cost = scorer.cost(); + } + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ConstantScoreAutoRewrite.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/ConstantScoreQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/ConstantScoreQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/ConstantScoreQuery.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/ConstantScoreQuery.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. 
See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,154 +17,314 @@ * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; import java.util.Set; /** - * A query that wraps a filter and simply returns a constant score equal to the - * query boost for every document in the filter. - * - * - * @version $Id$ + * A query that wraps another query or a filter and simply returns a constant score equal to the + * query boost for every document that matches the filter or query. + * For queries it therefore simply strips of all scores and returns a constant one. */ public class ConstantScoreQuery extends Query { protected final Filter filter; + protected final Query query; + /** Strips off scores from the passed in Query. The hits will get a constant score + * dependent on the boost factor of this query. */ + public ConstantScoreQuery(Query query) { + if (query == null) + throw new NullPointerException("Query may not be null"); + this.filter = null; + this.query = query; + } + + /** Wraps a Filter as a Query. The hits will get a constant score + * dependent on the boost factor of this query. + * If you simply want to strip off scores from a Query, no longer use + * {@code new ConstantScoreQuery(new QueryWrapperFilter(query))}, instead + * use {@link #ConstantScoreQuery(Query)}! + */ public ConstantScoreQuery(Filter filter) { - this.filter=filter; + if (filter == null) + throw new NullPointerException("Filter may not be null"); + this.filter = filter; + this.query = null; } - /** Returns the encapsulated filter */ + /** Returns the encapsulated filter, returns {@code null} if a query is wrapped. */ public Filter getFilter() { return filter; } + /** Returns the encapsulated query, returns {@code null} if a filter is wrapped. */ + public Query getQuery() { + return query; + } + + @Override public Query rewrite(IndexReader reader) throws IOException { + if (query != null) { + Query rewritten = query.rewrite(reader); + if (rewritten != query) { + rewritten = new ConstantScoreQuery(rewritten); + rewritten.setBoost(this.getBoost()); + return rewritten; + } + } else { + assert filter != null; + // Fix outdated usage pattern from Lucene 2.x/early-3.x: + // because ConstantScoreQuery only accepted filters, + // QueryWrapperFilter was used to wrap queries. + if (filter instanceof QueryWrapperFilter) { + final QueryWrapperFilter qwf = (QueryWrapperFilter) filter; + final Query rewritten = new ConstantScoreQuery(qwf.getQuery().rewrite(reader)); + rewritten.setBoost(this.getBoost()); + return rewritten; + } + } return this; } - public void extractTerms(Set terms) { - // OK to not add any terms when used for MultiSearcher, - // but may not be OK for highlighting + @Override + public void extractTerms(Set terms) { + // TODO: OK to not add any terms when wrapped a filter + // and used with MultiSearcher, but may not be OK for + // highlighting. + // If a query was wrapped, we delegate to query. 
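Editor's note: as the updated class javadoc in this hunk describes, ConstantScoreQuery now wraps either a Query or a Filter and gives every matching document the same score, namely the query boost. A minimal usage sketch (the field and term are invented for illustration; searcher stands for an existing IndexSearcher):

  // Hypothetical usage: every matching document scores 2.0f, regardless of tf/idf.
  Query wrapped = new TermQuery(new Term("body", "lucene"));  // any query
  ConstantScoreQuery constant = new ConstantScoreQuery(wrapped);
  constant.setBoost(2.0f);                                    // becomes the constant score
  // TopDocs hits = searcher.search(constant, 10);

Note that the new rewrite() also unwraps the old ConstantScoreQuery(new QueryWrapperFilter(query)) idiom into this direct query-wrapping form.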
+ if (query != null) + query.extractTerms(terms); } - protected class ConstantWeight implements Weight { - private Similarity similarity; + protected class ConstantWeight extends Weight { + private final Weight innerWeight; private float queryNorm; private float queryWeight; - - public ConstantWeight(Searcher searcher) { - this.similarity = getSimilarity(searcher); + + public ConstantWeight(IndexSearcher searcher) throws IOException { + this.innerWeight = (query == null) ? null : query.createWeight(searcher); } + @Override public Query getQuery() { return ConstantScoreQuery.this; } - public float getValue() { - return queryWeight; - } - - public float sumOfSquaredWeights() throws IOException { + @Override + public float getValueForNormalization() throws IOException { + // we calculate sumOfSquaredWeights of the inner weight, but ignore it (just to initialize everything) + if (innerWeight != null) innerWeight.getValueForNormalization(); queryWeight = getBoost(); return queryWeight * queryWeight; } - public void normalize(float norm) { - this.queryNorm = norm; + @Override + public void normalize(float norm, float topLevelBoost) { + this.queryNorm = norm * topLevelBoost; queryWeight *= this.queryNorm; + // we normalize the inner weight, but ignore it (just to initialize everything) + if (innerWeight != null) innerWeight.normalize(norm, topLevelBoost); } - public Scorer scorer(IndexReader reader) throws IOException { - return new ConstantScorer(similarity, reader, this); + @Override + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + final DocIdSetIterator disi; + if (filter != null) { + assert query == null; + return super.bulkScorer(context, scoreDocsInOrder, acceptDocs); + } else { + assert query != null && innerWeight != null; + BulkScorer bulkScorer = innerWeight.bulkScorer(context, scoreDocsInOrder, acceptDocs); + if (bulkScorer == null) { + return null; + } + return new ConstantBulkScorer(bulkScorer, this, queryWeight); + } } - public Explanation explain(IndexReader reader, int doc) throws IOException { + @Override + public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + final DocIdSetIterator disi; + if (filter != null) { + assert query == null; + final DocIdSet dis = filter.getDocIdSet(context, acceptDocs); + if (dis == null) { + return null; + } + disi = dis.iterator(); + } else { + assert query != null && innerWeight != null; + disi = innerWeight.scorer(context, acceptDocs); + } - ConstantScorer cs = (ConstantScorer)scorer(reader); - boolean exists = cs.docIdSetIterator.skipTo(doc) && (cs.docIdSetIterator.doc() == doc); + if (disi == null) { + return null; + } + return new ConstantScorer(disi, this, queryWeight); + } - ComplexExplanation result = new ComplexExplanation(); + @Override + public boolean scoresDocsOutOfOrder() { + return (innerWeight != null) ? 
innerWeight.scoresDocsOutOfOrder() : false; + } + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + final Scorer cs = scorer(context, context.reader().getLiveDocs()); + final boolean exists = (cs != null && cs.advance(doc) == doc); + + final ComplexExplanation result = new ComplexExplanation(); if (exists) { - result.setDescription("ConstantScoreQuery(" + filter - + "), product of:"); + result.setDescription(ConstantScoreQuery.this.toString() + ", product of:"); result.setValue(queryWeight); result.setMatch(Boolean.TRUE); result.addDetail(new Explanation(getBoost(), "boost")); - result.addDetail(new Explanation(queryNorm,"queryNorm")); + result.addDetail(new Explanation(queryNorm, "queryNorm")); } else { - result.setDescription("ConstantScoreQuery(" + filter - + ") doesn't match id " + doc); + result.setDescription(ConstantScoreQuery.this.toString() + " doesn't match id " + doc); result.setValue(0); result.setMatch(Boolean.FALSE); } return result; } } + /** We return this as our {@link BulkScorer} so that if the CSQ + * wraps a query with its own optimized top-level + * scorer (e.g. BooleanScorer) we can use that + * top-level scorer. */ + protected class ConstantBulkScorer extends BulkScorer { + final BulkScorer bulkScorer; + final Weight weight; + final float theScore; + + public ConstantBulkScorer(BulkScorer bulkScorer, Weight weight, float theScore) { + this.bulkScorer = bulkScorer; + this.weight = weight; + this.theScore = theScore; + } + + @Override + public boolean score(Collector collector, int max) throws IOException { + return bulkScorer.score(wrapCollector(collector), max); + } + + private Collector wrapCollector(final Collector collector) { + return new Collector() { + @Override + public void setScorer(Scorer scorer) throws IOException { + // we must wrap again here, but using the scorer passed in as parameter: + collector.setScorer(new ConstantScorer(scorer, weight, theScore)); + } + + @Override + public void collect(int doc) throws IOException { + collector.collect(doc); + } + + @Override + public void setNextReader(AtomicReaderContext context) throws IOException { + collector.setNextReader(context); + } + + @Override + public boolean acceptsDocsOutOfOrder() { + return collector.acceptsDocsOutOfOrder(); + } + }; + } + } + protected class ConstantScorer extends Scorer { final DocIdSetIterator docIdSetIterator; final float theScore; - int doc=-1; - public ConstantScorer(Similarity similarity, IndexReader reader, Weight w) throws IOException { - super(similarity); - theScore = w.getValue(); - docIdSetIterator = filter.getDocIdSet(reader).iterator(); + public ConstantScorer(DocIdSetIterator docIdSetIterator, Weight w, float theScore) { + super(w); + this.theScore = theScore; + this.docIdSetIterator = docIdSetIterator; } - public boolean next() throws IOException { - return docIdSetIterator.next(); + @Override + public int nextDoc() throws IOException { + return docIdSetIterator.nextDoc(); } - - public int doc() { - return docIdSetIterator.doc(); + + @Override + public int docID() { + return docIdSetIterator.docID(); } + @Override public float score() throws IOException { + assert docIdSetIterator.docID() != NO_MORE_DOCS; return theScore; } - public boolean skipTo(int target) throws IOException { - return docIdSetIterator.skipTo(target); + @Override + public int freq() throws IOException { + return 1; } - public Explanation explain(int doc) throws IOException { - throw new UnsupportedOperationException(); + @Override + public int 
advance(int target) throws IOException { + return docIdSetIterator.advance(target); } + + @Override + public long cost() { + return docIdSetIterator.cost(); + } + + @Override + public Collection getChildren() { + if (query != null) { + return Collections.singletonList(new ChildScorer((Scorer) docIdSetIterator, "constant")); + } else { + return Collections.emptyList(); + } + } } - - protected Weight createWeight(Searcher searcher) { + @Override + public Weight createWeight(IndexSearcher searcher) throws IOException { return new ConstantScoreQuery.ConstantWeight(searcher); } - - /** Prints a user-readable version of this query. */ - public String toString(String field) - { - return "ConstantScore(" + filter.toString() - + (getBoost()==1.0 ? ")" : "^" + getBoost()); + @Override + public String toString(String field) { + return new StringBuilder("ConstantScore(") + .append((query == null) ? filter.toString() : query.toString(field)) + .append(')') + .append(ToStringUtils.boost(getBoost())) + .toString(); } - /** Returns true if o is equal to this. */ + @Override public boolean equals(Object o) { if (this == o) return true; - if (!(o instanceof ConstantScoreQuery)) return false; - ConstantScoreQuery other = (ConstantScoreQuery)o; - return this.getBoost()==other.getBoost() && filter.equals(other.filter); + if (!super.equals(o)) + return false; + if (o instanceof ConstantScoreQuery) { + final ConstantScoreQuery other = (ConstantScoreQuery) o; + return + ((this.filter == null) ? other.filter == null : this.filter.equals(other.filter)) && + ((this.query == null) ? other.query == null : this.query.equals(other.query)); + } + return false; } - /** Returns a hash code value for this object. */ + @Override public int hashCode() { - // Simple add is OK since no existing filter hashcode has a float component. - return filter.hashCode() + Float.floatToIntBits(getBoost()); + return 31 * super.hashCode() + + ((query == null) ? filter : query).hashCode(); } } - - - Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ConstantScoreRangeQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ControlledRealTimeReopenThread.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/DefaultSimilarity.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxQuery.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxQuery.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -16,14 +16,18 @@ * limitations under the License. 
*/ -import org.apache.lucene.index.IndexReader; - import java.io.IOException; import java.util.ArrayList; -import java.util.Iterator; import java.util.Collection; +import java.util.Iterator; +import java.util.List; import java.util.Set; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.Bits; + /** * A query that generates the union of documents produced by its subqueries, and that scores each document with the maximum * score for that document as produced by any subquery, plus a tie breaking increment for any additional matching subqueries. @@ -38,10 +42,10 @@ * include this term in only the best of those multiple fields, without confusing this with the better case of two different terms * in the multiple fields. */ -public class DisjunctionMaxQuery extends Query { +public class DisjunctionMaxQuery extends Query implements Iterable { /* The subqueries */ - private ArrayList disjuncts = new ArrayList(); + private ArrayList disjuncts = new ArrayList<>(); /* Multiple of the non-max disjunct scores added into our final score. Non-zero values support tie-breaking. */ private float tieBreakerMultiplier = 0.0f; @@ -58,10 +62,10 @@ /** * Creates a new DisjunctionMaxQuery - * @param disjuncts a Collection of all the disjuncts to add + * @param disjuncts a {@code Collection} of all the disjuncts to add * @param tieBreakerMultiplier the weight to give to each matching non-maximum disjunct */ - public DisjunctionMaxQuery(Collection disjuncts, float tieBreakerMultiplier) { + public DisjunctionMaxQuery(Collection disjuncts, float tieBreakerMultiplier) { this.tieBreakerMultiplier = tieBreakerMultiplier; add(disjuncts); } @@ -74,111 +78,150 @@ } /** Add a collection of disjuncts to this disjunction - * via Iterable + * via {@code Iterable} + * @param disjuncts a collection of queries to add as disjuncts. */ - public void add(Collection disjuncts) { + public void add(Collection disjuncts) { this.disjuncts.addAll(disjuncts); } - /** An Iterator over the disjuncts */ - public Iterator iterator() { + /** @return An {@code Iterator} over the disjuncts */ + @Override + public Iterator iterator() { return disjuncts.iterator(); } + + /** + * @return the disjuncts. + */ + public ArrayList getDisjuncts() { + return disjuncts; + } - /* The Weight for DisjunctionMaxQuery's, used to normalize, score and explain these queries */ - private class DisjunctionMaxWeight implements Weight { + /** + * @return tie breaker value for multiple matches. + */ + public float getTieBreakerMultiplier() { + return tieBreakerMultiplier; + } - private Similarity similarity; // The similarity which we are associated. - private ArrayList weights = new ArrayList(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts + /** + * Expert: the Weight for DisjunctionMaxQuery, used to + * normalize, score and explain these queries. + * + *

    NOTE: this API and implementation is subject to + * change suddenly in the next release.

    + */ + protected class DisjunctionMaxWeight extends Weight { - /* Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ - public DisjunctionMaxWeight(Searcher searcher) throws IOException { - this.similarity = searcher.getSimilarity(); - for (int i = 0; i < disjuncts.size(); i++) - weights.add(((Query) disjuncts.get(i)).createWeight(searcher)); + /** The Weights for our subqueries, in 1-1 correspondence with disjuncts */ + protected ArrayList weights = new ArrayList<>(); // The Weight's for our subqueries, in 1-1 correspondence with disjuncts + + /** Construct the Weight for this Query searched by searcher. Recursively construct subquery weights. */ + public DisjunctionMaxWeight(IndexSearcher searcher) throws IOException { + for (Query disjunctQuery : disjuncts) { + weights.add(disjunctQuery.createWeight(searcher)); + } } - /* Return our associated DisjunctionMaxQuery */ + /** Return our associated DisjunctionMaxQuery */ + @Override public Query getQuery() { return DisjunctionMaxQuery.this; } - /* Return our boost */ - public float getValue() { return getBoost(); } - - /* Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */ - public float sumOfSquaredWeights() throws IOException { + /** Compute the sub of squared weights of us applied to our subqueries. Used for normalization. */ + @Override + public float getValueForNormalization() throws IOException { float max = 0.0f, sum = 0.0f; - for (int i = 0; i < weights.size(); i++) { - float sub = ((Weight) weights.get(i)).sumOfSquaredWeights(); + for (Weight currentWeight : weights) { + float sub = currentWeight.getValueForNormalization(); sum += sub; max = Math.max(max, sub); + } - return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * getBoost() * getBoost(); + float boost = getBoost(); + return (((sum - max) * tieBreakerMultiplier * tieBreakerMultiplier) + max) * boost * boost; } - /* Apply the computed normalization factor to our subqueries */ - public void normalize(float norm) { - norm *= getBoost(); // Incorporate our boost - for (int i = 0 ; i < weights.size(); i++) - ((Weight) weights.get(i)).normalize(norm); + /** Apply the computed normalization factor to our subqueries */ + @Override + public void normalize(float norm, float topLevelBoost) { + topLevelBoost *= getBoost(); // Incorporate our boost + for (Weight wt : weights) { + wt.normalize(norm, topLevelBoost); + } } - /* Create the scorer used to score our associated DisjunctionMaxQuery */ - public Scorer scorer(IndexReader reader) throws IOException { - DisjunctionMaxScorer result = new DisjunctionMaxScorer(tieBreakerMultiplier, similarity); - for (int i = 0 ; i < weights.size(); i++) { - Weight w = (Weight) weights.get(i); - Scorer subScorer = w.scorer(reader); - if (subScorer == null) return null; - result.add(subScorer); + /** Create the scorer used to score our associated DisjunctionMaxQuery */ + @Override + public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + List scorers = new ArrayList<>(); + for (Weight w : weights) { + // we will advance() subscorers + Scorer subScorer = w.scorer(context, acceptDocs); + if (subScorer != null) { + scorers.add(subScorer); + } } - return result; + if (scorers.isEmpty()) { + // no sub-scorers had any documents + return null; + } else if (scorers.size() == 1) { + // only one sub-scorer in this segment + return scorers.get(0); + } else { + return new DisjunctionMaxScorer(this, 
tieBreakerMultiplier, scorers.toArray(new Scorer[scorers.size()])); + } } - /* Explain the score we computed for doc */ - public Explanation explain(IndexReader reader, int doc) throws IOException { - if ( disjuncts.size() == 1) return ((Weight) weights.get(0)).explain(reader,doc); + /** Explain the score we computed for doc */ + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + if (disjuncts.size() == 1) return weights.get(0).explain(context,doc); ComplexExplanation result = new ComplexExplanation(); float max = 0.0f, sum = 0.0f; result.setDescription(tieBreakerMultiplier == 0.0f ? "max of:" : "max plus " + tieBreakerMultiplier + " times others of:"); - for (int i = 0 ; i < weights.size(); i++) { - Explanation e = ((Weight) weights.get(i)).explain(reader, doc); + for (Weight wt : weights) { + Explanation e = wt.explain(context, doc); if (e.isMatch()) { result.setMatch(Boolean.TRUE); result.addDetail(e); sum += e.getValue(); max = Math.max(max, e.getValue()); } } - result.setValue(max + (sum - max)*tieBreakerMultiplier); + result.setValue(max + (sum - max) * tieBreakerMultiplier); return result; } - + } // end of DisjunctionMaxWeight inner class - /* Create the Weight used to score us */ - protected Weight createWeight(Searcher searcher) throws IOException { + /** Create the Weight used to score us */ + @Override + public Weight createWeight(IndexSearcher searcher) throws IOException { return new DisjunctionMaxWeight(searcher); } /** Optimize our representation and our subqueries representations * @param reader the IndexReader we query * @return an optimized copy of us (which may not be a copy if there is nothing to optimize) */ + @Override public Query rewrite(IndexReader reader) throws IOException { - if (disjuncts.size() == 1) { - Query singleton = (Query) disjuncts.get(0); + int numDisjunctions = disjuncts.size(); + if (numDisjunctions == 1) { + Query singleton = disjuncts.get(0); Query result = singleton.rewrite(reader); if (getBoost() != 1.0f) { - if (result == singleton) result = (Query)result.clone(); + if (result == singleton) result = result.clone(); result.setBoost(getBoost() * result.getBoost()); } return result; } DisjunctionMaxQuery clone = null; - for (int i = 0 ; i < disjuncts.size(); i++) { - Query clause = (Query) disjuncts.get(i); + for (int i = 0 ; i < numDisjunctions; i++) { + Query clause = disjuncts.get(i); Query rewrite = clause.rewrite(reader); if (rewrite != clause) { - if (clone == null) clone = (DisjunctionMaxQuery)this.clone(); + if (clone == null) clone = this.clone(); clone.disjuncts.set(i, rewrite); } } @@ -188,37 +231,39 @@ /** Create a shallow copy of us -- used in rewriting if necessary * @return a copy of us (but reuse, don't copy, our subqueries) */ - public Object clone() { + @Override @SuppressWarnings("unchecked") + public DisjunctionMaxQuery clone() { DisjunctionMaxQuery clone = (DisjunctionMaxQuery)super.clone(); - clone.disjuncts = (ArrayList)this.disjuncts.clone(); + clone.disjuncts = (ArrayList) this.disjuncts.clone(); return clone; } - // inherit javadoc - public void extractTerms(Set terms) { - for (int i = 0; i < disjuncts.size(); i++) { - ((Query)disjuncts.get(i)).extractTerms(terms); - } + @Override + public void extractTerms(Set terms) { + for (Query query : disjuncts) { + query.extractTerms(terms); + } } - /** Prettyprint us. * @param field the field to which we are applied * @return a string that shows what we do, of the form "(disjunct1 | disjunct2 | ... 
| disjunctn)^boost" */ + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("("); - for (int i = 0 ; i < disjuncts.size(); i++) { - Query subquery = (Query) disjuncts.get(i); + int numDisjunctions = disjuncts.size(); + for (int i = 0 ; i < numDisjunctions; i++) { + Query subquery = disjuncts.get(i); if (subquery instanceof BooleanQuery) { // wrap sub-bools in parens buffer.append("("); buffer.append(subquery.toString(field)); buffer.append(")"); } else buffer.append(subquery.toString(field)); - if (i != disjuncts.size()-1) buffer.append(" | "); + if (i != numDisjunctions-1) buffer.append(" | "); } buffer.append(")"); if (tieBreakerMultiplier != 0.0f) { @@ -236,6 +281,7 @@ * @param o another object * @return true iff o is a DisjunctionMaxQuery with the same boost and the same subqueries, in the same order, as us */ + @Override public boolean equals(Object o) { if (! (o instanceof DisjunctionMaxQuery) ) return false; DisjunctionMaxQuery other = (DisjunctionMaxQuery)o; @@ -247,10 +293,12 @@ /** Compute a hash code for hashing us * @return the hash code */ + @Override public int hashCode() { return Float.floatToIntBits(getBoost()) + Float.floatToIntBits(tieBreakerMultiplier) + disjuncts.hashCode(); } + } Index: 3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxScorer.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/DisjunctionMaxScorer.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Copyright 2004 The Apache Software Foundation * * Licensed under the Apache License, Version 2.0 (the "License"); @@ -17,178 +17,53 @@ */ import java.io.IOException; -import java.util.ArrayList; /** - * The Scorer for DisjunctionMaxQuery's. The union of all documents generated by the the subquery scorers + * The Scorer for DisjunctionMaxQuery. The union of all documents generated by the the subquery scorers * is generated in document number order. The score for each document is the maximum of the scores computed * by the subquery scorers that generate that document, plus tieBreakerMultiplier times the sum of the scores * for the other subqueries that generate the document. */ -class DisjunctionMaxScorer extends Scorer { +final class DisjunctionMaxScorer extends DisjunctionScorer { + /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. */ + private final float tieBreakerMultiplier; - /* The scorers for subqueries that have remaining docs, kept as a min heap by number of next doc. */ - private ArrayList subScorers = new ArrayList(); + /* Used when scoring currently matching doc. */ + private float scoreSum; + private float scoreMax; - /* Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. 
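Editor's note: the DisjunctionMaxQuery class javadoc earlier in this hunk describes scoring each document by the maximum sub-score plus a tie-breaking increment for the other matching disjuncts, and the Weight's explain() spells the formula out as max + (sum - max) * tieBreakerMultiplier. A hypothetical usage sketch and a worked number (field names invented for illustration):

  // Score by the best of two fields instead of their sum; 0.1f is the tieBreakerMultiplier.
  DisjunctionMaxQuery dmq = new DisjunctionMaxQuery(0.1f);
  dmq.add(new TermQuery(new Term("title", "elephant")));
  dmq.add(new TermQuery(new Term("body", "elephant")));

  // If the title clause scores 0.8 and the body clause 0.5 for the same document:
  //   max = 0.8, sum = 1.3
  //   score = 0.8 + (1.3 - 0.8) * 0.1 = 0.85

A tieBreakerMultiplier of 0.0 gives a pure max over the disjuncts; 1.0 reduces the formula to a plain sum.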
*/ - private float tieBreakerMultiplier; - - private boolean more = false; // True iff there is a next document - private boolean firstTime = true; // True iff next() has not yet been called - - /** Creates a new instance of DisjunctionMaxScorer - * @param tieBreakerMultiplier Multiplier applied to non-maximum-scoring subqueries for a document as they are summed into the result. - * @param similarity -- not used since our definition involves neither coord nor terms directly */ - public DisjunctionMaxScorer(float tieBreakerMultiplier, Similarity similarity) { - super(similarity); - this.tieBreakerMultiplier = tieBreakerMultiplier; + /** + * Creates a new instance of DisjunctionMaxScorer + * + * @param weight + * The Weight to be used. + * @param tieBreakerMultiplier + * Multiplier applied to non-maximum-scoring subqueries for a + * document as they are summed into the result. + * @param subScorers + * The sub scorers this Scorer should iterate on + */ + DisjunctionMaxScorer(Weight weight, float tieBreakerMultiplier, Scorer[] subScorers) { + super(weight, subScorers); + this.tieBreakerMultiplier = tieBreakerMultiplier; + } + + @Override + protected void reset() { + scoreSum = scoreMax = 0; + } + + @Override + protected void accum(Scorer subScorer) throws IOException { + float subScore = subScorer.score(); + scoreSum += subScore; + if (subScore > scoreMax) { + scoreMax = subScore; } - - /** Add the scorer for a subquery - * @param scorer the scorer of a subquery of our associated DisjunctionMaxQuery - */ - public void add(Scorer scorer) throws IOException { - if (scorer.next()) { // Initialize and retain only if it produces docs - subScorers.add(scorer); - more = true; - } - } - - /** Generate the next document matching our associated DisjunctionMaxQuery. - * @return true iff there is a next document - */ - public boolean next() throws IOException { - if (!more) return false; - if (firstTime) { - heapify(); - firstTime = false; - return true; // more would have been false if no subScorers had any docs - } - // Increment all generators that generated the last doc and adjust the heap. - int lastdoc = ((Scorer) subScorers.get(0)).doc(); - do { - if (((Scorer) subScorers.get(0)).next()) - heapAdjust(0); - else { - heapRemoveRoot(); - if (subScorers.isEmpty()) return (more = false); - } - } while ( ((Scorer) subScorers.get(0)).doc()==lastdoc ); - return true; - } - - /** Determine the current document number. Initially invalid, until {@link #next()} is called the first time. - * @return the document number of the currently generated document - */ - public int doc() { - return ((Scorer) subScorers.get(0)).doc(); - } - - /** Determine the current document score. Initially invalid, until {@link #next()} is called the first time. - * @return the score of the current generated document - */ - public float score() throws IOException { - int doc = ((Scorer) subScorers.get(0)).doc(); - float[] sum = {((Scorer) subScorers.get(0)).score()}, max = {sum[0]}; - int size = subScorers.size(); - scoreAll(1, size, doc, sum, max); - scoreAll(2, size, doc, sum, max); - return max[0] + (sum[0] - max[0])*tieBreakerMultiplier; - } - - // Recursively iterate all subScorers that generated last doc computing sum and max - private void scoreAll(int root, int size, int doc, float[] sum, float[] max) throws IOException { - if (root0 && ((Scorer)subScorers.get(0)).doc()>1)-1; i>=0; i--) - heapAdjust(i); - } - - /* The subtree of subScorers at root is a min heap except possibly for its root element. 
- * Bubble the root down as required to make the subtree a heap. - */ - private void heapAdjust(int root) { - Scorer scorer=(Scorer)subScorers.get(root); - int doc=scorer.doc(); - int i=root, size=subScorers.size(); - while (i<=(size>>1)-1) { - int lchild=(i<<1)+1; - Scorer lscorer=(Scorer)subScorers.get(lchild); - int ldoc=lscorer.doc(); - int rdoc=Integer.MAX_VALUE, rchild=(i<<1)+2; - Scorer rscorer=null; - if (rchildConjunctionScorer. - * This Scorer implements {@link Scorer#skipTo(int)} and uses skipTo() on the given Scorers. - * @todo Implement score(HitCollector, int). + * This Scorer implements {@link Scorer#advance(int)} and uses advance() on the given Scorers. */ -class DisjunctionSumScorer extends Scorer { - /** The number of subscorers. */ - private final int nrScorers; +final class DisjunctionSumScorer extends DisjunctionScorer { + private double score; + private final float[] coord; - /** The subscorers. */ - protected final List subScorers; - - /** The minimum number of scorers that should match. */ - private final int minimumNrMatchers; - - /** The scorerDocQueue contains all subscorers ordered by their current doc(), - * with the minimum at the top. - *
    The scorerDocQueue is initialized the first time next() or skipTo() is called. - *
    An exhausted scorer is immediately removed from the scorerDocQueue. - *
    If less than the minimumNrMatchers scorers - * remain in the scorerDocQueue next() and skipTo() return false. - *
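Editor's note: the removed javadoc above documents the old DisjunctionSumScorer contract: a document matches only if at least minimumNrMatchers of the subscorers match it, with the subscorers kept in a doc-ordered queue. A hypothetical, non-streaming sketch of that "at least n of m" selection (the real code walks the lists in doc-id order with a heap; a map is used here only for brevity):

  // Which docs are matched by at least minShouldMatch of the disjuncts?
  static java.util.SortedMap<Integer, Integer> matchCounts(int[][] disjunctDocs, int minShouldMatch) {
    java.util.SortedMap<Integer, Integer> counts = new java.util.TreeMap<>();
    for (int[] docs : disjunctDocs) {
      for (int doc : docs) {
        Integer seen = counts.get(doc);
        counts.put(doc, seen == null ? 1 : seen + 1);   // doc -> nrMatchers
      }
    }
    java.util.Iterator<Integer> it = counts.values().iterator();
    while (it.hasNext()) {
      if (it.next() < minShouldMatch) {
        it.remove();                                    // enforce the minimum
      }
    }
    return counts;
  }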

    - * After each to call to next() or skipTo() - * currentSumScore is the total score of the current matching doc, - * nrMatchers is the number of matching scorers, - * and all scorers are after the matching doc, or are exhausted. - */ - private ScorerDocQueue scorerDocQueue = null; - private int queueSize = -1; // used to avoid size() method calls on scorerDocQueue - - /** The document number of the current match. */ - private int currentDoc = -1; - - /** The number of subscorers that provide the current match. */ - protected int nrMatchers = -1; - - private float currentScore = Float.NaN; - /** Construct a DisjunctionScorer. - * @param subScorers A collection of at least two subscorers. - * @param minimumNrMatchers The positive minimum number of subscorers that should - * match to match this query. - *
    When minimumNrMatchers is bigger than - * the number of subScorers, - * no matches will be produced. - *
    When minimumNrMatchers equals the number of subScorers, - * it more efficient to use ConjunctionScorer. + * @param weight The weight to be used. + * @param subScorers Array of at least two subscorers. + * @param coord Table of coordination factors */ - public DisjunctionSumScorer( List subScorers, int minimumNrMatchers) { - super(null); - - nrScorers = subScorers.size(); - - if (minimumNrMatchers <= 0) { - throw new IllegalArgumentException("Minimum nr of matchers must be positive"); - } - if (nrScorers <= 1) { - throw new IllegalArgumentException("There must be at least 2 subScorers"); - } - - this.minimumNrMatchers = minimumNrMatchers; - this.subScorers = subScorers; + DisjunctionSumScorer(Weight weight, Scorer[] subScorers, float[] coord) { + super(weight, subScorers); + this.coord = coord; } - /** Construct a DisjunctionScorer, using one as the minimum number - * of matching subscorers. - */ - public DisjunctionSumScorer(List subScorers) { - this(subScorers, 1); + @Override + protected void reset() { + score = 0; } - - /** Called the first time next() or skipTo() is called to - * initialize scorerDocQueue. - */ - private void initScorerDocQueue() throws IOException { - Iterator si = subScorers.iterator(); - scorerDocQueue = new ScorerDocQueue(nrScorers); - queueSize = 0; - while (si.hasNext()) { - Scorer se = (Scorer) si.next(); - if (se.next()) { // doc() method will be used in scorerDocQueue. - if (scorerDocQueue.insert(se)) { - queueSize++; - } - } - } + + @Override + protected void accum(Scorer subScorer) throws IOException { + score += subScorer.score(); } - - /** Scores and collects all matching documents. - * @param hc The collector to which all matching documents are passed through - * {@link HitCollector#collect(int, float)}. - *
    When this method is used the {@link #explain(int)} method should not be used. - */ - public void score(HitCollector hc) throws IOException { - while (next()) { - hc.collect(currentDoc, currentScore); - } - } - - /** Expert: Collects matching documents in a range. Hook for optimization. - * Note that {@link #next()} must be called once before this method is called - * for the first time. - * @param hc The collector to which all matching documents are passed through - * {@link HitCollector#collect(int, float)}. - * @param max Do not score documents past this. - * @return true if more matching documents may remain. - */ - protected boolean score(HitCollector hc, int max) throws IOException { - while (currentDoc < max) { - hc.collect(currentDoc, currentScore); - if (!next()) { - return false; - } - } - return true; - } - - public boolean next() throws IOException { - if (scorerDocQueue == null) { - initScorerDocQueue(); - } - return (scorerDocQueue.size() >= minimumNrMatchers) - && advanceAfterCurrent(); - } - - - /** Advance all subscorers after the current document determined by the - * top of the scorerDocQueue. - * Repeat until at least the minimum number of subscorers match on the same - * document and all subscorers are after that document or are exhausted. - *
    On entry the scorerDocQueue has at least minimumNrMatchers - * available. At least the scorer with the minimum document number will be advanced. - * @return true iff there is a match. - *
    In case there is a match, currentDoc, currentSumScore, - * and nrMatchers describe the match. - * - * @todo Investigate whether it is possible to use skipTo() when - * the minimum number of matchers is bigger than one, ie. try and use the - * character of ConjunctionScorer for the minimum number of matchers. - * Also delay calling score() on the sub scorers until the minimum number of - * matchers is reached. - *
    For this, a Scorer array with minimumNrMatchers elements might - * hold Scorers at currentDoc that are temporarily popped from scorerQueue. - */ - protected boolean advanceAfterCurrent() throws IOException { - do { // repeat until minimum nr of matchers - currentDoc = scorerDocQueue.topDoc(); - currentScore = scorerDocQueue.topScore(); - nrMatchers = 1; - do { // Until all subscorers are after currentDoc - if (! scorerDocQueue.topNextAndAdjustElsePop()) { - if (--queueSize == 0) { - break; // nothing more to advance, check for last match. - } - } - if (scorerDocQueue.topDoc() != currentDoc) { - break; // All remaining subscorers are after currentDoc. - } - currentScore += scorerDocQueue.topScore(); - nrMatchers++; - } while (true); - - if (nrMatchers >= minimumNrMatchers) { - return true; - } else if (queueSize < minimumNrMatchers) { - return false; - } - } while (true); - } - /** Returns the score of the current document matching the query. - * Initially invalid, until {@link #next()} is called the first time. - */ - public float score() throws IOException { return currentScore; } - - public int doc() { return currentDoc; } - - /** Returns the number of subscorers matching the current document. - * Initially invalid, until {@link #next()} is called the first time. - */ - public int nrMatchers() { - return nrMatchers; + @Override + protected float getFinal() { + return (float)score * coord[freq]; } - - /** Skips to the first match beyond the current whose document number is - * greater than or equal to a given target. - *
    When this method is used the {@link #explain(int)} method should not be used. - *
    The implementation uses the skipTo() method on the subscorers. - * @param target The target document number. - * @return true iff there is such a match. - */ - public boolean skipTo(int target) throws IOException { - if (scorerDocQueue == null) { - initScorerDocQueue(); - } - if (queueSize < minimumNrMatchers) { - return false; - } - if (target <= currentDoc) { - return true; - } - do { - if (scorerDocQueue.topDoc() >= target) { - return advanceAfterCurrent(); - } else if (! scorerDocQueue.topSkipToAndAdjustElsePop(target)) { - if (--queueSize < minimumNrMatchers) { - return false; - } - } - } while (true); - } - - /** @return An explanation for the score of a given document. */ - public Explanation explain(int doc) throws IOException { - Explanation res = new Explanation(); - Iterator ssi = subScorers.iterator(); - float sumScore = 0.0f; - int nrMatches = 0; - while (ssi.hasNext()) { - Explanation es = ((Scorer) ssi.next()).explain(doc); - if (es.getValue() > 0.0f) { // indicates match - sumScore += es.getValue(); - nrMatches++; - } - res.addDetail(es); - } - if (nrMatchers >= minimumNrMatchers) { - res.setValue(sumScore); - res.setDescription("sum over at least " + minimumNrMatchers - + " of " + subScorers.size() + ":"); - } else { - res.setValue(0.0f); - res.setDescription(nrMatches + " match(es) but at least " - + minimumNrMatchers + " of " - + subScorers.size() + " needed"); - } - return res; - } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/DocIdSet.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/DocIdSet.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/DocIdSet.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/DocIdSet.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,11 +17,84 @@ * limitations under the License. */ +import java.io.IOException; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; + /** - * A DocIdSet contains a set of doc ids. Implementing classes must provide - * a {@link DocIdSetIterator} to access the set. + * A DocIdSet contains a set of doc ids. Implementing classes must + * only implement {@link #iterator} to provide access to the set. */ -public abstract class DocIdSet { - public abstract DocIdSetIterator iterator(); +public abstract class DocIdSet implements Accountable { + + /** An empty {@code DocIdSet} instance */ + public static final DocIdSet EMPTY = new DocIdSet() { + + @Override + public DocIdSetIterator iterator() { + return DocIdSetIterator.empty(); + } + + @Override + public boolean isCacheable() { + return true; + } + + // we explicitly provide no random access, as this filter is 100% sparse and iterator exits faster + @Override + public Bits bits() { + return null; + } + + @Override + public long ramBytesUsed() { + return 0L; + } + }; + + /** Provides a {@link DocIdSetIterator} to access the set. + * This implementation can return null if there + * are no docs that match. 
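Editor's note: the reworked DocIdSet below makes two contracts explicit: iterator() may return null when no documents match, and bits() may return null when random access is unavailable (the ConstantScoreQuery scorer() earlier in this section likewise checks for a null DocIdSet from the filter). A hedged consumption sketch honouring those null contracts:

  // Hypothetical consumer of a DocIdSet produced by a Filter for one segment.
  void collectMatches(DocIdSet set) throws IOException {
    if (set == null) {
      return;                                      // filter produced no set for this segment
    }
    DocIdSetIterator it = set.iterator();
    if (it == null) {
      return;                                      // documented to mean: no matching docs
    }
    for (int doc = it.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = it.nextDoc()) {
      // ... handle the matching doc ...
    }
  }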
*/ + public abstract DocIdSetIterator iterator() throws IOException; + + // TODO: somehow this class should express the cost of + // iteration vs the cost of random access Bits; for + // expensive Filters (e.g. distance < 1 km) we should use + // bits() after all other Query/Filters have matched, but + // this is the opposite of what bits() is for now + // (down-low filtering using e.g. FixedBitSet) + + /** Optionally provides a {@link Bits} interface for random access + * to matching documents. + * @return {@code null}, if this {@code DocIdSet} does not support random access. + * In contrast to {@link #iterator()}, a return value of {@code null} + * does not imply that no documents match the filter! + * The default implementation does not provide random access, so you + * only need to implement this method if your DocIdSet can + * guarantee random access to every docid in O(1) time without + * external disk access (as {@link Bits} interface cannot throw + * {@link IOException}). This is generally true for bit sets + * like {@link org.apache.lucene.util.FixedBitSet}, which return + * itself if they are used as {@code DocIdSet}. + */ + public Bits bits() throws IOException { + return null; + } + + /** + * This method is a hint for {@link CachingWrapperFilter}, if this DocIdSet + * should be cached without copying it. The default is to return + * false. If you have an own DocIdSet implementation + * that does its iteration very effective and fast without doing disk I/O, + * override this method and return true. + */ + public boolean isCacheable() { + return false; + } + + @Override + public long ramBytesUsed() { + throw new UnsupportedOperationException(); + } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/DocIdSetIterator.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/DocIdSetIterator.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/DocIdSetIterator.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/DocIdSetIterator.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,30 +20,128 @@ import java.io.IOException; /** - * This abstract class defines methods to iterate over a set of - * non-decreasing doc ids. + * This abstract class defines methods to iterate over a set of non-decreasing + * doc ids. Note that this class assumes it iterates on doc Ids, and therefore + * {@link #NO_MORE_DOCS} is set to {@value #NO_MORE_DOCS} in order to be used as + * a sentinel object. Implementations of this class are expected to consider + * {@link Integer#MAX_VALUE} as an invalid value. */ public abstract class DocIdSetIterator { - /** Returns the current document number.

    This is invalid until {@link - #next()} is called for the first time.*/ - public abstract int doc(); - - /** Moves to the next docId in the set. Returns true, iff - * there is such a docId. */ - public abstract boolean next() throws IOException; - - /** Skips entries to the first beyond the current whose document number is - * greater than or equal to target.

    Returns true iff there is such - * an entry.

    Behaves as if written:

    -     *   boolean skipTo(int target) {
    -     *     do {
    -     *       if (!next())
    -     *         return false;
    -     *     } while (target > doc());
    -     *     return true;
    -     *   }
    -     * 
    - * Some implementations are considerably more efficient than that. - */ - public abstract boolean skipTo(int target) throws IOException; + + /** An empty {@code DocIdSetIterator} instance */ + public static final DocIdSetIterator empty() { + return new DocIdSetIterator() { + boolean exhausted = false; + + @Override + public int advance(int target) { + assert !exhausted; + assert target >= 0; + exhausted = true; + return NO_MORE_DOCS; + } + + @Override + public int docID() { + return exhausted ? NO_MORE_DOCS : -1; + } + @Override + public int nextDoc() { + assert !exhausted; + exhausted = true; + return NO_MORE_DOCS; + } + + @Override + public long cost() { + return 0; + } + }; + } + + /** + * When returned by {@link #nextDoc()}, {@link #advance(int)} and + * {@link #docID()} it means there are no more docs in the iterator. + */ + public static final int NO_MORE_DOCS = Integer.MAX_VALUE; + + /** + * Returns the following: + *
      + *
    • -1 or {@link #NO_MORE_DOCS} if {@link #nextDoc()} or + * {@link #advance(int)} were not called yet. + *
    • {@link #NO_MORE_DOCS} if the iterator has exhausted. + *
    • Otherwise it should return the doc ID it is currently on. + *
    + *

    + * + * @since 2.9 + */ + public abstract int docID(); + + /** + * Advances to the next document in the set and returns the doc it is + * currently on, or {@link #NO_MORE_DOCS} if there are no more docs in the + * set.
    + * + * NOTE: after the iterator has exhausted you should not call this + * method, as it may result in unpredicted behavior. + * + * @since 2.9 + */ + public abstract int nextDoc() throws IOException; + + /** + * Advances to the first beyond the current whose document number is greater + * than or equal to target, and returns the document number itself. + * Exhausts the iterator and returns {@link #NO_MORE_DOCS} if target + * is greater than the highest document number in the set. + *

    + * The behavior of this method is undefined when called with + * target ≤ current, or after the iterator has exhausted. + * Both cases may result in unpredicted behavior. + *

    + * When target > current it behaves as if written: + * + *

    +   * int advance(int target) {
    +   *   int doc;
    +   *   while ((doc = nextDoc()) < target) {
    +   *   }
    +   *   return doc;
    +   * }
    +   * 
    + * + * Some implementations are considerably more efficient than that. + *
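Editor's note: the new javadoc above pins down the iterator protocol: docID() starts at -1, nextDoc()/advance() only move forward, and NO_MORE_DOCS (Integer.MAX_VALUE) is the exhaustion sentinel. A minimal, hypothetical implementation over a sorted int[] of doc ids, leaning on the slowAdvance helper added further down in this patch:

  // Illustrative only: backs the DocIdSetIterator contract with a plain sorted array.
  final class IntArrayDocIdSetIterator extends DocIdSetIterator {
    private final int[] docs;  // sorted, distinct doc ids
    private int idx = -1;      // -1 = not positioned yet

    IntArrayDocIdSetIterator(int[] docs) { this.docs = docs; }

    @Override
    public int docID() {
      if (idx < 0) return -1;
      return idx < docs.length ? docs[idx] : NO_MORE_DOCS;
    }

    @Override
    public int nextDoc() {
      idx++;
      return docID();
    }

    @Override
    public int advance(int target) throws java.io.IOException {
      return slowAdvance(target);   // linear scan; real implementations skip more cleverly
    }

    @Override
    public long cost() {
      return docs.length;           // an upper bound on the number of matching docs
    }
  }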

    + * NOTE: this method may be called with {@link #NO_MORE_DOCS} for + * efficiency by some Scorers. If your implementation cannot efficiently + * determine that it should exhaust, it is recommended that you check for that + * value in each call to this method. + *

    + * + * @since 2.9 + */ + public abstract int advance(int target) throws IOException; + + /** Slow (linear) implementation of {@link #advance} relying on + * {@link #nextDoc()} to advance beyond the target position. */ + protected final int slowAdvance(int target) throws IOException { + assert docID() == NO_MORE_DOCS // can happen when the enum is not positioned yet + || docID() < target; + int doc; + do { + doc = nextDoc(); + } while (doc < target); + return doc; + } + + /** + * Returns the estimated cost of this {@link DocIdSetIterator}. + *

    + * This is generally an upper bound of the number of documents this iterator + * might match, but may be a rough heuristic, hardcoded value, or otherwise + * completely inaccurate. + */ + public abstract long cost(); } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/DocTermOrdsRangeFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/DocTermOrdsRewriteMethod.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/ExactPhraseScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/ExactPhraseScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/ExactPhraseScorer.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/ExactPhraseScorer.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,38 +18,242 @@ */ import java.io.IOException; +import java.util.Arrays; + import org.apache.lucene.index.*; +import org.apache.lucene.search.similarities.Similarity; -final class ExactPhraseScorer extends PhraseScorer { +final class ExactPhraseScorer extends Scorer { + private final int endMinus1; - ExactPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, - byte[] norms) { - super(weight, tps, offsets, similarity, norms); + private final static int CHUNK = 4096; + + private int gen; + private final int[] counts = new int[CHUNK]; + private final int[] gens = new int[CHUNK]; + + private final long cost; + + private final static class ChunkState { + final DocsAndPositionsEnum posEnum; + final int offset; + int posUpto; + int posLimit; + int pos; + int lastPos; + + public ChunkState(DocsAndPositionsEnum posEnum, int offset) { + this.posEnum = posEnum; + this.offset = offset; + } } - protected final float phraseFreq() throws IOException { - // sort list with pq - pq.clear(); - for (PhrasePositions pp = first; pp != null; pp = pp.next) { - pp.firstPosition(); - pq.put(pp); // build pq from list + private final ChunkState[] chunkStates; + private final DocsAndPositionsEnum lead; + + private int docID = -1; + private int freq; + + private final Similarity.SimScorer docScorer; + + ExactPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, + Similarity.SimScorer docScorer) throws IOException { + super(weight); + this.docScorer = docScorer; + + chunkStates = new ChunkState[postings.length]; + + endMinus1 = postings.length-1; + + lead = postings[0].postings; + // min(cost) + cost = lead.cost(); + + for(int i=0;i doc) { + // DocsEnum beyond the current doc - break and advance lead to the new highest doc. 
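Editor's note: the rewritten ExactPhraseScorer below intersects the phrase terms' documents with the same lead/advance loop as ConjunctionScorer, then counts phrase occurrences from positions: each term's positions are shifted by its offset in the phrase (cs.offset + nextPosition()), so a match is a start position shared by every term after shifting. A hypothetical, unchunked sketch of that position test, assuming the i-th term sits at phrase position i (the real code works in CHUNK-sized windows with generation-stamped counters for speed):

  // positions[i] holds the sorted positions of the i-th phrase term within one document.
  // A phrase starts at p when term i occurs at p + i for every i, i.e. all terms share
  // the normalized start position p = pos - i.
  static int phraseFreq(int[][] positions) {
    java.util.Set<Integer> starts = new java.util.HashSet<>();
    for (int p : positions[0]) {
      starts.add(p);
    }
    for (int i = 1; i < positions.length; i++) {
      java.util.Set<Integer> surviving = new java.util.HashSet<>();
      for (int p : positions[i]) {
        if (starts.contains(p - i)) {
          surviving.add(p - i);      // still a viable phrase start
        }
      }
      starts = surviving;
    }
    return starts.size();
  }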
+ doc = d; + break advanceHead; + } + } + } + // all DocsEnums are on the same doc + if (doc == NO_MORE_DOCS) { + return doc; + } else if (phraseFreq() > 0) { + return doc; // success: matches phrase + } else { + doc = lead.nextDoc(); // doesn't match phrase + } } - freq++; // all equal: a match - } while (last.nextPosition()); - - return (float)freq; + // advance head for next iteration + doc = lead.advance(doc); + } } + + @Override + public int nextDoc() throws IOException { + return docID = doNext(lead.nextDoc()); + } + + @Override + public int advance(int target) throws IOException { + return docID = doNext(lead.advance(target)); + } + + @Override + public String toString() { + return "ExactPhraseScorer(" + weight + ")"; + } + + @Override + public int freq() { + return freq; + } + + @Override + public int docID() { + return docID; + } + + @Override + public float score() { + return docScorer.score(docID, freq); + } + + private int phraseFreq() throws IOException { + + freq = 0; + + // init chunks + for(int i=0;i cs.lastPos) { + cs.lastPos = cs.pos; + final int posIndex = cs.pos - chunkStart; + counts[posIndex] = 1; + assert gens[posIndex] != gen; + gens[posIndex] = gen; + } + + if (cs.posUpto == cs.posLimit) { + end = true; + break; + } + cs.posUpto++; + cs.pos = cs.offset + cs.posEnum.nextPosition(); + } + } + + // middle terms + boolean any = true; + for(int t=1;t cs.lastPos) { + cs.lastPos = cs.pos; + final int posIndex = cs.pos - chunkStart; + if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == t) { + // viable + counts[posIndex]++; + any = true; + } + } + + if (cs.posUpto == cs.posLimit) { + end = true; + break; + } + cs.posUpto++; + cs.pos = cs.offset + cs.posEnum.nextPosition(); + } + + if (!any) { + break; + } + } + + if (!any) { + // petered out for this chunk + chunkStart += CHUNK; + chunkEnd += CHUNK; + continue; + } + + // last term + + { + final ChunkState cs = chunkStates[endMinus1]; + while(cs.pos < chunkEnd) { + if (cs.pos > cs.lastPos) { + cs.lastPos = cs.pos; + final int posIndex = cs.pos - chunkStart; + if (posIndex >= 0 && gens[posIndex] == gen && counts[posIndex] == endMinus1) { + freq++; + } + } + + if (cs.posUpto == cs.posLimit) { + end = true; + break; + } + cs.posUpto++; + cs.pos = cs.offset + cs.posEnum.nextPosition(); + } + } + + chunkStart += CHUNK; + chunkEnd += CHUNK; + } + + return freq; + } + + @Override + public long cost() { + return cost; + } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/Explanation.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/Explanation.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/Explanation.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/Explanation.java 16 Dec 2014 11:31:48 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,10 +20,10 @@ import java.util.ArrayList; /** Expert: Describes the score computation for document and query. 
*/ -public class Explanation implements java.io.Serializable { +public class Explanation { private float value; // the value of this node private String description; // what it represents - private ArrayList details; // sub-explanations + private ArrayList details; // sub-explanations public Explanation() {} @@ -70,22 +70,23 @@ public Explanation[] getDetails() { if (details == null) return null; - return (Explanation[])details.toArray(new Explanation[0]); + return details.toArray(new Explanation[0]); } /** Adds a sub-node to this explanation node. */ public void addDetail(Explanation detail) { if (details == null) - details = new ArrayList(); + details = new ArrayList<>(); details.add(detail); } /** Render an explanation as text. */ + @Override public String toString() { return toString(0); } protected String toString(int depth) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); for (int i = 0; i < depth; i++) { buffer.append(" "); } @@ -105,7 +106,7 @@ /** Render an explanation as HTML. */ public String toHtml() { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("
<ul>\n");
      buffer.append("<li>
    • "); Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ExtendedFieldCache.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ExtendedFieldCacheImpl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FakeScorer.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/FieldCache.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/FieldCache.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/FieldCache.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/FieldCache.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,87 +17,446 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; import java.io.IOException; +import java.io.PrintStream; +import org.apache.lucene.analysis.NumericTokenStream; +import org.apache.lucene.document.DoubleField; +import org.apache.lucene.document.FloatField; +import org.apache.lucene.document.IntField; +import org.apache.lucene.document.LongField; +import org.apache.lucene.document.NumericDocValuesField; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocTermOrds; +import org.apache.lucene.index.IndexReader; // javadocs +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.NumericUtils; +import org.apache.lucene.util.RamUsageEstimator; + /** * Expert: Maintains caches of term values. * *

      Created: May 19, 2004 11:13:14 AM * * @since lucene 1.4 - * @version $Id$ + * @see org.apache.lucene.util.FieldCacheSanityChecker + * + * @lucene.internal */ public interface FieldCache { - /** Indicator for StringIndex values in the cache. */ - // NOTE: the value assigned to this constant must not be - // the same as any of those in SortField!! - public static final int STRING_INDEX = -1; + /** Field values as 8-bit signed bytes */ + public static abstract class Bytes { + /** Return a single Byte representation of this field's value. */ + public abstract byte get(int docID); + + /** Zero value for every document */ + public static final Bytes EMPTY = new Bytes() { + @Override + public byte get(int docID) { + return 0; + } + }; + } + /** Field values as 16-bit signed shorts */ + public static abstract class Shorts { + /** Return a short representation of this field's value. */ + public abstract short get(int docID); + + /** Zero value for every document */ + public static final Shorts EMPTY = new Shorts() { + @Override + public short get(int docID) { + return 0; + } + }; + } - /** Expert: Stores term text values and document ordering data. */ - public static class StringIndex { + /** Field values as 32-bit signed integers */ + public static abstract class Ints { + /** Return an integer representation of this field's value. */ + public abstract int get(int docID); + + /** Zero value for every document */ + public static final Ints EMPTY = new Ints() { + @Override + public int get(int docID) { + return 0; + } + }; + } - /** All the term values, in natural order. */ - public final String[] lookup; + /** Field values as 64-bit signed long integers */ + public static abstract class Longs { + /** Return an long representation of this field's value. */ + public abstract long get(int docID); + + /** Zero value for every document */ + public static final Longs EMPTY = new Longs() { + @Override + public long get(int docID) { + return 0; + } + }; + } - /** For each document, an index into the lookup array. */ - public final int[] order; + /** Field values as 32-bit floats */ + public static abstract class Floats { + /** Return an float representation of this field's value. */ + public abstract float get(int docID); + + /** Zero value for every document */ + public static final Floats EMPTY = new Floats() { + @Override + public float get(int docID) { + return 0; + } + }; + } - /** Creates one of these objects */ - public StringIndex (int[] values, String[] lookup) { - this.order = values; - this.lookup = lookup; + /** Field values as 64-bit doubles */ + public static abstract class Doubles { + /** Return an double representation of this field's value. */ + public abstract double get(int docID); + + /** Zero value for every document */ + public static final Doubles EMPTY = new Doubles() { + @Override + public double get(int docID) { + return 0; + } + }; + } + + /** + * Placeholder indicating creation of this cache is currently in-progress. + */ + public static final class CreationPlaceholder implements Accountable { + Accountable value; + + @Override + public long ramBytesUsed() { + // don't call on the in-progress value, might make things angry. + return RamUsageEstimator.NUM_BYTES_OBJECT_REF; } } + /** + * Marker interface as super-interface to all parsers. It + * is used to specify a custom parser to {@link + * SortField#SortField(String, FieldCache.Parser)}. + */ + public interface Parser { + + /** + * Pulls a {@link TermsEnum} from the given {@link Terms}. 
This method allows certain parsers + * to filter the actual TermsEnum before the field cache is filled. + * + * @param terms the {@link Terms} instance to create the {@link TermsEnum} from. + * @return a possibly filtered {@link TermsEnum} instance, this method must not return null. + * @throws IOException if an {@link IOException} occurs + */ + public TermsEnum termsEnum(Terms terms) throws IOException; + } + /** Interface to parse bytes from document fields. - * @see FieldCache#getBytes(IndexReader, String, FieldCache.ByteParser) + * @see FieldCache#getBytes(AtomicReader, String, FieldCache.ByteParser, boolean) */ - public interface ByteParser { + @Deprecated + public interface ByteParser extends Parser { /** Return a single Byte representation of this field's value. */ - public byte parseByte(String string); + public byte parseByte(BytesRef term); } /** Interface to parse shorts from document fields. - * @see FieldCache#getShorts(IndexReader, String, FieldCache.ShortParser) + * @see FieldCache#getShorts(AtomicReader, String, FieldCache.ShortParser, boolean) */ - public interface ShortParser { + @Deprecated + public interface ShortParser extends Parser { /** Return a short representation of this field's value. */ - public short parseShort(String string); + public short parseShort(BytesRef term); } /** Interface to parse ints from document fields. - * @see FieldCache#getInts(IndexReader, String, FieldCache.IntParser) + * @see FieldCache#getInts(AtomicReader, String, FieldCache.IntParser, boolean) */ - public interface IntParser { + public interface IntParser extends Parser { /** Return an integer representation of this field's value. */ - public int parseInt(String string); + public int parseInt(BytesRef term); } /** Interface to parse floats from document fields. - * @see FieldCache#getFloats(IndexReader, String, FieldCache.FloatParser) + * @see FieldCache#getFloats(AtomicReader, String, FieldCache.FloatParser, boolean) */ - public interface FloatParser { + public interface FloatParser extends Parser { /** Return an float representation of this field's value. */ - public float parseFloat(String string); + public float parseFloat(BytesRef term); } + /** Interface to parse long from document fields. + * @see FieldCache#getLongs(AtomicReader, String, FieldCache.LongParser, boolean) + */ + public interface LongParser extends Parser { + /** Return an long representation of this field's value. */ + public long parseLong(BytesRef term); + } + + /** Interface to parse doubles from document fields. + * @see FieldCache#getDoubles(AtomicReader, String, FieldCache.DoubleParser, boolean) + */ + public interface DoubleParser extends Parser { + /** Return an double representation of this field's value. */ + public double parseDouble(BytesRef term); + } + /** Expert: The cache used internally by sorting and range query classes. */ public static FieldCache DEFAULT = new FieldCacheImpl(); + /** The default parser for byte values, which are encoded by {@link Byte#toString(byte)} */ + @Deprecated + public static final ByteParser DEFAULT_BYTE_PARSER = new ByteParser() { + @Override + public byte parseByte(BytesRef term) { + // TODO: would be far better to directly parse from + // UTF8 bytes... 
but really users should use + // IntField, instead, which already decodes + // directly from byte[] + return Byte.parseByte(term.utf8ToString()); + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_BYTE_PARSER"; + } + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + }; + + /** The default parser for short values, which are encoded by {@link Short#toString(short)} */ + @Deprecated + public static final ShortParser DEFAULT_SHORT_PARSER = new ShortParser() { + @Override + public short parseShort(BytesRef term) { + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // IntField, instead, which already decodes + // directly from byte[] + return Short.parseShort(term.utf8ToString()); + } + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_SHORT_PARSER"; + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + }; + + /** The default parser for int values, which are encoded by {@link Integer#toString(int)} */ + @Deprecated + public static final IntParser DEFAULT_INT_PARSER = new IntParser() { + @Override + public int parseInt(BytesRef term) { + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // IntField, instead, which already decodes + // directly from byte[] + return Integer.parseInt(term.utf8ToString()); + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_INT_PARSER"; + } + }; + + /** The default parser for float values, which are encoded by {@link Float#toString(float)} */ + @Deprecated + public static final FloatParser DEFAULT_FLOAT_PARSER = new FloatParser() { + @Override + public float parseFloat(BytesRef term) { + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // FloatField, instead, which already decodes + // directly from byte[] + return Float.parseFloat(term.utf8ToString()); + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_FLOAT_PARSER"; + } + }; + + /** The default parser for long values, which are encoded by {@link Long#toString(long)} */ + @Deprecated + public static final LongParser DEFAULT_LONG_PARSER = new LongParser() { + @Override + public long parseLong(BytesRef term) { + // TODO: would be far better to directly parse from + // UTF8 bytes... but really users should use + // LongField, instead, which already decodes + // directly from byte[] + return Long.parseLong(term.utf8ToString()); + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_LONG_PARSER"; + } + }; + + /** The default parser for double values, which are encoded by {@link Double#toString(double)} */ + @Deprecated + public static final DoubleParser DEFAULT_DOUBLE_PARSER = new DoubleParser() { + @Override + public double parseDouble(BytesRef term) { + // TODO: would be far better to directly parse from + // UTF8 bytes... 
but really users should use + // DoubleField, instead, which already decodes + // directly from byte[] + return Double.parseDouble(term.utf8ToString()); + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return terms.iterator(null); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".DEFAULT_DOUBLE_PARSER"; + } + }; + + /** + * A parser instance for int values encoded by {@link NumericUtils}, e.g. when indexed + * via {@link IntField}/{@link NumericTokenStream}. + */ + public static final IntParser NUMERIC_UTILS_INT_PARSER=new IntParser(){ + @Override + public int parseInt(BytesRef term) { + return NumericUtils.prefixCodedToInt(term); + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedInts(terms.iterator(null)); + } + + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_INT_PARSER"; + } + }; + + /** + * A parser instance for float values encoded with {@link NumericUtils}, e.g. when indexed + * via {@link FloatField}/{@link NumericTokenStream}. + */ + public static final FloatParser NUMERIC_UTILS_FLOAT_PARSER=new FloatParser(){ + @Override + public float parseFloat(BytesRef term) { + return NumericUtils.sortableIntToFloat(NumericUtils.prefixCodedToInt(term)); + } + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_FLOAT_PARSER"; + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedInts(terms.iterator(null)); + } + }; + + /** + * A parser instance for long values encoded by {@link NumericUtils}, e.g. when indexed + * via {@link LongField}/{@link NumericTokenStream}. + */ + public static final LongParser NUMERIC_UTILS_LONG_PARSER = new LongParser(){ + @Override + public long parseLong(BytesRef term) { + return NumericUtils.prefixCodedToLong(term); + } + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_LONG_PARSER"; + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedLongs(terms.iterator(null)); + } + }; + + /** + * A parser instance for double values encoded with {@link NumericUtils}, e.g. when indexed + * via {@link DoubleField}/{@link NumericTokenStream}. + */ + public static final DoubleParser NUMERIC_UTILS_DOUBLE_PARSER = new DoubleParser(){ + @Override + public double parseDouble(BytesRef term) { + return NumericUtils.sortableLongToDouble(NumericUtils.prefixCodedToLong(term)); + } + @Override + public String toString() { + return FieldCache.class.getName()+".NUMERIC_UTILS_DOUBLE_PARSER"; + } + + @Override + public TermsEnum termsEnum(Terms terms) throws IOException { + return NumericUtils.filterPrefixCodedLongs(terms.iterator(null)); + } + }; + + /** Checks the internal cache for an appropriate entry, and if none is found, + * reads the terms in field and returns a bit set at the size of + * reader.maxDoc(), with turned on bits for each docid that + * does have a value for this field. + */ + public Bits getDocsWithField(AtomicReader reader, String field) throws IOException; + /** Checks the internal cache for an appropriate entry, and if none is * found, reads the terms in field as a single byte and returns an array * of size reader.maxDoc() of the value each document * has in the given field. * @param reader Used to get field values. * @param field Which field contains the single byte values. 
+ * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. * @return The values in the given field for each document. * @throws IOException If any error occurs. + * @deprecated (4.4) Index as a numeric field using {@link IntField} and then use {@link #getInts(AtomicReader, String, boolean)} instead. */ - public byte[] getBytes (IndexReader reader, String field) - throws IOException; + @Deprecated + public Bytes getBytes(AtomicReader reader, String field, boolean setDocsWithField) throws IOException; /** Checks the internal cache for an appropriate entry, and if none is found, * reads the terms in field as bytes and returns an array of @@ -106,23 +465,29 @@ * @param reader Used to get field values. * @param field Which field contains the bytes. * @param parser Computes byte for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. * @return The values in the given field for each document. * @throws IOException If any error occurs. + * @deprecated (4.4) Index as a numeric field using {@link IntField} and then use {@link #getInts(AtomicReader, String, boolean)} instead. */ - public byte[] getBytes (IndexReader reader, String field, ByteParser parser) - throws IOException; + @Deprecated + public Bytes getBytes(AtomicReader reader, String field, ByteParser parser, boolean setDocsWithField) throws IOException; /** Checks the internal cache for an appropriate entry, and if none is * found, reads the terms in field as shorts and returns an array * of size reader.maxDoc() of the value each document * has in the given field. * @param reader Used to get field values. * @param field Which field contains the shorts. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. * @return The values in the given field for each document. * @throws IOException If any error occurs. + * @deprecated (4.4) Index as a numeric field using {@link IntField} and then use {@link #getInts(AtomicReader, String, boolean)} instead. */ - public short[] getShorts (IndexReader reader, String field) - throws IOException; + @Deprecated + public Shorts getShorts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException; /** Checks the internal cache for an appropriate entry, and if none is found, * reads the terms in field as shorts and returns an array of @@ -131,112 +496,318 @@ * @param reader Used to get field values. * @param field Which field contains the shorts. * @param parser Computes short for string values. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. * @return The values in the given field for each document. * @throws IOException If any error occurs. + * @deprecated (4.4) Index as a numeric field using {@link IntField} and then use {@link #getInts(AtomicReader, String, boolean)} instead. */ - public short[] getShorts (IndexReader reader, String field, ShortParser parser) - throws IOException; + @Deprecated + public Shorts getShorts (AtomicReader reader, String field, ShortParser parser, boolean setDocsWithField) throws IOException; + + /** + * Returns an {@link Ints} over the values found in documents in the given + * field. 
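+     * <p>
+     * Illustrative sketch (editor's addition, not part of the original source;
+     * assumes an AtomicReader named "reader", an indexed int field named "price",
+     * and a document id "docID"):
+     * <pre>
+     * // "reader", "price" and "docID" are assumptions for this sketch
+     * FieldCache.Ints prices = FieldCache.DEFAULT.getInts(reader, "price", false);
+     * int price = prices.get(docID);
+     * </pre>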
+ * + * @see #getInts(AtomicReader, String, IntParser, boolean) + */ + public Ints getInts(AtomicReader reader, String field, boolean setDocsWithField) throws IOException; - /** Checks the internal cache for an appropriate entry, and if none is - * found, reads the terms in field as integers and returns an array - * of size reader.maxDoc() of the value each document + /** + * Returns an {@link Ints} over the values found in documents in the given + * field. If the field was indexed as {@link NumericDocValuesField}, it simply + * uses {@link AtomicReader#getNumericDocValues(String)} to read the values. + * Otherwise, it checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as ints and returns + * an array of size reader.maxDoc() of the value each document * has in the given field. - * @param reader Used to get field values. - * @param field Which field contains the integers. + * + * @param reader + * Used to get field values. + * @param field + * Which field contains the longs. + * @param parser + * Computes int for string values. May be {@code null} if the + * requested field was indexed as {@link NumericDocValuesField} or + * {@link IntField}. + * @param setDocsWithField + * If true then {@link #getDocsWithField} will also be computed and + * stored in the FieldCache. * @return The values in the given field for each document. - * @throws IOException If any error occurs. + * @throws IOException + * If any error occurs. */ - public int[] getInts (IndexReader reader, String field) - throws IOException; + public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField) throws IOException; - /** Checks the internal cache for an appropriate entry, and if none is found, - * reads the terms in field as integers and returns an array of - * size reader.maxDoc() of the value each document has in the - * given field. - * @param reader Used to get field values. - * @param field Which field contains the integers. - * @param parser Computes integer for string values. + /** + * Returns a {@link Floats} over the values found in documents in the given + * field. + * + * @see #getFloats(AtomicReader, String, FloatParser, boolean) + */ + public Floats getFloats(AtomicReader reader, String field, boolean setDocsWithField) throws IOException; + + /** + * Returns a {@link Floats} over the values found in documents in the given + * field. If the field was indexed as {@link NumericDocValuesField}, it simply + * uses {@link AtomicReader#getNumericDocValues(String)} to read the values. + * Otherwise, it checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as floats and returns + * an array of size reader.maxDoc() of the value each document + * has in the given field. + * + * @param reader + * Used to get field values. + * @param field + * Which field contains the floats. + * @param parser + * Computes float for string values. May be {@code null} if the + * requested field was indexed as {@link NumericDocValuesField} or + * {@link FloatField}. + * @param setDocsWithField + * If true then {@link #getDocsWithField} will also be computed and + * stored in the FieldCache. * @return The values in the given field for each document. - * @throws IOException If any error occurs. + * @throws IOException + * If any error occurs. 
*/ - public int[] getInts (IndexReader reader, String field, IntParser parser) - throws IOException; + public Floats getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField) throws IOException; - /** Checks the internal cache for an appropriate entry, and if - * none is found, reads the terms in field as floats and returns an array - * of size reader.maxDoc() of the value each document + /** + * Returns a {@link Longs} over the values found in documents in the given + * field. + * + * @see #getLongs(AtomicReader, String, LongParser, boolean) + */ + public Longs getLongs(AtomicReader reader, String field, boolean setDocsWithField) throws IOException; + + /** + * Returns a {@link Longs} over the values found in documents in the given + * field. If the field was indexed as {@link NumericDocValuesField}, it simply + * uses {@link AtomicReader#getNumericDocValues(String)} to read the values. + * Otherwise, it checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as longs and returns + * an array of size reader.maxDoc() of the value each document * has in the given field. - * @param reader Used to get field values. - * @param field Which field contains the floats. + * + * @param reader + * Used to get field values. + * @param field + * Which field contains the longs. + * @param parser + * Computes long for string values. May be {@code null} if the + * requested field was indexed as {@link NumericDocValuesField} or + * {@link LongField}. + * @param setDocsWithField + * If true then {@link #getDocsWithField} will also be computed and + * stored in the FieldCache. * @return The values in the given field for each document. - * @throws IOException If any error occurs. + * @throws IOException + * If any error occurs. */ - public float[] getFloats (IndexReader reader, String field) - throws IOException; + public Longs getLongs(AtomicReader reader, String field, LongParser parser, boolean setDocsWithField) throws IOException; - /** Checks the internal cache for an appropriate entry, and if - * none is found, reads the terms in field as floats and returns an array - * of size reader.maxDoc() of the value each document + /** + * Returns a {@link Doubles} over the values found in documents in the given + * field. + * + * @see #getDoubles(AtomicReader, String, DoubleParser, boolean) + */ + public Doubles getDoubles(AtomicReader reader, String field, boolean setDocsWithField) throws IOException; + + /** + * Returns a {@link Doubles} over the values found in documents in the given + * field. If the field was indexed as {@link NumericDocValuesField}, it simply + * uses {@link AtomicReader#getNumericDocValues(String)} to read the values. + * Otherwise, it checks the internal cache for an appropriate entry, and if + * none is found, reads the terms in field as doubles and returns + * an array of size reader.maxDoc() of the value each document * has in the given field. - * @param reader Used to get field values. - * @param field Which field contains the floats. - * @param parser Computes float for string values. + * + * @param reader + * Used to get field values. + * @param field + * Which field contains the longs. + * @param parser + * Computes double for string values. May be {@code null} if the + * requested field was indexed as {@link NumericDocValuesField} or + * {@link DoubleField}. + * @param setDocsWithField + * If true then {@link #getDocsWithField} will also be computed and + * stored in the FieldCache. 
* @return The values in the given field for each document. - * @throws IOException If any error occurs. + * @throws IOException + * If any error occurs. */ - public float[] getFloats (IndexReader reader, String field, - FloatParser parser) throws IOException; + public Doubles getDoubles(AtomicReader reader, String field, DoubleParser parser, boolean setDocsWithField) throws IOException; /** Checks the internal cache for an appropriate entry, and if none - * is found, reads the term values in field and returns an array - * of size reader.maxDoc() containing the value each document - * has in the given field. + * is found, reads the term values in field + * and returns a {@link BinaryDocValues} instance, providing a + * method to retrieve the term (as a BytesRef) per document. * @param reader Used to get field values. * @param field Which field contains the strings. + * @param setDocsWithField If true then {@link #getDocsWithField} will + * also be computed and stored in the FieldCache. * @return The values in the given field for each document. * @throws IOException If any error occurs. */ - public String[] getStrings (IndexReader reader, String field) - throws IOException; + public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField) throws IOException; + /** Expert: just like {@link #getTerms(AtomicReader,String,boolean)}, + * but you can specify whether more RAM should be consumed in exchange for + * faster lookups (default is "true"). Note that the + * first call for a given reader and field "wins", + * subsequent calls will share the same cache entry. */ + public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException; + /** Checks the internal cache for an appropriate entry, and if none - * is found reads the term values in field and returns - * an array of them in natural order, along with an array telling - * which element in the term array each document uses. + * is found, reads the term values in field + * and returns a {@link SortedDocValues} instance, + * providing methods to retrieve sort ordinals and terms + * (as a ByteRef) per document. * @param reader Used to get field values. * @param field Which field contains the strings. - * @return Array of terms and index into the array for each document. + * @return The values in the given field for each document. * @throws IOException If any error occurs. */ - public StringIndex getStringIndex (IndexReader reader, String field) - throws IOException; + public SortedDocValues getTermsIndex(AtomicReader reader, String field) throws IOException; - /** Checks the internal cache for an appropriate entry, and if - * none is found reads field to see if it contains integers, floats - * or strings, and then calls one of the other methods in this class to get the - * values. For string values, a StringIndex is returned. After - * calling this method, there is an entry in the cache for both - * type AUTO and the actual found type. - * @param reader Used to get field values. - * @param field Which field contains the values. - * @return int[], float[] or StringIndex. + /** Expert: just like {@link + * #getTermsIndex(AtomicReader,String)}, but you can specify + * whether more RAM should be consumed in exchange for + * faster lookups (default is "true"). Note that the + * first call for a given reader and field "wins", + * subsequent calls will share the same cache entry. 
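+   * <p>
+   * Illustrative sketch (editor's addition, not part of the original source;
+   * assumes an AtomicReader named "reader", a string field named "category",
+   * and a document id "docID"):
+   * <pre>
+   * // "reader", "category" and "docID" are assumptions for this sketch
+   * SortedDocValues index = FieldCache.DEFAULT.getTermsIndex(reader, "category");
+   * int ord = index.getOrd(docID);   // sort ordinal; -1 if the document has no value
+   * if (ord != -1) {
+   *   BytesRef term = new BytesRef();
+   *   index.lookupOrd(ord, term);    // the term value for that ordinal
+   * }
+   * </pre>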
*/ + public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException; + + /** + * Checks the internal cache for an appropriate entry, and if none is found, reads the term values + * in field and returns a {@link DocTermOrds} instance, providing a method to retrieve + * the terms (as ords) per document. + * + * @param reader Used to build a {@link DocTermOrds} instance + * @param field Which field contains the strings. + * @return a {@link DocTermOrds} instance * @throws IOException If any error occurs. */ - public Object getAuto (IndexReader reader, String field) - throws IOException; + public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException; - /** Checks the internal cache for an appropriate entry, and if none - * is found reads the terms out of field and calls the given SortComparator - * to get the sort values. A hit in the cache will happen if reader, - * field, and comparator are the same (using equals()) - * as a previous call to this method. - * @param reader Used to get field values. - * @param field Which field contains the values. - * @param comparator Used to convert terms into something to sort by. - * @return Array of sort objects, one for each document. - * @throws IOException If any error occurs. + /** + * EXPERT: A unique Identifier/Description for each item in the FieldCache. + * Can be useful for logging/debugging. + * @lucene.experimental */ - public Comparable[] getCustom (IndexReader reader, String field, SortComparator comparator) - throws IOException; + public final class CacheEntry { + + private final Object readerKey; + private final String fieldName; + private final Class cacheType; + private final Object custom; + private final Accountable value; + private String size; + + public CacheEntry(Object readerKey, String fieldName, + Class cacheType, + Object custom, + Accountable value) { + this.readerKey = readerKey; + this.fieldName = fieldName; + this.cacheType = cacheType; + this.custom = custom; + this.value = value; + } + + public Object getReaderKey() { + return readerKey; + } + + public String getFieldName() { + return fieldName; + } + + public Class getCacheType() { + return cacheType; + } + + public Object getCustom() { + return custom; + } + + public Object getValue() { + return value; + } + + /** + * The most recently estimated size of the value, null unless + * estimateSize has been called. + */ + public String getEstimatedSize() { + long bytesUsed = value == null ? 0 : value.ramBytesUsed(); + return RamUsageEstimator.humanReadableUnits(bytesUsed); + } + + @Override + public String toString() { + StringBuilder b = new StringBuilder(); + b.append("'").append(getReaderKey()).append("'=>"); + b.append("'").append(getFieldName()).append("',"); + b.append(getCacheType()).append(",").append(getCustom()); + b.append("=>").append(getValue().getClass().getName()).append("#"); + b.append(System.identityHashCode(getValue())); + + String s = getEstimatedSize(); + if(null != s) { + b.append(" (size =~ ").append(s).append(')'); + } + + return b.toString(); + } + } + /** + * EXPERT: Generates an array of CacheEntry objects representing all items + * currently in the FieldCache. + *

+ * NOTE: These CacheEntry objects maintain a strong reference to the + * Cached Values. Maintaining references to a CacheEntry after the AtomicIndexReader + * associated with it has been garbage collected will prevent the Value itself + * from being garbage collected when the Cache drops the WeakReference. +

      + * @lucene.experimental + */ + public CacheEntry[] getCacheEntries(); + + /** + *

+ * EXPERT: Instructs the FieldCache to forcibly expunge all entries + * from the underlying caches. This is intended only to be used for + * test methods as a way to ensure a known base state of the Cache + * (without needing to rely on GC to free WeakReferences). + * It should not be relied on for "Cache maintenance" in general + * application code. + *

      + * @lucene.experimental + */ + public void purgeAllCaches(); + + /** + * Expert: drops all cache entries associated with this + * reader {@link IndexReader#getCoreCacheKey}. NOTE: this cache key must + * precisely match the reader that the cache entry is + * keyed on. If you pass a top-level reader, it usually + * will have no effect as Lucene now caches at the segment + * reader level. + */ + public void purgeByCacheKey(Object coreCacheKey); + + /** + * If non-null, FieldCacheImpl will warn whenever + * entries are created that are not sane according to + * {@link org.apache.lucene.util.FieldCacheSanityChecker}. + */ + public void setInfoStream(PrintStream stream); + + /** counterpart of {@link #setInfoStream(PrintStream)} */ + public PrintStream getInfoStream(); } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheDocIdSet.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheImpl.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheImpl.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheImpl.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheImpl.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,44 +17,186 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.TermEnum; - import java.io.IOException; +import java.io.PrintStream; +import java.util.ArrayList; import java.util.HashMap; -import java.util.Locale; +import java.util.List; import java.util.Map; import java.util.WeakHashMap; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocTermOrds; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.SegmentReader; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Accountable; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.FieldCacheSanityChecker; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.PagedBytes; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.GrowableWriter; +import org.apache.lucene.util.packed.PackedInts; +import org.apache.lucene.util.packed.PackedLongValues; + /** * Expert: The default cache implementation, storing all values in memory. * A WeakHashMap is used for storage. * - *

      Created: May 19, 2004 4:40:36 PM - * * @since lucene 1.4 - * @version $Id$ */ -class FieldCacheImpl -implements FieldCache { - +class FieldCacheImpl implements FieldCache { + + private Map,Cache> caches; + FieldCacheImpl() { + init(); + } + + private synchronized void init() { + caches = new HashMap<>(9); + caches.put(Byte.TYPE, new ByteCache(this)); + caches.put(Short.TYPE, new ShortCache(this)); + caches.put(Integer.TYPE, new IntCache(this)); + caches.put(Float.TYPE, new FloatCache(this)); + caches.put(Long.TYPE, new LongCache(this)); + caches.put(Double.TYPE, new DoubleCache(this)); + caches.put(BinaryDocValues.class, new BinaryDocValuesCache(this)); + caches.put(SortedDocValues.class, new SortedDocValuesCache(this)); + caches.put(DocTermOrds.class, new DocTermOrdsCache(this)); + caches.put(DocsWithFieldCache.class, new DocsWithFieldCache(this)); + } + + @Override + public synchronized void purgeAllCaches() { + init(); + } + + @Override + public synchronized void purgeByCacheKey(Object coreCacheKey) { + for(Cache c : caches.values()) { + c.purgeByCacheKey(coreCacheKey); + } + } + + @Override + public synchronized CacheEntry[] getCacheEntries() { + List result = new ArrayList<>(17); + for(final Map.Entry,Cache> cacheEntry: caches.entrySet()) { + final Cache cache = cacheEntry.getValue(); + final Class cacheType = cacheEntry.getKey(); + synchronized(cache.readerCache) { + for (final Map.Entry> readerCacheEntry : cache.readerCache.entrySet()) { + final Object readerKey = readerCacheEntry.getKey(); + if (readerKey == null) continue; + final Map innerCache = readerCacheEntry.getValue(); + for (final Map.Entry mapEntry : innerCache.entrySet()) { + CacheKey entry = mapEntry.getKey(); + result.add(new CacheEntry(readerKey, entry.field, + cacheType, entry.custom, + mapEntry.getValue())); + } + } + } + } + return result.toArray(new CacheEntry[result.size()]); + } + + // per-segment fieldcaches don't purge until the shared core closes. + final SegmentReader.CoreClosedListener purgeCore = new SegmentReader.CoreClosedListener() { + @Override + public void onClose(Object ownerCoreCacheKey) { + FieldCacheImpl.this.purgeByCacheKey(ownerCoreCacheKey); + } + }; + + // composite/SlowMultiReaderWrapper fieldcaches don't purge until composite reader is closed. + final IndexReader.ReaderClosedListener purgeReader = new IndexReader.ReaderClosedListener() { + @Override + public void onClose(IndexReader owner) { + assert owner instanceof AtomicReader; + FieldCacheImpl.this.purgeByCacheKey(((AtomicReader) owner).getCoreCacheKey()); + } + }; + + private void initReader(AtomicReader reader) { + if (reader instanceof SegmentReader) { + ((SegmentReader) reader).addCoreClosedListener(purgeCore); + } else { + // we have a slow reader of some sort, try to register a purge event + // rather than relying on gc: + Object key = reader.getCoreCacheKey(); + if (key instanceof AtomicReader) { + ((AtomicReader)key).addReaderClosedListener(purgeReader); + } else { + // last chance + reader.addReaderClosedListener(purgeReader); + } + } + } + /** Expert: Internal cache. 
*/ abstract static class Cache { - private final Map readerCache = new WeakHashMap(); + + Cache(FieldCacheImpl wrapper) { + this.wrapper = wrapper; + } + + final FieldCacheImpl wrapper; + + final Map> readerCache = new WeakHashMap<>(); - protected abstract Object createValue(IndexReader reader, Object key) + protected abstract Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException; - public Object get(IndexReader reader, Object key) throws IOException { - Map innerCache; - Object value; + /** Remove this reader from the cache, if present. */ + public void purgeByCacheKey(Object coreCacheKey) { + synchronized(readerCache) { + readerCache.remove(coreCacheKey); + } + } + + /** Sets the key to the value for the provided reader; + * if the key is already set then this doesn't change it. */ + public void put(AtomicReader reader, CacheKey key, Accountable value) { + final Object readerKey = reader.getCoreCacheKey(); synchronized (readerCache) { - innerCache = (Map) readerCache.get(reader); + Map innerCache = readerCache.get(readerKey); if (innerCache == null) { - innerCache = new HashMap(); - readerCache.put(reader, innerCache); + // First time this reader is using FieldCache + innerCache = new HashMap<>(); + readerCache.put(readerKey, innerCache); + wrapper.initReader(reader); + } + if (innerCache.get(key) == null) { + innerCache.put(key, value); + } else { + // Another thread beat us to it; leave the current + // value + } + } + } + + public Accountable get(AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { + Map innerCache; + Accountable value; + final Object readerKey = reader.getCoreCacheKey(); + synchronized (readerCache) { + innerCache = readerCache.get(readerKey); + if (innerCache == null) { + // First time this reader is using FieldCache + innerCache = new HashMap<>(); + readerCache.put(readerKey, innerCache); + wrapper.initReader(reader); value = null; } else { value = innerCache.get(key); @@ -68,443 +210,1319 @@ synchronized (value) { CreationPlaceholder progress = (CreationPlaceholder) value; if (progress.value == null) { - progress.value = createValue(reader, key); + progress.value = createValue(reader, key, setDocsWithField); synchronized (readerCache) { innerCache.put(key, progress.value); } + + // Only check if key.custom (the parser) is + // non-null; else, we check twice for a single + // call to FieldCache.getXXX + if (key.custom != null && wrapper != null) { + final PrintStream infoStream = wrapper.getInfoStream(); + if (infoStream != null) { + printNewInsanity(infoStream, progress.value); + } + } } return progress.value; } } return value; } - } - static final class CreationPlaceholder { - Object value; + private void printNewInsanity(PrintStream infoStream, Object value) { + final FieldCacheSanityChecker.Insanity[] insanities = FieldCacheSanityChecker.checkSanity(wrapper); + for(int i=0;i= maxDoc) { + // The cardinality of the BitSet is maxDoc if all documents have a value. 
+ assert numSet == maxDoc; + bits = new Bits.MatchAllBits(maxDoc); + } else { + bits = docsWithField; + } + } else { + bits = docsWithField; + } + caches.get(DocsWithFieldCache.class).put(reader, new CacheKey(field, null), new BitsEntry(bits)); + } + + static class BitsEntry implements Accountable { + final Bits bits; + + BitsEntry(Bits bits) { + this.bits = bits; + } + + @Override + public long ramBytesUsed() { + long base = RamUsageEstimator.NUM_BYTES_OBJECT_REF; + if (bits instanceof Bits.MatchAllBits || bits instanceof Bits.MatchNoBits) { + return base; + } else { + return base + (bits.length() >>> 3); + } + } + } + // inherit javadocs - public byte[] getBytes(IndexReader reader, String field, ByteParser parser) + public Bytes getBytes (AtomicReader reader, String field, boolean setDocsWithField) throws IOException { + return getBytes(reader, field, null, setDocsWithField); + } + + @Override + public Bytes getBytes(AtomicReader reader, String field, ByteParser parser, boolean setDocsWithField) throws IOException { - return (byte[]) bytesCache.get(reader, new Entry(field, parser)); + final NumericDocValues valuesIn = reader.getNumericDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return new Bytes() { + @Override + public byte get(int docID) { + return (byte) valuesIn.get(docID); + } + }; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return Bytes.EMPTY; + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Bytes.EMPTY; + } + return (Bytes) caches.get(Byte.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); + } } - Cache bytesCache = new Cache() { + static class BytesFromArray extends Bytes implements Accountable { + private final byte[] values; - protected Object createValue(IndexReader reader, Object entryKey) + public BytesFromArray(byte[] values) { + this.values = values; + } + + @Override + public byte get(int docID) { + return values[docID]; + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.sizeOf(values); + } + } + + static final class ByteCache extends Cache { + ByteCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { - Entry entry = (Entry) entryKey; - String field = entry.field; - ByteParser parser = (ByteParser) entry.custom; - final byte[] retArray = new byte[reader.maxDoc()]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - byte termval = parser.parseByte(term.text()); - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; + + int maxDoc = reader.maxDoc(); + final byte[] values; + final ByteParser parser = (ByteParser) key.custom; + if (parser == null) { + // Confusing: must delegate to wrapper (vs simply + // setting parser = DEFAULT_SHORT_PARSER) so cache + // key includes DEFAULT_SHORT_PARSER: + return (Accountable) wrapper.getBytes(reader, key.field, DEFAULT_BYTE_PARSER, setDocsWithField); + } + + values = new byte[maxDoc]; + + Uninvert u = new Uninvert() { + private byte currentValue; + + @Override + public 
void visitTerm(BytesRef term) { + currentValue = parser.parseByte(term); } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + + @Override + public void visitDoc(int docID) { + values[docID] = currentValue; + } + + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + return parser.termsEnum(terms); + } + }; + + u.uninvert(reader, key.field, setDocsWithField); + + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, u.docsWithField); } - return retArray; + + return new BytesFromArray(values); } - }; + } // inherit javadocs - public short[] getShorts (IndexReader reader, String field) throws IOException { - return getShorts(reader, field, SHORT_PARSER); + public Shorts getShorts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException { + return getShorts(reader, field, null, setDocsWithField); } // inherit javadocs - public short[] getShorts(IndexReader reader, String field, ShortParser parser) + public Shorts getShorts(AtomicReader reader, String field, ShortParser parser, boolean setDocsWithField) throws IOException { - return (short[]) shortsCache.get(reader, new Entry(field, parser)); + final NumericDocValues valuesIn = reader.getNumericDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return new Shorts() { + @Override + public short get(int docID) { + return (short) valuesIn.get(docID); + } + }; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return Shorts.EMPTY; + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Shorts.EMPTY; + } + return (Shorts) caches.get(Short.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); + } } - Cache shortsCache = new Cache() { + static class ShortsFromArray extends Shorts implements Accountable { + private final short[] values; - protected Object createValue(IndexReader reader, Object entryKey) + public ShortsFromArray(short[] values) { + this.values = values; + } + + @Override + public short get(int docID) { + return values[docID]; + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.sizeOf(values); + } + } + + static final class ShortCache extends Cache { + ShortCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { - Entry entry = (Entry) entryKey; - String field = entry.field; - ShortParser parser = (ShortParser) entry.custom; - final short[] retArray = new short[reader.maxDoc()]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - short termval = parser.parseShort(term.text()); - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; + + int maxDoc = reader.maxDoc(); + final short[] values; + final ShortParser parser = (ShortParser) key.custom; + if (parser == null) { + // Confusing: must delegate to wrapper (vs simply + // setting parser = DEFAULT_SHORT_PARSER) so cache + // key includes DEFAULT_SHORT_PARSER: + return (Accountable) wrapper.getShorts(reader, key.field, DEFAULT_SHORT_PARSER, 
setDocsWithField); + } + + values = new short[maxDoc]; + Uninvert u = new Uninvert() { + private short currentValue; + + @Override + public void visitTerm(BytesRef term) { + currentValue = parser.parseShort(term); } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + + @Override + public void visitDoc(int docID) { + values[docID] = currentValue; + } + + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + return parser.termsEnum(terms); + } + }; + + u.uninvert(reader, key.field, setDocsWithField); + + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, u.docsWithField); } - return retArray; + return new ShortsFromArray(values); } - }; - - // inherit javadocs - public int[] getInts (IndexReader reader, String field) throws IOException { - return getInts(reader, field, INT_PARSER); } // inherit javadocs - public int[] getInts(IndexReader reader, String field, IntParser parser) + public Ints getInts (AtomicReader reader, String field, boolean setDocsWithField) throws IOException { + return getInts(reader, field, null, setDocsWithField); + } + + @Override + public Ints getInts(AtomicReader reader, String field, IntParser parser, boolean setDocsWithField) throws IOException { - return (int[]) intsCache.get(reader, new Entry(field, parser)); + final NumericDocValues valuesIn = reader.getNumericDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return new Ints() { + @Override + public int get(int docID) { + return (int) valuesIn.get(docID); + } + }; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return Ints.EMPTY; + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Ints.EMPTY; + } + return (Ints) caches.get(Integer.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); + } } - Cache intsCache = new Cache() { + static class IntsFromArray extends Ints implements Accountable { + private final PackedInts.Reader values; + private final int minValue; - protected Object createValue(IndexReader reader, Object entryKey) + public IntsFromArray(PackedInts.Reader values, int minValue) { + this.values = values; + this.minValue = minValue; + } + + @Override + public int get(int docID) { + final long delta = values.get(docID); + return minValue + (int) delta; + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_INT + values.ramBytesUsed(); + } + } + + private static class HoldsOneThing { + private T it; + + public void set(T it) { + this.it = it; + } + + public T get() { + return it; + } + } + + private static class GrowableWriterAndMinValue { + GrowableWriterAndMinValue(GrowableWriter array, long minValue) { + this.writer = array; + this.minValue = minValue; + } + public GrowableWriter writer; + public long minValue; + } + + static final class IntCache extends Cache { + IntCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { - Entry entry = (Entry) entryKey; - String field = entry.field; - IntParser parser = (IntParser) entry.custom; - final int[] retArray = new int[reader.maxDoc()]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = 
reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - int termval = parser.parseInt(term.text()); - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; + + final IntParser parser = (IntParser) key.custom; + if (parser == null) { + // Confusing: must delegate to wrapper (vs simply + // setting parser = + // DEFAULT_INT_PARSER/NUMERIC_UTILS_INT_PARSER) so + // cache key includes + // DEFAULT_INT_PARSER/NUMERIC_UTILS_INT_PARSER: + try { + return (Accountable) wrapper.getInts(reader, key.field, DEFAULT_INT_PARSER, setDocsWithField); + } catch (NumberFormatException ne) { + return (Accountable) wrapper.getInts(reader, key.field, NUMERIC_UTILS_INT_PARSER, setDocsWithField); + } + } + + final HoldsOneThing valuesRef = new HoldsOneThing<>(); + + Uninvert u = new Uninvert() { + private int minValue; + private int currentValue; + private GrowableWriter values; + + @Override + public void visitTerm(BytesRef term) { + currentValue = parser.parseInt(term); + if (values == null) { + // Lazy alloc so for the numeric field case + // (which will hit a NumberFormatException + // when we first try the DEFAULT_INT_PARSER), + // we don't double-alloc: + int startBitsPerValue; + // Make sure than missing values (0) can be stored without resizing + if (currentValue < 0) { + minValue = currentValue; + startBitsPerValue = PackedInts.bitsRequired((-minValue) & 0xFFFFFFFFL); + } else { + minValue = 0; + startBitsPerValue = PackedInts.bitsRequired(currentValue); + } + values = new GrowableWriter(startBitsPerValue, reader.maxDoc(), PackedInts.FAST); + if (minValue != 0) { + values.fill(0, values.size(), (-minValue) & 0xFFFFFFFFL); // default value must be 0 + } + valuesRef.set(new GrowableWriterAndMinValue(values, minValue)); + } } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + + @Override + public void visitDoc(int docID) { + values.set(docID, (currentValue - minValue) & 0xFFFFFFFFL); + } + + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + return parser.termsEnum(terms); + } + }; + + u.uninvert(reader, key.field, setDocsWithField); + + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, u.docsWithField); } - return retArray; + GrowableWriterAndMinValue values = valuesRef.get(); + if (values == null) { + return new IntsFromArray(new PackedInts.NullReader(reader.maxDoc()), 0); + } + assert values.writer.getBitsPerValue() <= 32; + return new IntsFromArray(values.writer.getMutable(), (int) values.minValue); } - }; + } + public Bits getDocsWithField(AtomicReader reader, String field) throws IOException { + final FieldInfo fieldInfo = reader.getFieldInfos().fieldInfo(field); + if (fieldInfo == null) { + // field does not exist or has no value + return new Bits.MatchNoBits(reader.maxDoc()); + } else if (fieldInfo.hasDocValues()) { + return reader.getDocsWithField(field); + } else if (!fieldInfo.isIndexed()) { + return new Bits.MatchNoBits(reader.maxDoc()); + } + BitsEntry entry = (BitsEntry) caches.get(DocsWithFieldCache.class).get(reader, new CacheKey(field, null), false); + return entry.bits; + } - // inherit javadocs - public float[] getFloats (IndexReader reader, String field) + static final class DocsWithFieldCache extends Cache { + DocsWithFieldCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored 
*/) throws IOException { - return getFloats(reader, field, FLOAT_PARSER); + final String field = key.field; + final int maxDoc = reader.maxDoc(); + + // Visit all docs that have terms for this field + FixedBitSet res = null; + Terms terms = reader.terms(field); + if (terms != null) { + final int termsDocCount = terms.getDocCount(); + assert termsDocCount <= maxDoc; + if (termsDocCount == maxDoc) { + // Fast case: all docs have this field: + return new BitsEntry(new Bits.MatchAllBits(maxDoc)); + } + final TermsEnum termsEnum = terms.iterator(null); + DocsEnum docs = null; + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + if (res == null) { + // lazy init + res = new FixedBitSet(maxDoc); + } + + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + // TODO: use bulk API + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + res.set(docID); + } + } + } + if (res == null) { + return new BitsEntry(new Bits.MatchNoBits(maxDoc)); + } + final int numSet = res.cardinality(); + if (numSet >= maxDoc) { + // The cardinality of the BitSet is maxDoc if all documents have a value. + assert numSet == maxDoc; + return new BitsEntry(new Bits.MatchAllBits(maxDoc)); + } + return new BitsEntry(res); + } } - // inherit javadocs - public float[] getFloats(IndexReader reader, String field, FloatParser parser) - throws IOException { - return (float[]) floatsCache.get(reader, new Entry(field, parser)); + @Override + public Floats getFloats (AtomicReader reader, String field, boolean setDocsWithField) + throws IOException { + return getFloats(reader, field, null, setDocsWithField); } - Cache floatsCache = new Cache() { + @Override + public Floats getFloats(AtomicReader reader, String field, FloatParser parser, boolean setDocsWithField) + throws IOException { + final NumericDocValues valuesIn = reader.getNumericDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return new Floats() { + @Override + public float get(int docID) { + return Float.intBitsToFloat((int) valuesIn.get(docID)); + } + }; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return Floats.EMPTY; + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Floats.EMPTY; + } + return (Floats) caches.get(Float.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); + } + } - protected Object createValue(IndexReader reader, Object entryKey) + static class FloatsFromArray extends Floats implements Accountable { + private final float[] values; + + public FloatsFromArray(float[] values) { + this.values = values; + } + + @Override + public float get(int docID) { + return values[docID]; + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.sizeOf(values); + } + } + + static final class FloatCache extends Cache { + FloatCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { - Entry entry = (Entry) entryKey; - String field = entry.field; - FloatParser parser = (FloatParser) entry.custom; - final float[] retArray = new float[reader.maxDoc()]; - TermDocs termDocs = reader.termDocs(); - 
TermEnum termEnum = reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - float termval = parser.parseFloat(term.text()); - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; + + final FloatParser parser = (FloatParser) key.custom; + if (parser == null) { + // Confusing: must delegate to wrapper (vs simply + // setting parser = + // DEFAULT_FLOAT_PARSER/NUMERIC_UTILS_FLOAT_PARSER) so + // cache key includes + // DEFAULT_FLOAT_PARSER/NUMERIC_UTILS_FLOAT_PARSER: + try { + return (Accountable) wrapper.getFloats(reader, key.field, DEFAULT_FLOAT_PARSER, setDocsWithField); + } catch (NumberFormatException ne) { + return (Accountable) wrapper.getFloats(reader, key.field, NUMERIC_UTILS_FLOAT_PARSER, setDocsWithField); + } + } + + final HoldsOneThing valuesRef = new HoldsOneThing<>(); + + Uninvert u = new Uninvert() { + private float currentValue; + private float[] values; + + @Override + public void visitTerm(BytesRef term) { + currentValue = parser.parseFloat(term); + if (values == null) { + // Lazy alloc so for the numeric field case + // (which will hit a NumberFormatException + // when we first try the DEFAULT_INT_PARSER), + // we don't double-alloc: + values = new float[reader.maxDoc()]; + valuesRef.set(values); + } } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + + @Override + public void visitDoc(int docID) { + values[docID] = currentValue; + } + + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + return parser.termsEnum(terms); + } + }; + + u.uninvert(reader, key.field, setDocsWithField); + + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, u.docsWithField); } - return retArray; + + float[] values = valuesRef.get(); + if (values == null) { + values = new float[reader.maxDoc()]; + } + return new FloatsFromArray(values); } - }; + } - // inherit javadocs - public String[] getStrings(IndexReader reader, String field) + @Override + public Longs getLongs(AtomicReader reader, String field, boolean setDocsWithField) throws IOException { + return getLongs(reader, field, null, setDocsWithField); + } + + @Override + public Longs getLongs(AtomicReader reader, String field, FieldCache.LongParser parser, boolean setDocsWithField) throws IOException { - return (String[]) stringsCache.get(reader, field); + final NumericDocValues valuesIn = reader.getNumericDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return new Longs() { + @Override + public long get(int docID) { + return valuesIn.get(docID); + } + }; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return Longs.EMPTY; + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Longs.EMPTY; + } + return (Longs) caches.get(Long.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); + } } - Cache stringsCache = new Cache() { + static class LongsFromArray extends Longs implements Accountable { + private final PackedInts.Reader values; + private final long minValue; - protected Object createValue(IndexReader reader, Object fieldKey) + public LongsFromArray(PackedInts.Reader values, long minValue) { + this.values = values; + this.minValue = minValue; + } + + @Override + public long 
get(int docID) { + return minValue + values.get(docID); + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.NUM_BYTES_LONG + values.ramBytesUsed(); + } + } + + static final class LongCache extends Cache { + LongCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { - String field = ((String) fieldKey).intern(); - final String[] retArray = new String[reader.maxDoc()]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - String termval = term.text(); - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; + + final LongParser parser = (LongParser) key.custom; + if (parser == null) { + // Confusing: must delegate to wrapper (vs simply + // setting parser = + // DEFAULT_LONG_PARSER/NUMERIC_UTILS_LONG_PARSER) so + // cache key includes + // DEFAULT_LONG_PARSER/NUMERIC_UTILS_LONG_PARSER: + try { + return (Accountable) wrapper.getLongs(reader, key.field, DEFAULT_LONG_PARSER, setDocsWithField); + } catch (NumberFormatException ne) { + return (Accountable) wrapper.getLongs(reader, key.field, NUMERIC_UTILS_LONG_PARSER, setDocsWithField); + } + } + + final HoldsOneThing valuesRef = new HoldsOneThing<>(); + + Uninvert u = new Uninvert() { + private long minValue; + private long currentValue; + private GrowableWriter values; + + @Override + public void visitTerm(BytesRef term) { + currentValue = parser.parseLong(term); + if (values == null) { + // Lazy alloc so for the numeric field case + // (which will hit a NumberFormatException + // when we first try the DEFAULT_INT_PARSER), + // we don't double-alloc: + int startBitsPerValue; + // Make sure than missing values (0) can be stored without resizing + if (currentValue < 0) { + minValue = currentValue; + startBitsPerValue = minValue == Long.MIN_VALUE ? 
64 : PackedInts.bitsRequired(-minValue); + } else { + minValue = 0; + startBitsPerValue = PackedInts.bitsRequired(currentValue); + } + values = new GrowableWriter(startBitsPerValue, reader.maxDoc(), PackedInts.FAST); + if (minValue != 0) { + values.fill(0, values.size(), -minValue); // default value must be 0 + } + valuesRef.set(new GrowableWriterAndMinValue(values, minValue)); + } } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + + @Override + public void visitDoc(int docID) { + values.set(docID, currentValue - minValue); + } + + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + return parser.termsEnum(terms); + } + }; + + u.uninvert(reader, key.field, setDocsWithField); + + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, u.docsWithField); } - return retArray; + GrowableWriterAndMinValue values = valuesRef.get(); + if (values == null) { + return new LongsFromArray(new PackedInts.NullReader(reader.maxDoc()), 0L); + } + return new LongsFromArray(values.writer.getMutable(), values.minValue); } - }; + } - // inherit javadocs - public StringIndex getStringIndex(IndexReader reader, String field) + @Override + public Doubles getDoubles(AtomicReader reader, String field, boolean setDocsWithField) + throws IOException { + return getDoubles(reader, field, null, setDocsWithField); + } + + @Override + public Doubles getDoubles(AtomicReader reader, String field, FieldCache.DoubleParser parser, boolean setDocsWithField) throws IOException { - return (StringIndex) stringsIndexCache.get(reader, field); + final NumericDocValues valuesIn = reader.getNumericDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return new Doubles() { + @Override + public double get(int docID) { + return Double.longBitsToDouble(valuesIn.get(docID)); + } + }; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return Doubles.EMPTY; + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return Doubles.EMPTY; + } + return (Doubles) caches.get(Double.TYPE).get(reader, new CacheKey(field, parser), setDocsWithField); + } } - Cache stringsIndexCache = new Cache() { + static class DoublesFromArray extends Doubles implements Accountable { + private final double[] values; - protected Object createValue(IndexReader reader, Object fieldKey) + public DoublesFromArray(double[] values) { + this.values = values; + } + + @Override + public double get(int docID) { + return values[docID]; + } + + @Override + public long ramBytesUsed() { + return RamUsageEstimator.NUM_BYTES_OBJECT_REF + RamUsageEstimator.sizeOf(values); + } + } + + static final class DoubleCache extends Cache { + DoubleCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(final AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { - String field = ((String) fieldKey).intern(); - final int[] retArray = new int[reader.maxDoc()]; - String[] mterms = new String[reader.maxDoc()+1]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - int t = 0; // current term number - // an entry for documents that have no terms in this field - // should a document with no terms be at top or bottom? 
- // this puts them at the top - if it is changed, FieldDocSortedHitQueue - // needs to change as well. - mterms[t++] = null; + final DoubleParser parser = (DoubleParser) key.custom; + if (parser == null) { + // Confusing: must delegate to wrapper (vs simply + // setting parser = + // DEFAULT_DOUBLE_PARSER/NUMERIC_UTILS_DOUBLE_PARSER) so + // cache key includes + // DEFAULT_DOUBLE_PARSER/NUMERIC_UTILS_DOUBLE_PARSER: + try { + return (Accountable) wrapper.getDoubles(reader, key.field, DEFAULT_DOUBLE_PARSER, setDocsWithField); + } catch (NumberFormatException ne) { + return (Accountable) wrapper.getDoubles(reader, key.field, NUMERIC_UTILS_DOUBLE_PARSER, setDocsWithField); + } + } - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; + final HoldsOneThing valuesRef = new HoldsOneThing<>(); - // store term text - // we expect that there is at most one term per document - if (t >= mterms.length) throw new RuntimeException ("there are more terms than " + - "documents in field \"" + field + "\", but it's impossible to sort on " + - "tokenized fields"); - mterms[t] = term.text(); + Uninvert u = new Uninvert() { + private double currentValue; + private double[] values; - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = t; + @Override + public void visitTerm(BytesRef term) { + currentValue = parser.parseDouble(term); + if (values == null) { + // Lazy alloc so for the numeric field case + // (which will hit a NumberFormatException + // when we first try the DEFAULT_INT_PARSER), + // we don't double-alloc: + values = new double[reader.maxDoc()]; + valuesRef.set(values); + } } - t++; - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); - } + @Override + public void visitDoc(int docID) { + values[docID] = currentValue; + } + + @Override + protected TermsEnum termsEnum(Terms terms) throws IOException { + return parser.termsEnum(terms); + } + }; - if (t == 0) { - // if there are no terms, make the term array - // have a single null entry - mterms = new String[1]; - } else if (t < mterms.length) { - // if there are less terms than documents, - // trim off the dead array space - String[] terms = new String[t]; - System.arraycopy (mterms, 0, terms, 0, t); - mterms = terms; + u.uninvert(reader, key.field, setDocsWithField); + + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, u.docsWithField); } + double[] values = valuesRef.get(); + if (values == null) { + values = new double[reader.maxDoc()]; + } + return new DoublesFromArray(values); + } + } - StringIndex value = new StringIndex (retArray, mterms); - return value; + public static class SortedDocValuesImpl implements Accountable { + private final PagedBytes.Reader bytes; + private final PackedLongValues termOrdToBytesOffset; + private final PackedInts.Reader docToTermOrd; + private final int numOrd; + + public SortedDocValuesImpl(PagedBytes.Reader bytes, PackedLongValues termOrdToBytesOffset, PackedInts.Reader docToTermOrd, int numOrd) { + this.bytes = bytes; + this.docToTermOrd = docToTermOrd; + this.termOrdToBytesOffset = termOrdToBytesOffset; + this.numOrd = numOrd; } - }; + + public SortedDocValues iterator() { + final BytesRef term = new BytesRef(); + + return new SortedDocValues() { + @Override + public int getValueCount() { + return numOrd; + } - /** The pattern used to detect integer values in a field */ - /** removed for java 1.3 compatibility - protected static final Pattern pIntegers = Pattern.compile ("[0-9\\-]+"); - **/ 
+ @Override + public int getOrd(int docID) { + // Subtract 1, matching the 1+ord we did when + // storing, so that missing values, which are 0 in the + // packed ints, are returned as -1 ord: + return (int) docToTermOrd.get(docID)-1; + } + + @Override + public BytesRef lookupOrd(int ord) { + if (ord < 0) { + throw new IllegalArgumentException("ord must be >=0 (got ord=" + ord + ")"); + } + bytes.fill(term, termOrdToBytesOffset.get(ord)); + return term; + } + }; + } - /** The pattern used to detect float values in a field */ - /** - * removed for java 1.3 compatibility - * protected static final Object pFloats = Pattern.compile ("[0-9+\\-\\.eEfFdD]+"); - */ + @Override + public long ramBytesUsed() { + return 3*RamUsageEstimator.NUM_BYTES_OBJECT_REF + + RamUsageEstimator.NUM_BYTES_INT + + bytes.ramBytesUsed() + + termOrdToBytesOffset.ramBytesUsed() + + docToTermOrd.ramBytesUsed(); + } + } - // inherit javadocs - public Object getAuto(IndexReader reader, String field) throws IOException { - return autoCache.get(reader, field); + public SortedDocValues getTermsIndex(AtomicReader reader, String field) throws IOException { + return getTermsIndex(reader, field, PackedInts.FAST); } - Cache autoCache = new Cache() { + public SortedDocValues getTermsIndex(AtomicReader reader, String field, float acceptableOverheadRatio) throws IOException { + SortedDocValues valuesIn = reader.getSortedDocValues(field); + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return valuesIn; + } else { + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return DocValues.emptySorted(); + } else if (info.hasDocValues()) { + // we don't try to build a sorted instance from numeric/binary doc + // values because dedup can be very costly + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return DocValues.emptySorted(); + } + SortedDocValuesImpl impl = (SortedDocValuesImpl) caches.get(SortedDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), false); + return impl.iterator(); + } + } - protected Object createValue(IndexReader reader, Object fieldKey) + static class SortedDocValuesCache extends Cache { + SortedDocValuesCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */) throws IOException { - String field = ((String)fieldKey).intern(); - TermEnum enumerator = reader.terms (new Term (field)); - try { - Term term = enumerator.term(); - if (term == null) { - throw new RuntimeException ("no terms in field " + field + " - cannot determine sort type"); - } - Object ret = null; - if (term.field() == field) { - String termtext = term.text().trim(); - /** - * Java 1.4 level code: + final int maxDoc = reader.maxDoc(); - if (pIntegers.matcher(termtext).matches()) - return IntegerSortedHitQueue.comparator (reader, enumerator, field); + Terms terms = reader.terms(key.field); - else if (pFloats.matcher(termtext).matches()) - return FloatSortedHitQueue.comparator (reader, enumerator, field); - */ + final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); - // Java 1.3 level code: - try { - Integer.parseInt (termtext); - ret = getInts (reader, field); - } catch (NumberFormatException nfe1) { - try { - Float.parseFloat (termtext); - ret = getFloats (reader, field); - } catch 
(NumberFormatException nfe3) { - ret = getStringIndex (reader, field); - } - } + final PagedBytes bytes = new PagedBytes(15); + + int startTermsBPV; + + final int termCountHardLimit; + if (maxDoc == Integer.MAX_VALUE) { + termCountHardLimit = Integer.MAX_VALUE; + } else { + termCountHardLimit = maxDoc+1; + } + + // TODO: use Uninvert? + if (terms != null) { + // Try for coarse estimate for number of bits; this + // should be an underestimate most of the time, which + // is fine -- GrowableWriter will reallocate as needed + long numUniqueTerms = terms.size(); + if (numUniqueTerms != -1L) { + if (numUniqueTerms > termCountHardLimit) { + // app is misusing the API (there is more than + // one term per doc); in this case we make best + // effort to load what we can (see LUCENE-2142) + numUniqueTerms = termCountHardLimit; + } + + startTermsBPV = PackedInts.bitsRequired(numUniqueTerms); } else { - throw new RuntimeException ("field \"" + field + "\" does not appear to be indexed"); + startTermsBPV = 1; } - return ret; - } finally { - enumerator.close(); + } else { + startTermsBPV = 1; } + + PackedLongValues.Builder termOrdToBytesOffset = PackedLongValues.monotonicBuilder(PackedInts.COMPACT); + final GrowableWriter docToTermOrd = new GrowableWriter(startTermsBPV, maxDoc, acceptableOverheadRatio); + + int termOrd = 0; + + // TODO: use Uninvert? + + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + DocsEnum docs = null; + + while(true) { + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + if (termOrd >= termCountHardLimit) { + break; + } + + termOrdToBytesOffset.add(bytes.copyUsingLengthPrefix(term)); + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + // Store 1+ ord into packed bits + docToTermOrd.set(docID, 1+termOrd); + } + termOrd++; + } + } + + // maybe an int-only impl? + return new SortedDocValuesImpl(bytes.freeze(true), termOrdToBytesOffset.build(), docToTermOrd.getMutable(), termOrd); } - }; + } - // inherit javadocs - public Comparable[] getCustom(IndexReader reader, String field, - SortComparator comparator) throws IOException { - return (Comparable[]) customCache.get(reader, new Entry(field, comparator)); + private static class BinaryDocValuesImpl implements Accountable { + private final PagedBytes.Reader bytes; + private final PackedInts.Reader docToOffset; + + public BinaryDocValuesImpl(PagedBytes.Reader bytes, PackedInts.Reader docToOffset) { + this.bytes = bytes; + this.docToOffset = docToOffset; + } + + public BinaryDocValues iterator() { + final BytesRef term = new BytesRef(); + return new BinaryDocValues() { + @Override + public BytesRef get(int docID) { + final int pointer = (int) docToOffset.get(docID); + if (pointer == 0) { + term.length = 0; + } else { + bytes.fill(term, pointer); + } + return term; + } + }; + } + + @Override + public long ramBytesUsed() { + return 2*RamUsageEstimator.NUM_BYTES_OBJECT_REF + bytes.ramBytesUsed() + docToOffset.ramBytesUsed(); + } } - Cache customCache = new Cache() { + // TODO: this if DocTermsIndex was already created, we + // should share it... 
+ public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField) throws IOException { + return getTerms(reader, field, setDocsWithField, PackedInts.FAST); + } - protected Object createValue(IndexReader reader, Object entryKey) + public BinaryDocValues getTerms(AtomicReader reader, String field, boolean setDocsWithField, float acceptableOverheadRatio) throws IOException { + BinaryDocValues valuesIn = reader.getBinaryDocValues(field); + if (valuesIn == null) { + valuesIn = reader.getSortedDocValues(field); + } + + if (valuesIn != null) { + // Not cached here by FieldCacheImpl (cached instead + // per-thread by SegmentReader): + return valuesIn; + } + + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return DocValues.emptyBinary(); + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return DocValues.emptyBinary(); + } + + BinaryDocValuesImpl impl = (BinaryDocValuesImpl) caches.get(BinaryDocValues.class).get(reader, new CacheKey(field, acceptableOverheadRatio), setDocsWithField); + return impl.iterator(); + } + + static final class BinaryDocValuesCache extends Cache { + BinaryDocValuesCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField) throws IOException { - Entry entry = (Entry) entryKey; - String field = entry.field; - SortComparator comparator = (SortComparator) entry.custom; - final Comparable[] retArray = new Comparable[reader.maxDoc()]; - TermDocs termDocs = reader.termDocs(); - TermEnum termEnum = reader.terms (new Term (field)); - try { - do { - Term term = termEnum.term(); - if (term==null || term.field() != field) break; - Comparable termval = comparator.getComparable (term.text()); - termDocs.seek (termEnum); - while (termDocs.next()) { - retArray[termDocs.doc()] = termval; + + // TODO: would be nice to first check if DocTermsIndex + // was already cached for this field and then return + // that instead, to avoid insanity + + final int maxDoc = reader.maxDoc(); + Terms terms = reader.terms(key.field); + + final float acceptableOverheadRatio = ((Float) key.custom).floatValue(); + + final int termCountHardLimit = maxDoc; + + // Holds the actual term data, expanded. 
+ final PagedBytes bytes = new PagedBytes(15); + + int startBPV; + + if (terms != null) { + // Try for coarse estimate for number of bits; this + // should be an underestimate most of the time, which + // is fine -- GrowableWriter will reallocate as needed + long numUniqueTerms = terms.size(); + if (numUniqueTerms != -1L) { + if (numUniqueTerms > termCountHardLimit) { + numUniqueTerms = termCountHardLimit; } - } while (termEnum.next()); - } finally { - termDocs.close(); - termEnum.close(); + startBPV = PackedInts.bitsRequired(numUniqueTerms*4); + } else { + startBPV = 1; + } + } else { + startBPV = 1; } - return retArray; + + final GrowableWriter docToOffset = new GrowableWriter(startBPV, maxDoc, acceptableOverheadRatio); + + // pointer==0 means not set + bytes.copyUsingLengthPrefix(new BytesRef()); + + if (terms != null) { + int termCount = 0; + final TermsEnum termsEnum = terms.iterator(null); + DocsEnum docs = null; + while(true) { + if (termCount++ == termCountHardLimit) { + // app is misusing the API (there is more than + // one term per doc); in this case we make best + // effort to load what we can (see LUCENE-2142) + break; + } + + final BytesRef term = termsEnum.next(); + if (term == null) { + break; + } + final long pointer = bytes.copyUsingLengthPrefix(term); + docs = termsEnum.docs(null, docs, DocsEnum.FLAG_NONE); + while (true) { + final int docID = docs.nextDoc(); + if (docID == DocIdSetIterator.NO_MORE_DOCS) { + break; + } + docToOffset.set(docID, pointer); + } + } + } + + final PackedInts.Reader offsetReader = docToOffset.getMutable(); + if (setDocsWithField) { + wrapper.setDocsWithField(reader, key.field, new Bits() { + @Override + public boolean get(int index) { + return offsetReader.get(index) != 0; + } + + @Override + public int length() { + return maxDoc; + } + }); + } + // maybe an int-only impl? + return new BinaryDocValuesImpl(bytes.freeze(true), offsetReader); } - }; - + } + + // TODO: this if DocTermsIndex was already created, we + // should share it... + public SortedSetDocValues getDocTermOrds(AtomicReader reader, String field) throws IOException { + SortedSetDocValues dv = reader.getSortedSetDocValues(field); + if (dv != null) { + return dv; + } + + SortedDocValues sdv = reader.getSortedDocValues(field); + if (sdv != null) { + return DocValues.singleton(sdv); + } + + final FieldInfo info = reader.getFieldInfos().fieldInfo(field); + if (info == null) { + return DocValues.emptySortedSet(); + } else if (info.hasDocValues()) { + throw new IllegalStateException("Type mismatch: " + field + " was indexed as " + info.getDocValuesType()); + } else if (!info.isIndexed()) { + return DocValues.emptySortedSet(); + } + + DocTermOrds dto = (DocTermOrds) caches.get(DocTermOrds.class).get(reader, new CacheKey(field, null), false); + return dto.iterator(reader); + } + + static final class DocTermOrdsCache extends Cache { + DocTermOrdsCache(FieldCacheImpl wrapper) { + super(wrapper); + } + + @Override + protected Accountable createValue(AtomicReader reader, CacheKey key, boolean setDocsWithField /* ignored */) + throws IOException { + return new DocTermOrds(reader, null, key.field); + } + } + + private volatile PrintStream infoStream; + + public void setInfoStream(PrintStream stream) { + infoStream = stream; + } + + public PrintStream getInfoStream() { + return infoStream; + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheRangeFilter.java'. Fisheye: No comparison available. Pass `N' to diff? 
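For orientation, here is a minimal usage sketch (not part of the change itself) showing how the uninverting cache introduced above is typically consumed through FieldCache.DEFAULT. The field names "price" and "category" and the dump helper are hypothetical; the calls follow the getInts/getDocsWithField/getTermsIndex signatures visible in the FieldCacheImpl diff.

    import java.io.IOException;
    import org.apache.lucene.index.AtomicReader;
    import org.apache.lucene.index.SortedDocValues;
    import org.apache.lucene.search.FieldCache;
    import org.apache.lucene.util.Bits;
    import org.apache.lucene.util.BytesRef;

    class FieldCacheUsageSketch {
      // Hypothetical helper: prints the cached per-document values of two example fields.
      static void dump(AtomicReader reader) throws IOException {
        // Uninverts the indexed terms of "price" into a packed per-document int structure
        // (or wraps existing NumericDocValues, as the getInts implementation above shows).
        FieldCache.Ints prices = FieldCache.DEFAULT.getInts(reader, "price", false);
        // Which documents actually carry a value for the field:
        Bits docsWithPrice = FieldCache.DEFAULT.getDocsWithField(reader, "price");
        // Term ordinals for a single-valued string field:
        SortedDocValues categories = FieldCache.DEFAULT.getTermsIndex(reader, "category");

        for (int docID = 0; docID < reader.maxDoc(); docID++) {
          String price = docsWithPrice.get(docID) ? Integer.toString(prices.get(docID)) : "<none>";
          int ord = categories.getOrd(docID);  // -1 means the document has no value
          String category = ord == -1 ? "<none>" : categories.lookupOrd(ord).utf8ToString();
          System.out.println(docID + " price=" + price + " category=" + category);
        }
      }
    }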
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheRewriteMethod.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldCacheTermsFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldComparator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldComparatorSource.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/FieldDoc.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/FieldDoc.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/FieldDoc.java 17 Aug 2012 14:54:57 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/FieldDoc.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,6 +17,7 @@ * limitations under the License. */ +import java.util.Arrays; /** * Expert: A ScoreDoc which also contains information about @@ -34,30 +35,46 @@ *

      Created: Feb 11, 2004 1:23:38 PM * * @since lucene 1.4 - * @version $Id$ * @see ScoreDoc * @see TopFieldDocs */ -public class FieldDoc -extends ScoreDoc { +public class FieldDoc extends ScoreDoc { - /** Expert: The values which are used to sort the referenced document. - * The order of these will match the original sort criteria given by a - * Sort object. Each Object will be either an Integer, Float or String, - * depending on the type of values in the terms of the original field. - * @see Sort - * @see Searcher#search(Query,Filter,int,Sort) - */ - public Comparable[] fields; + /** Expert: The values which are used to sort the referenced document. + * The order of these will match the original sort criteria given by a + * Sort object. Each Object will have been returned from + * the value method corresponding + * FieldComparator used to sort this field. + * @see Sort + * @see IndexSearcher#search(Query,Filter,int,Sort) + */ + public Object[] fields; - /** Expert: Creates one of these objects with empty sort information. */ - public FieldDoc (int doc, float score) { - super (doc, score); - } + /** Expert: Creates one of these objects with empty sort information. */ + public FieldDoc(int doc, float score) { + super (doc, score); + } - /** Expert: Creates one of these objects with the given sort information. */ - public FieldDoc (int doc, float score, Comparable[] fields) { - super (doc, score); - this.fields = fields; - } -} \ No newline at end of file + /** Expert: Creates one of these objects with the given sort information. */ + public FieldDoc(int doc, float score, Object[] fields) { + super (doc, score); + this.fields = fields; + } + + /** Expert: Creates one of these objects with the given sort information. */ + public FieldDoc(int doc, float score, Object[] fields, int shardIndex) { + super (doc, score, shardIndex); + this.fields = fields; + } + + // A convenience method for debugging. + @Override + public String toString() { + // super.toString returns the doc and score information, so just add the + // fields information + StringBuilder sb = new StringBuilder(super.toString()); + sb.append(" fields="); + sb.append(Arrays.toString(fields)); + return sb.toString(); + } +} Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldDocSortedHitQueue.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldSortedHitQueue.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldValueFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FieldValueHitQueue.java'. Fisheye: No comparison available. Pass `N' to diff? 
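As a hedged illustration of where these per-document sort values come from in practice, the following sketch runs a sorted search and reads them back through the IndexSearcher.search(Query,Filter,int,Sort) entry point referenced in the javadoc above. The Directory parameter, the "price" field, and the MatchAllDocsQuery are assumptions made only for this example.

    import java.io.IOException;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.search.FieldDoc;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.MatchAllDocsQuery;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.Sort;
    import org.apache.lucene.search.SortField;
    import org.apache.lucene.search.TopFieldDocs;
    import org.apache.lucene.store.Directory;

    class FieldDocUsageSketch {
      // Hypothetical helper: prints the ten lowest-priced documents of an existing index.
      static void printCheapest(Directory dir) throws IOException {
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
          IndexSearcher searcher = new IndexSearcher(reader);
          Sort sort = new Sort(new SortField("price", SortField.Type.INT));
          // A sorted search returns TopFieldDocs whose ScoreDocs are really FieldDocs:
          TopFieldDocs hits = searcher.search(new MatchAllDocsQuery(), null, 10, sort);
          for (ScoreDoc sd : hits.scoreDocs) {
            FieldDoc fd = (FieldDoc) sd;
            // fields[0] is the value produced by the FieldComparator for the first sort criterion:
            System.out.println("doc=" + fd.doc + " price=" + fd.fields[0]);
          }
        }
      }
    }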
Index: 3rdParty_sources/lucene/org/apache/lucene/search/Filter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/Filter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/Filter.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/Filter.java 16 Dec 2014 11:31:48 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,35 +17,44 @@ * limitations under the License. */ -import java.util.BitSet; import java.io.IOException; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.DocIdBitSet; -/** Abstract base class providing a mechanism to use a subset of an index - * for restriction or permission of index search results. - *

      - * Note: In Lucene 3.0 {@link #bits(IndexReader)} will be removed - * and {@link #getDocIdSet(IndexReader)} will be defined as abstract. - * All implementing classes must therefore implement {@link #getDocIdSet(IndexReader)} - * in order to work with Lucene 3.0. +import org.apache.lucene.index.AtomicReader; // javadocs +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.Bits; + +/** + * Abstract base class for restricting which documents may + * be returned during searching. */ -public abstract class Filter implements java.io.Serializable { +public abstract class Filter { + /** - * @return A BitSet with true for documents which should be permitted in - * search results, and false for those that should not. - * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. + * Creates a {@link DocIdSet} enumerating the documents that should be + * permitted in search results. NOTE: null can be + * returned if no documents are accepted by this Filter. + *

      + * Note: This method will be called once per segment in + * the index during searching. The returned {@link DocIdSet} + * must refer to document IDs for that segment, not for + * the top-level reader. + * + * @param context a {@link AtomicReaderContext} instance opened on the index currently + * searched on. Note, it is likely that the provided reader info does not + * represent the whole underlying index i.e. if the index has more than + * one segment the given reader only represents a single segment. + * The provided context is always an atomic context, so you can call + * {@link AtomicReader#fields()} + * on the context's reader, for example. + * + * @param acceptDocs + * Bits that represent the allowable docs to match (typically deleted docs + * but possibly filtering other documents) + * + * @return a DocIdSet that provides the documents which should be permitted or + * prohibited in search results. NOTE: null should be returned if + * the filter doesn't accept any documents otherwise internal optimization might not apply + * in the case an empty {@link DocIdSet} is returned. */ - public BitSet bits(IndexReader reader) throws IOException { - return null; - } - - /** - * @return a DocIdSet that provides the documents which should be - * permitted or prohibited in search results. - * @see DocIdBitSet - */ - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - return new DocIdBitSet(bits(reader)); - } + public abstract DocIdSet getDocIdSet(AtomicReaderContext context, Bits acceptDocs) throws IOException; } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FilterManager.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FilterScorer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FilteredDocIdSet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FilteredDocIdSetIterator.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/FilteredQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/FilteredQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/FilteredQuery.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/FilteredQuery.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,10 +17,15 @@ * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; +import java.util.Collection; +import java.util.Collections; import java.util.Set; @@ -30,62 +35,78 @@ *

      Note: the bits are retrieved from the filter each time this * query is used in a search - use a CachingWrapperFilter to avoid * regenerating the bits every time. - * - *

      Created: Apr 20, 2004 8:58:29 AM - * * @since 1.4 - * @version $Id$ * @see CachingWrapperFilter */ -public class FilteredQuery -extends Query { +public class FilteredQuery extends Query { - Query query; - Filter filter; + private final Query query; + private final Filter filter; + private final FilterStrategy strategy; /** * Constructs a new query which applies a filter to the results of the original query. - * Filter.getDocIdSet() will be called every time this query is used in a search. + * {@link Filter#getDocIdSet} will be called every time this query is used in a search. * @param query Query to be filtered, cannot be null. * @param filter Filter to apply to query results, cannot be null. */ - public FilteredQuery (Query query, Filter filter) { + public FilteredQuery(Query query, Filter filter) { + this(query, filter, RANDOM_ACCESS_FILTER_STRATEGY); + } + + /** + * Expert: Constructs a new query which applies a filter to the results of the original query. + * {@link Filter#getDocIdSet} will be called every time this query is used in a search. + * @param query Query to be filtered, cannot be null. + * @param filter Filter to apply to query results, cannot be null. + * @param strategy a filter strategy used to create a filtered scorer. + * + * @see FilterStrategy + */ + public FilteredQuery(Query query, Filter filter, FilterStrategy strategy) { + if (query == null || filter == null) + throw new IllegalArgumentException("Query and filter cannot be null."); + if (strategy == null) + throw new IllegalArgumentException("FilterStrategy can not be null"); + this.strategy = strategy; this.query = query; this.filter = filter; } - - - + /** * Returns a Weight that applies the filter to the enclosed query's Weight. * This is accomplished by overriding the Scorer returned by the Weight. 
*/ - protected Weight createWeight (final Searcher searcher) throws IOException { + @Override + public Weight createWeight(final IndexSearcher searcher) throws IOException { final Weight weight = query.createWeight (searcher); - final Similarity similarity = query.getSimilarity(searcher); return new Weight() { - private float value; - - // pass these methods through to enclosed query's weight - public float getValue() { return value; } - public float sumOfSquaredWeights() throws IOException { - return weight.sumOfSquaredWeights() * getBoost() * getBoost(); + + @Override + public boolean scoresDocsOutOfOrder() { + return true; } - public void normalize (float v) { - weight.normalize(v); - value = weight.getValue() * getBoost(); + + @Override + public float getValueForNormalization() throws IOException { + return weight.getValueForNormalization() * getBoost() * getBoost(); // boost sub-weight } - public Explanation explain (IndexReader ir, int i) throws IOException { + + @Override + public void normalize(float norm, float topLevelBoost) { + weight.normalize(norm, topLevelBoost * getBoost()); // incorporate boost + } + + @Override + public Explanation explain(AtomicReaderContext ir, int i) throws IOException { Explanation inner = weight.explain (ir, i); - if (getBoost()!=1) { - Explanation preBoost = inner; - inner = new Explanation(inner.getValue()*getBoost(),"product of:"); - inner.addDetail(new Explanation(getBoost(),"boost")); - inner.addDetail(preBoost); - } Filter f = FilteredQuery.this.filter; - DocIdSetIterator docIdSetIterator = f.getDocIdSet(ir).iterator(); - if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) { + DocIdSet docIdSet = f.getDocIdSet(ir, ir.reader().getLiveDocs()); + DocIdSetIterator docIdSetIterator = docIdSet == null ? DocIdSetIterator.empty() : docIdSet.iterator(); + if (docIdSetIterator == null) { + docIdSetIterator = DocIdSetIterator.empty(); + } + if (docIdSetIterator.advance(i) == i) { return inner; } else { Explanation result = new Explanation @@ -96,88 +117,276 @@ } // return this query - public Query getQuery() { return FilteredQuery.this; } + @Override + public Query getQuery() { + return FilteredQuery.this; + } // return a filtering scorer - public Scorer scorer (IndexReader indexReader) throws IOException { - final Scorer scorer = weight.scorer(indexReader); - final DocIdSetIterator docIdSetIterator = filter.getDocIdSet(indexReader).iterator(); + @Override + public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + assert filter != null; - return new Scorer(similarity) { + DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs); + if (filterDocIdSet == null) { + // this means the filter does not accept any documents. 
+ return null; + } - private boolean advanceToCommon() throws IOException { - while (scorer.doc() != docIdSetIterator.doc()) { - if (scorer.doc() < docIdSetIterator.doc()) { - if (!scorer.skipTo(docIdSetIterator.doc())) { - return false; - } - } else if (!docIdSetIterator.skipTo(scorer.doc())) { - return false; - } - } - return true; - } + return strategy.filteredScorer(context, weight, filterDocIdSet); + } - public boolean next() throws IOException { - return docIdSetIterator.next() && scorer.next() && advanceToCommon(); - } + // return a filtering top scorer + @Override + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { + assert filter != null; - public int doc() { return scorer.doc(); } + DocIdSet filterDocIdSet = filter.getDocIdSet(context, acceptDocs); + if (filterDocIdSet == null) { + // this means the filter does not accept any documents. + return null; + } - public boolean skipTo(int i) throws IOException { - return docIdSetIterator.skipTo(i) - && scorer.skipTo(docIdSetIterator.doc()) - && advanceToCommon(); - } + return strategy.filteredBulkScorer(context, weight, scoreDocsInOrder, filterDocIdSet); + } + }; + } + + /** + * A scorer that consults the filter iff a document was matched by the + * delegate scorer. This is useful if the filter computation is more expensive + * than document scoring or if the filter has a linear running time to compute + * the next matching doc like exact geo distances. + */ + private static final class QueryFirstScorer extends Scorer { + private final Scorer scorer; + private int scorerDoc = -1; + private final Bits filterBits; - public float score() throws IOException { return getBoost() * scorer.score(); } + protected QueryFirstScorer(Weight weight, Bits filterBits, Scorer other) { + super(weight); + this.scorer = other; + this.filterBits = filterBits; + } - // add an explanation about whether the document was filtered - public Explanation explain (int i) throws IOException { - Explanation exp = scorer.explain(i); - - if (docIdSetIterator.skipTo(i) && (docIdSetIterator.doc() == i)) { - exp.setDescription ("allowed by filter: "+exp.getDescription()); - exp.setValue(getBoost() * exp.getValue()); - } else { - exp.setDescription ("removed by filter: "+exp.getDescription()); - exp.setValue(0.0f); - } - return exp; + @Override + public int nextDoc() throws IOException { + int doc; + for(;;) { + doc = scorer.nextDoc(); + if (doc == Scorer.NO_MORE_DOCS || filterBits.get(doc)) { + return scorerDoc = doc; + } + } + } + + @Override + public int advance(int target) throws IOException { + int doc = scorer.advance(target); + if (doc != Scorer.NO_MORE_DOCS && !filterBits.get(doc)) { + return scorerDoc = nextDoc(); + } else { + return scorerDoc = doc; + } + } + + @Override + public int docID() { + return scorerDoc; + } + + @Override + public float score() throws IOException { + return scorer.score(); + } + + @Override + public int freq() throws IOException { return scorer.freq(); } + + @Override + public Collection getChildren() { + return Collections.singleton(new ChildScorer(scorer, "FILTERED")); + } + + @Override + public long cost() { + return scorer.cost(); + } + } + + private static class QueryFirstBulkScorer extends BulkScorer { + + private final Scorer scorer; + private final Bits filterBits; + + public QueryFirstBulkScorer(Scorer scorer, Bits filterBits) { + this.scorer = scorer; + this.filterBits = filterBits; + } + + @Override + public boolean score(Collector collector, int maxDoc) 
throws IOException { + // the normalization trick already applies the boost of this query, + // so we can use the wrapped scorer directly: + collector.setScorer(scorer); + if (scorer.docID() == -1) { + scorer.nextDoc(); + } + while (true) { + final int scorerDoc = scorer.docID(); + if (scorerDoc < maxDoc) { + if (filterBits.get(scorerDoc)) { + collector.collect(scorerDoc); } - }; + scorer.nextDoc(); + } else { + break; + } } - }; + + return scorer.docID() != Scorer.NO_MORE_DOCS; + } } + + /** + * A Scorer that uses a "leap-frog" approach (also called "zig-zag join"). The scorer and the filter + * take turns trying to advance to each other's next matching document, often + * jumping past the target document. When both land on the same document, it's + * collected. + */ + private static class LeapFrogScorer extends Scorer { + private final DocIdSetIterator secondary; + private final DocIdSetIterator primary; + private final Scorer scorer; + protected int primaryDoc = -1; + protected int secondaryDoc = -1; - /** Rewrites the wrapped query. */ + protected LeapFrogScorer(Weight weight, DocIdSetIterator primary, DocIdSetIterator secondary, Scorer scorer) { + super(weight); + this.primary = primary; + this.secondary = secondary; + this.scorer = scorer; + } + + private final int advanceToNextCommonDoc() throws IOException { + for (;;) { + if (secondaryDoc < primaryDoc) { + secondaryDoc = secondary.advance(primaryDoc); + } else if (secondaryDoc == primaryDoc) { + return primaryDoc; + } else { + primaryDoc = primary.advance(secondaryDoc); + } + } + } + + @Override + public final int nextDoc() throws IOException { + primaryDoc = primaryNext(); + return advanceToNextCommonDoc(); + } + + protected int primaryNext() throws IOException { + return primary.nextDoc(); + } + + @Override + public final int advance(int target) throws IOException { + if (target > primaryDoc) { + primaryDoc = primary.advance(target); + } + return advanceToNextCommonDoc(); + } + + @Override + public final int docID() { + return secondaryDoc; + } + + @Override + public final float score() throws IOException { + return scorer.score(); + } + + @Override + public final int freq() throws IOException { + return scorer.freq(); + } + + @Override + public final Collection getChildren() { + return Collections.singleton(new ChildScorer(scorer, "FILTERED")); + } + + @Override + public long cost() { + return Math.min(primary.cost(), secondary.cost()); + } + } + + // TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer + private static final class PrimaryAdvancedLeapFrogScorer extends LeapFrogScorer { + private final int firstFilteredDoc; + + protected PrimaryAdvancedLeapFrogScorer(Weight weight, int firstFilteredDoc, DocIdSetIterator filterIter, Scorer other) { + super(weight, filterIter, other, other); + this.firstFilteredDoc = firstFilteredDoc; + this.primaryDoc = firstFilteredDoc; // initialize to prevent and advance call to move it further + } + + @Override + protected int primaryNext() throws IOException { + if (secondaryDoc != -1) { + return super.primaryNext(); + } else { + return firstFilteredDoc; + } + } + } + + /** Rewrites the query. If the wrapped is an instance of + * {@link MatchAllDocsQuery} it returns a {@link ConstantScoreQuery}. Otherwise + * it returns a new {@code FilteredQuery} wrapping the rewritten query. 
*/ + @Override public Query rewrite(IndexReader reader) throws IOException { - Query rewritten = query.rewrite(reader); - if (rewritten != query) { - FilteredQuery clone = (FilteredQuery)this.clone(); - clone.query = rewritten; - return clone; + final Query queryRewritten = query.rewrite(reader); + + if (queryRewritten != query) { + // rewrite to a new FilteredQuery wrapping the rewritten query + final Query rewritten = new FilteredQuery(queryRewritten, filter, strategy); + rewritten.setBoost(this.getBoost()); + return rewritten; } else { + // nothing to rewrite, we are done! return this; } } - public Query getQuery() { + /** Returns this FilteredQuery's (unfiltered) Query */ + public final Query getQuery() { return query; } - public Filter getFilter() { + /** Returns this FilteredQuery's filter */ + public final Filter getFilter() { return filter; } + + /** Returns this FilteredQuery's {@link FilterStrategy} */ + public FilterStrategy getFilterStrategy() { + return this.strategy; + } // inherit javadoc - public void extractTerms(Set terms) { - getQuery().extractTerms(terms); + @Override + public void extractTerms(Set terms) { + getQuery().extractTerms(terms); } /** Prints a user-readable version of this query. */ + @Override public String toString (String s) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("filtered("); buffer.append(query.toString(s)); buffer.append(")->"); @@ -187,16 +396,251 @@ } /** Returns true iff o is equal to this. */ + @Override public boolean equals(Object o) { - if (o instanceof FilteredQuery) { - FilteredQuery fq = (FilteredQuery) o; - return (query.equals(fq.query) && filter.equals(fq.filter) && getBoost()==fq.getBoost()); - } - return false; + if (o == this) + return true; + if (!super.equals(o)) + return false; + assert o instanceof FilteredQuery; + final FilteredQuery fq = (FilteredQuery) o; + return fq.query.equals(this.query) && fq.filter.equals(this.filter) && fq.strategy.equals(this.strategy); } /** Returns a hash code value for this object. */ + @Override public int hashCode() { - return query.hashCode() ^ filter.hashCode() + Float.floatToRawIntBits(getBoost()); + int hash = super.hashCode(); + hash = hash * 31 + strategy.hashCode(); + hash = hash * 31 + query.hashCode(); + hash = hash * 31 + filter.hashCode(); + return hash; } + + /** + * A {@link FilterStrategy} that conditionally uses a random access filter if + * the given {@link DocIdSet} supports random access (returns a non-null value + * from {@link DocIdSet#bits()}) and + * {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns + * true. Otherwise this strategy falls back to a "zig-zag join" ( + * {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy. + * + *

      + * Note: this strategy is the default strategy in {@link FilteredQuery} + *

      + */ + public static final FilterStrategy RANDOM_ACCESS_FILTER_STRATEGY = new RandomAccessFilterStrategy(); + + /** + * A filter strategy that uses a "leap-frog" approach (also called "zig-zag join"). + * The scorer and the filter + * take turns trying to advance to each other's next matching document, often + * jumping past the target document. When both land on the same document, it's + * collected. + *

      + * Note: This strategy uses the filter to lead the iteration. + *

      + */ + public static final FilterStrategy LEAP_FROG_FILTER_FIRST_STRATEGY = new LeapFrogFilterStrategy(false); + + /** + * A filter strategy that uses a "leap-frog" approach (also called "zig-zag join"). + * The scorer and the filter + * take turns trying to advance to each other's next matching document, often + * jumping past the target document. When both land on the same document, it's + * collected. + *

      + * Note: This strategy uses the query to lead the iteration. + *

      + */ + public static final FilterStrategy LEAP_FROG_QUERY_FIRST_STRATEGY = new LeapFrogFilterStrategy(true); + + /** + * A filter strategy that advances the Query or rather its {@link Scorer} first and consults the + * filter {@link DocIdSet} for each matched document. + *

      + * Note: this strategy requires a {@link DocIdSet#bits()} to return a non-null value. Otherwise + * this strategy falls back to {@link FilteredQuery#LEAP_FROG_QUERY_FIRST_STRATEGY} + *

      + *

      + * Use this strategy if the filter computation is more expensive than document + * scoring or if the filter has a linear running time to compute the next + * matching doc like exact geo distances. + *

      + */ + public static final FilterStrategy QUERY_FIRST_FILTER_STRATEGY = new QueryFirstFilterStrategy(); + + /** Abstract class that defines how the filter ({@link DocIdSet}) applied during document collection. */ + public static abstract class FilterStrategy { + + /** + * Returns a filtered {@link Scorer} based on this strategy. + * + * @param context + * the {@link AtomicReaderContext} for which to return the {@link Scorer}. + * @param weight the {@link FilteredQuery} {@link Weight} to create the filtered scorer. + * @param docIdSet the filter {@link DocIdSet} to apply + * @return a filtered scorer + * + * @throws IOException if an {@link IOException} occurs + */ + public abstract Scorer filteredScorer(AtomicReaderContext context, + Weight weight, DocIdSet docIdSet) throws IOException; + + /** + * Returns a filtered {@link BulkScorer} based on this + * strategy. This is an optional method: the default + * implementation just calls {@link #filteredScorer} and + * wraps that into a BulkScorer. + * + * @param context + * the {@link AtomicReaderContext} for which to return the {@link Scorer}. + * @param weight the {@link FilteredQuery} {@link Weight} to create the filtered scorer. + * @param docIdSet the filter {@link DocIdSet} to apply + * @return a filtered top scorer + */ + public BulkScorer filteredBulkScorer(AtomicReaderContext context, + Weight weight, boolean scoreDocsInOrder, DocIdSet docIdSet) throws IOException { + Scorer scorer = filteredScorer(context, weight, docIdSet); + if (scorer == null) { + return null; + } + // This impl always scores docs in order, so we can + // ignore scoreDocsInOrder: + return new Weight.DefaultBulkScorer(scorer); + } + } + + /** + * A {@link FilterStrategy} that conditionally uses a random access filter if + * the given {@link DocIdSet} supports random access (returns a non-null value + * from {@link DocIdSet#bits()}) and + * {@link RandomAccessFilterStrategy#useRandomAccess(Bits, int)} returns + * true. Otherwise this strategy falls back to a "zig-zag join" ( + * {@link FilteredQuery#LEAP_FROG_FILTER_FIRST_STRATEGY}) strategy . + */ + public static class RandomAccessFilterStrategy extends FilterStrategy { + + @Override + public Scorer filteredScorer(AtomicReaderContext context, Weight weight, DocIdSet docIdSet) throws IOException { + final DocIdSetIterator filterIter = docIdSet.iterator(); + if (filterIter == null) { + // this means the filter does not accept any documents. + return null; + } + + final int firstFilterDoc = filterIter.nextDoc(); + if (firstFilterDoc == DocIdSetIterator.NO_MORE_DOCS) { + return null; + } + + final Bits filterAcceptDocs = docIdSet.bits(); + // force if RA is requested + final boolean useRandomAccess = filterAcceptDocs != null && useRandomAccess(filterAcceptDocs, firstFilterDoc); + if (useRandomAccess) { + // if we are using random access, we return the inner scorer, just with other acceptDocs + return weight.scorer(context, filterAcceptDocs); + } else { + assert firstFilterDoc > -1; + // we are gonna advance() this scorer, so we set inorder=true/toplevel=false + // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice + final Scorer scorer = weight.scorer(context, null); + // TODO once we have way to figure out if we use RA or LeapFrog we can remove this scorer + return (scorer == null) ? null : new PrimaryAdvancedLeapFrogScorer(weight, firstFilterDoc, filterIter, scorer); + } + } + + /** + * Expert: decides if a filter should be executed as "random-access" or not. 
+ * random-access means the filter "filters" in a similar way as deleted docs are filtered + * in Lucene. This is faster when the filter accepts many documents. + * However, when the filter is very sparse, it can be faster to execute the query+filter + * as a conjunction in some cases. + * + * The default implementation returns true if the first document accepted by the + * filter is < 100. + * + * @lucene.internal + */ + protected boolean useRandomAccess(Bits bits, int firstFilterDoc) { + //TODO once we have a cost API on filters and scorers we should rethink this heuristic + return firstFilterDoc < 100; + } + } + + private static final class LeapFrogFilterStrategy extends FilterStrategy { + + private final boolean scorerFirst; + + private LeapFrogFilterStrategy(boolean scorerFirst) { + this.scorerFirst = scorerFirst; + } + + @Override + public Scorer filteredScorer(AtomicReaderContext context, + Weight weight, DocIdSet docIdSet) throws IOException { + final DocIdSetIterator filterIter = docIdSet.iterator(); + if (filterIter == null) { + // this means the filter does not accept any documents. + return null; + } + // we pass null as acceptDocs, as our filter has already respected acceptDocs, no need to do twice + final Scorer scorer = weight.scorer(context, null); + if (scorer == null) { + return null; + } + + if (scorerFirst) { + return new LeapFrogScorer(weight, scorer, filterIter, scorer); + } else { + return new LeapFrogScorer(weight, filterIter, scorer, scorer); + } + } + } + + /** + * A filter strategy that advances the {@link Scorer} first and consults the + * {@link DocIdSet} for each matched document. + *

      + * Note: this strategy requires a {@link DocIdSet#bits()} to return a non-null value. Otherwise + * this strategy falls back to {@link FilteredQuery#LEAP_FROG_QUERY_FIRST_STRATEGY} + *

      + *

      + * Use this strategy if the filter computation is more expensive than document + * scoring or if the filter has a linear running time to compute the next + * matching doc like exact geo distances. + *

      + */ + private static final class QueryFirstFilterStrategy extends FilterStrategy { + @Override + public Scorer filteredScorer(final AtomicReaderContext context, + Weight weight, + DocIdSet docIdSet) throws IOException { + Bits filterAcceptDocs = docIdSet.bits(); + if (filterAcceptDocs == null) { + // Filter does not provide random-access Bits; we + // must fallback to leapfrog: + return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredScorer(context, weight, docIdSet); + } + final Scorer scorer = weight.scorer(context, null); + return scorer == null ? null : new QueryFirstScorer(weight, + filterAcceptDocs, scorer); + } + + @Override + public BulkScorer filteredBulkScorer(final AtomicReaderContext context, + Weight weight, + boolean scoreDocsInOrder, // ignored (we always top-score in order) + DocIdSet docIdSet) throws IOException { + Bits filterAcceptDocs = docIdSet.bits(); + if (filterAcceptDocs == null) { + // Filter does not provide random-access Bits; we + // must fallback to leapfrog: + return LEAP_FROG_QUERY_FIRST_STRATEGY.filteredBulkScorer(context, weight, scoreDocsInOrder, docIdSet); + } + final Scorer scorer = weight.scorer(context, null); + return scorer == null ? null : new QueryFirstBulkScorer(scorer, filterAcceptDocs); + } + } + } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FilteredTermEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/FuzzyQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/FuzzyQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/FuzzyQuery.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/FuzzyQuery.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,74 +17,117 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import java.io.IOException; + +import org.apache.lucene.index.SingleTermsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.util.PriorityQueue; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.automaton.LevenshteinAutomata; -import java.io.IOException; - -/** Implements the fuzzy search query. The similiarity measurement - * is based on the Levenshtein (edit distance) algorithm. +/** Implements the fuzzy search query. The similarity measurement + * is based on the Damerau-Levenshtein (optimal string alignment) algorithm, + * though you can explicitly choose classic Levenshtein by passing false + * to the transpositions parameter. + * + *

      This query uses {@link MultiTermQuery.TopTermsScoringBooleanQueryRewrite} + * as default. So terms will be collected and scored according to their + * edit distance. Only the top terms are used for building the {@link BooleanQuery}. + * It is not recommended to change the rewrite mode for fuzzy queries. + * + *

      At most, this query will match terms up to + * {@value org.apache.lucene.util.automaton.LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE} edits. + * Higher distances (especially with transpositions enabled), are generally not useful and + * will match a significant amount of the term dictionary. If you really want this, consider + * using an n-gram indexing technique (such as the SpellChecker in the + * suggest module) instead. + * + *

      NOTE: terms of length 1 or 2 will sometimes not match because of how the scaled + * distance between two terms is computed. For a term to match, the edit distance between + * the terms must be less than the minimum length term (either the input term, or + * the candidate term). For example, FuzzyQuery on term "abcd" with maxEdits=2 will + * not match an indexed term "ab", and FuzzyQuery on term "a" with maxEdits=2 will not + * match an indexed term "abc". */ public class FuzzyQuery extends MultiTermQuery { - public final static float defaultMinSimilarity = 0.5f; + public final static int defaultMaxEdits = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE; public final static int defaultPrefixLength = 0; + public final static int defaultMaxExpansions = 50; + public final static boolean defaultTranspositions = true; - private float minimumSimilarity; - private int prefixLength; + private final int maxEdits; + private final int maxExpansions; + private final boolean transpositions; + private final int prefixLength; + private final Term term; /** - * Create a new FuzzyQuery that will match terms with a similarity - * of at least minimumSimilarity to term. + * Create a new FuzzyQuery that will match terms with an edit distance + * of at most maxEdits to term. * If a prefixLength > 0 is specified, a common prefix * of that length is also required. * * @param term the term to search for - * @param minimumSimilarity a value between 0 and 1 to set the required similarity - * between the query term and the matching terms. For example, for a - * minimumSimilarity of 0.5 a term of the same length - * as the query term is considered similar to the query term if the edit distance - * between both terms is less than length(term)*0.5 + * @param maxEdits must be >= 0 and <= {@link LevenshteinAutomata#MAXIMUM_SUPPORTED_DISTANCE}. * @param prefixLength length of common (non-fuzzy) prefix - * @throws IllegalArgumentException if minimumSimilarity is >= 1 or < 0 - * or if prefixLength < 0 + * @param maxExpansions the maximum number of terms to match. If this number is + * greater than {@link BooleanQuery#getMaxClauseCount} when the query is rewritten, + * then the maxClauseCount will be used instead. + * @param transpositions true if transpositions should be treated as a primitive + * edit operation. If this is false, comparisons will implement the classic + * Levenshtein algorithm. 
*/ - public FuzzyQuery(Term term, float minimumSimilarity, int prefixLength) throws IllegalArgumentException { - super(term); + public FuzzyQuery(Term term, int maxEdits, int prefixLength, int maxExpansions, boolean transpositions) { + super(term.field()); - if (minimumSimilarity >= 1.0f) - throw new IllegalArgumentException("minimumSimilarity >= 1"); - else if (minimumSimilarity < 0.0f) - throw new IllegalArgumentException("minimumSimilarity < 0"); - if (prefixLength < 0) - throw new IllegalArgumentException("prefixLength < 0"); + if (maxEdits < 0 || maxEdits > LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE) { + throw new IllegalArgumentException("maxEdits must be between 0 and " + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); + } + if (prefixLength < 0) { + throw new IllegalArgumentException("prefixLength cannot be negative."); + } + if (maxExpansions <= 0) { + throw new IllegalArgumentException("maxExpansions must be positive."); + } - this.minimumSimilarity = minimumSimilarity; + this.term = term; + this.maxEdits = maxEdits; this.prefixLength = prefixLength; + this.transpositions = transpositions; + this.maxExpansions = maxExpansions; + setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(maxExpansions)); } /** - * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, minimumSimilarity, 0)}. + * Calls {@link #FuzzyQuery(Term, int, int, int, boolean) + * FuzzyQuery(term, maxEdits, prefixLength, defaultMaxExpansions, defaultTranspositions)}. */ - public FuzzyQuery(Term term, float minimumSimilarity) throws IllegalArgumentException { - this(term, minimumSimilarity, defaultPrefixLength); + public FuzzyQuery(Term term, int maxEdits, int prefixLength) { + this(term, maxEdits, prefixLength, defaultMaxExpansions, defaultTranspositions); } + + /** + * Calls {@link #FuzzyQuery(Term, int, int) FuzzyQuery(term, maxEdits, defaultPrefixLength)}. + */ + public FuzzyQuery(Term term, int maxEdits) { + this(term, maxEdits, defaultPrefixLength); + } /** - * Calls {@link #FuzzyQuery(Term, float) FuzzyQuery(term, 0.5f, 0)}. + * Calls {@link #FuzzyQuery(Term, int) FuzzyQuery(term, defaultMaxEdits)}. */ public FuzzyQuery(Term term) { - this(term, defaultMinSimilarity, defaultPrefixLength); + this(term, defaultMaxEdits); } /** - * Returns the minimum similarity that is required for this query to match. - * @return float value between 0.0 and 1.0 + * @return the maximum number of edit distances allowed for this query to match. */ - public float getMinSimilarity() { - return minimumSimilarity; + public int getMaxEdits() { + return maxEdits; } /** @@ -95,115 +138,105 @@ public int getPrefixLength() { return prefixLength; } - - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new FuzzyTermEnum(reader, getTerm(), minimumSimilarity, prefixLength); - } - public Query rewrite(IndexReader reader) throws IOException { - FilteredTermEnum enumerator = getEnum(reader); - int maxClauseCount = BooleanQuery.getMaxClauseCount(); - ScoreTermQueue stQueue = new ScoreTermQueue(maxClauseCount); - ScoreTerm reusableST = null; + /** + * Returns true if transpositions should be treated as a primitive edit operation. + * If this is false, comparisons will implement the classic Levenshtein algorithm. 
+ */ + public boolean getTranspositions() { + return transpositions; + } - try { - do { - float score = 0.0f; - Term t = enumerator.term(); - if (t != null) { - score = enumerator.difference(); - if (reusableST == null) { - reusableST = new ScoreTerm(t, score); - } else if (score >= reusableST.score) { - // reusableST holds the last "rejected" entry, so, if - // this new score is not better than that, there's no - // need to try inserting it - reusableST.score = score; - reusableST.term = t; - } else { - continue; - } - - reusableST = (ScoreTerm) stQueue.insertWithOverflow(reusableST); - } - } while (enumerator.next()); - } finally { - enumerator.close(); + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + if (maxEdits == 0 || prefixLength >= term.text().length()) { // can only match if it's exact + return new SingleTermsEnum(terms.iterator(null), term.bytes()); } - - BooleanQuery query = new BooleanQuery(true); - int size = stQueue.size(); - for(int i = 0; i < size; i++){ - ScoreTerm st = (ScoreTerm) stQueue.pop(); - TermQuery tq = new TermQuery(st.term); // found a match - tq.setBoost(getBoost() * st.score); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query - } - - return query; + return new FuzzyTermsEnum(terms, atts, getTerm(), maxEdits, prefixLength, transpositions); } + + /** + * Returns the pattern term. + */ + public Term getTerm() { + return term; + } + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); - Term term = getTerm(); + final StringBuilder buffer = new StringBuilder(); if (!term.field().equals(field)) { buffer.append(term.field()); buffer.append(":"); } buffer.append(term.text()); buffer.append('~'); - buffer.append(Float.toString(minimumSimilarity)); + buffer.append(Integer.toString(maxEdits)); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } - protected static class ScoreTerm { - public Term term; - public float score; - - public ScoreTerm(Term term, float score){ - this.term = term; - this.score = score; - } + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + maxEdits; + result = prime * result + prefixLength; + result = prime * result + maxExpansions; + result = prime * result + (transpositions ? 0 : 1); + result = prime * result + ((term == null) ? 
0 : term.hashCode()); + return result; } - - protected static class ScoreTermQueue extends PriorityQueue { - - public ScoreTermQueue(int size){ - initialize(size); - } - - /* (non-Javadoc) - * @see org.apache.lucene.util.PriorityQueue#lessThan(java.lang.Object, java.lang.Object) - */ - protected boolean lessThan(Object a, Object b) { - ScoreTerm termA = (ScoreTerm)a; - ScoreTerm termB = (ScoreTerm)b; - if (termA.score == termB.score) - return termA.term.compareTo(termB.term) > 0; - else - return termA.score < termB.score; - } - - } - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof FuzzyQuery)) return false; - if (!super.equals(o)) return false; - - final FuzzyQuery fuzzyQuery = (FuzzyQuery) o; - - if (minimumSimilarity != fuzzyQuery.minimumSimilarity) return false; - if (prefixLength != fuzzyQuery.prefixLength) return false; - + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + FuzzyQuery other = (FuzzyQuery) obj; + if (maxEdits != other.maxEdits) + return false; + if (prefixLength != other.prefixLength) + return false; + if (maxExpansions != other.maxExpansions) + return false; + if (transpositions != other.transpositions) + return false; + if (term == null) { + if (other.term != null) + return false; + } else if (!term.equals(other.term)) + return false; return true; } + + /** + * @deprecated pass integer edit distances instead. + */ + @Deprecated + public final static float defaultMinSimilarity = LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE; - public int hashCode() { - int result = super.hashCode(); - result = 29 * result + minimumSimilarity != +0.0f ? Float.floatToIntBits(minimumSimilarity) : 0; - result = 29 * result + prefixLength; - return result; + /** + * Helper function to convert from deprecated "minimumSimilarity" fractions + * to raw edit distances. + * + * @param minimumSimilarity scaled similarity + * @param termLen length (in unicode codepoints) of the term. + * @return equivalent number of maxEdits + * @deprecated pass integer edit distances instead. + */ + @Deprecated + public static int floatToEdits(float minimumSimilarity, int termLen) { + if (minimumSimilarity >= 1f) { + return (int) Math.min(minimumSimilarity, LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); + } else if (minimumSimilarity == 0.0f) { + return 0; // 0 means exact, not infinite # of edits! + } else { + return Math.min((int) ((1D-minimumSimilarity) * termLen), + LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE); + } } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FuzzyTermEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/FuzzyTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/Hit.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/HitCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/HitIterator.java'. Fisheye: No comparison available. Pass `N' to diff? 
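For reference, a minimal sketch of how the rewritten FuzzyQuery API above can be used together with the FilteredQuery strategies from the earlier hunk. The index directory, field names, and filter term are illustrative assumptions only, not taken from this diff; the calls themselves (the int-based FuzzyQuery constructor, floatToEdits, and FilteredQuery.QUERY_FIRST_FILTER_STRATEGY) are the ones introduced above.

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.FilteredQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class FuzzyQueryUsageSketch {
  public static void main(String[] args) throws Exception {
    Directory dir = FSDirectory.open(new File("index"));      // "index" is a placeholder path
    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    // Edit distance is now an int (0..2) plus an optional non-fuzzy prefix length,
    // instead of the old float minimumSimilarity.
    FuzzyQuery fuzzy = new FuzzyQuery(new Term("title", "lucene"), 2, 1);

    // Migrating a legacy similarity of 0.5f for a 6-codepoint term yields 2 edits
    // (capped at LevenshteinAutomata.MAXIMUM_SUPPORTED_DISTANCE).
    int maxEdits = FuzzyQuery.floatToEdits(0.5f, 6);

    // Combine with a filter, forcing the query-first strategy documented above
    // (useful when the filter is more expensive to evaluate than document scoring).
    FilteredQuery filtered = new FilteredQuery(
        fuzzy,
        new QueryWrapperFilter(new TermQuery(new Term("status", "published"))),
        FilteredQuery.QUERY_FIRST_FILTER_STRATEGY);

    TopDocs hits = searcher.search(filtered, 10);
    System.out.println("maxEdits=" + maxEdits + ", totalHits=" + hits.totalHits);

    reader.close();
    dir.close();
  }
}
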
Index: 3rdParty_sources/lucene/org/apache/lucene/search/HitQueue.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/HitQueue.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/HitQueue.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/HitQueue.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,14 +19,61 @@ import org.apache.lucene.util.PriorityQueue; -final class HitQueue extends PriorityQueue { - HitQueue(int size) { - initialize(size); +final class HitQueue extends PriorityQueue { + + /** + * Creates a new instance with size elements. If + * prePopulate is set to true, the queue will pre-populate itself + * with sentinel objects and set its {@link #size()} to size. In + * that case, you should not rely on {@link #size()} to get the number of + * actual elements that were added to the queue, but keep track yourself.
      + * NOTE: in case prePopulate is true, you should pop + * elements from the queue using the following code example: + * + *

      +   * PriorityQueue<ScoreDoc> pq = new HitQueue(10, true); // pre-populate.
      +   * ScoreDoc top = pq.top();
      +   * 
      +   * // Add/Update one element.
      +   * top.score = 1.0f;
      +   * top.doc = 0;
      +   * top = (ScoreDoc) pq.updateTop();
      +   * int totalHits = 1;
      +   * 
      +   * // Now pop only the elements that were *truly* inserted.
      +   * // First, pop all the sentinel elements (there are pq.size() - totalHits).
      +   * for (int i = pq.size() - totalHits; i > 0; i--) pq.pop();
      +   * 
      +   * // Now pop the truly added elements.
      +   * ScoreDoc[] results = new ScoreDoc[totalHits];
      +   * for (int i = totalHits - 1; i >= 0; i--) {
      +   *   results[i] = (ScoreDoc) pq.pop();
      +   * }
      +   * 
      + * + *

      NOTE: This class pre-allocate a full array of + * length size. + * + * @param size + * the requested size of this queue. + * @param prePopulate + * specifies whether to pre-populate the queue with sentinel values. + * @see #getSentinelObject() + */ + HitQueue(int size, boolean prePopulate) { + super(size, prePopulate); } - protected final boolean lessThan(Object a, Object b) { - ScoreDoc hitA = (ScoreDoc)a; - ScoreDoc hitB = (ScoreDoc)b; + @Override + protected ScoreDoc getSentinelObject() { + // Always set the doc Id to MAX_VALUE so that it won't be favored by + // lessThan. This generally should not happen since if score is not NEG_INF, + // TopScoreDocCollector will always add the object to the queue. + return new ScoreDoc(Integer.MAX_VALUE, Float.NEGATIVE_INFINITY); + } + + @Override + protected final boolean lessThan(ScoreDoc hitA, ScoreDoc hitB) { if (hitA.score == hitB.score) return hitA.doc > hitB.doc; else Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/Hits.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/IndexSearcher.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/IndexSearcher.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/IndexSearcher.java 17 Aug 2012 14:54:57 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/IndexSearcher.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,146 +17,620 @@ * limitations under the License. */ +import java.io.IOException; +import java.util.Arrays; +import java.util.Iterator; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CompletionService; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.Executor; +import java.util.concurrent.ExecutorCompletionService; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; + import org.apache.lucene.document.Document; -import org.apache.lucene.document.FieldSelector; -import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DirectoryReader; // javadocs import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.MultiFields; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.ReaderUtil; +import org.apache.lucene.index.StoredFieldVisitor; import org.apache.lucene.index.Term; -import org.apache.lucene.store.Directory; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.Terms; +import org.apache.lucene.search.similarities.DefaultSimilarity; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.store.NIOFSDirectory; // javadoc +import org.apache.lucene.util.ThreadInterruptedException; +import org.apache.lucene.index.IndexWriter; // javadocs -import java.io.IOException; - /** Implements search over a single IndexReader. * - *

      Applications usually need only call the inherited {@link #search(Query)} - * or {@link #search(Query,Filter)} methods. For performance reasons it is - * recommended to open only one IndexSearcher and use it for all of your searches. + *

      Applications usually need only call the inherited + * {@link #search(Query,int)} + * or {@link #search(Query,Filter,int)} methods. For + * performance reasons, if your index is unchanging, you + * should share a single IndexSearcher instance across + * multiple searches instead of creating a new one + * per-search. If your index has changed and you wish to + * see the changes reflected in searching, you should + * use {@link DirectoryReader#openIfChanged(DirectoryReader)} + * to obtain a new reader and + * then create a new IndexSearcher from that. Also, for + * low-latency turnaround it's best to use a near-real-time + * reader ({@link DirectoryReader#open(IndexWriter,boolean)}). + * Once you have a new {@link IndexReader}, it's relatively + * cheap to create a new IndexSearcher from it. * - *

      Note that you can only access Hits from an IndexSearcher as long as it is - * not yet closed, otherwise an IOException will be thrown. + *

      NOTE: {@link + * IndexSearcher} instances are completely + * thread safe, meaning multiple threads can call any of its + * methods, concurrently. If your application requires + * external synchronization, you should not + * synchronize on the IndexSearcher instance; + * use your own (non-Lucene) objects instead.

      */ -public class IndexSearcher extends Searcher { - IndexReader reader; - private boolean closeReader; +public class IndexSearcher { + final IndexReader reader; // package private for testing! + + // NOTE: these members might change in incompatible ways + // in the next release + protected final IndexReaderContext readerContext; + protected final List leafContexts; + /** used with executor - each slice holds a set of leafs executed within one thread */ + protected final LeafSlice[] leafSlices; - /** Creates a searcher searching the index in the named directory. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error - */ - public IndexSearcher(String path) throws CorruptIndexException, IOException { - this(IndexReader.open(path), true); - } + // These are only used for multi-threaded search + private final ExecutorService executor; - /** Creates a searcher searching the index in the provided directory. - * @throws CorruptIndexException if the index is corrupt - * @throws IOException if there is a low-level IO error + // the default Similarity + private static final Similarity defaultSimilarity = new DefaultSimilarity(); + + /** + * Expert: returns a default Similarity instance. + * In general, this method is only called to initialize searchers and writers. + * User code and query implementations should respect + * {@link IndexSearcher#getSimilarity()}. + * @lucene.internal */ - public IndexSearcher(Directory directory) throws CorruptIndexException, IOException { - this(IndexReader.open(directory), true); + public static Similarity getDefaultSimilarity() { + return defaultSimilarity; } + + /** The Similarity implementation used by this searcher. */ + private Similarity similarity = defaultSimilarity; /** Creates a searcher searching the provided index. */ public IndexSearcher(IndexReader r) { - this(r, false); + this(r, null); } + + /** Runs searches for each segment separately, using the + * provided ExecutorService. IndexSearcher will not + * shutdown/awaitTermination this ExecutorService on + * close; you must do so, eventually, on your own. NOTE: + * if you are using {@link NIOFSDirectory}, do not use + * the shutdownNow method of ExecutorService as this uses + * Thread.interrupt under-the-hood which can silently + * close file descriptors (see LUCENE-2239). + * + * @lucene.experimental */ + public IndexSearcher(IndexReader r, ExecutorService executor) { + this(r.getContext(), executor); + } + + /** + * Creates a searcher searching the provided top-level {@link IndexReaderContext}. + *

      + * Given a non-null {@link ExecutorService} this method runs + * searches for each segment separately, using the provided ExecutorService. + * IndexSearcher will not shutdown/awaitTermination this ExecutorService on + * close; you must do so, eventually, on your own. NOTE: if you are using + * {@link NIOFSDirectory}, do not use the shutdownNow method of + * ExecutorService as this uses Thread.interrupt under-the-hood which can + * silently close file descriptors (see LUCENE-2239). + * + * @see IndexReaderContext + * @see IndexReader#getContext() + * @lucene.experimental + */ + public IndexSearcher(IndexReaderContext context, ExecutorService executor) { + assert context.isTopLevel: "IndexSearcher's ReaderContext must be topLevel for reader" + context.reader(); + reader = context.reader(); + this.executor = executor; + this.readerContext = context; + leafContexts = context.leaves(); + this.leafSlices = executor == null ? null : slices(leafContexts); + } + + /** + * Creates a searcher searching the provided top-level {@link IndexReaderContext}. + * + * @see IndexReaderContext + * @see IndexReader#getContext() + * @lucene.experimental + */ + public IndexSearcher(IndexReaderContext context) { + this(context, null); + } - private IndexSearcher(IndexReader r, boolean closeReader) { - reader = r; - this.closeReader = closeReader; + /** + * Expert: Creates an array of leaf slices each holding a subset of the given leaves. + * Each {@link LeafSlice} is executed in a single thread. By default there + * will be one {@link LeafSlice} per leaf ({@link AtomicReaderContext}). + */ + protected LeafSlice[] slices(List leaves) { + LeafSlice[] slices = new LeafSlice[leaves.size()]; + for (int i = 0; i < slices.length; i++) { + slices[i] = new LeafSlice(leaves.get(i)); + } + return slices; } + /** Return the {@link IndexReader} this searches. */ public IndexReader getIndexReader() { return reader; } + /** + * Sugar for .getIndexReader().document(docID) + * @see IndexReader#document(int) + */ + public Document doc(int docID) throws IOException { + return reader.document(docID); + } + + /** + * Sugar for .getIndexReader().document(docID, fieldVisitor) + * @see IndexReader#document(int, StoredFieldVisitor) + */ + public void doc(int docID, StoredFieldVisitor fieldVisitor) throws IOException { + reader.document(docID, fieldVisitor); + } + + /** + * Sugar for .getIndexReader().document(docID, fieldsToLoad) + * @see IndexReader#document(int, Set) + */ + public Document doc(int docID, Set fieldsToLoad) throws IOException { + return reader.document(docID, fieldsToLoad); + } + /** - * Note that the underlying IndexReader is not closed, if - * IndexSearcher was constructed with IndexSearcher(IndexReader r). - * If the IndexReader was supplied implicitly by specifying a directory, then - * the IndexReader gets closed. + * @deprecated Use {@link #doc(int, Set)} instead. */ - public void close() throws IOException { - if(closeReader) - reader.close(); + @Deprecated + public final Document document(int docID, Set fieldsToLoad) throws IOException { + return doc(docID, fieldsToLoad); } - // inherit javadoc - public int docFreq(Term term) throws IOException { - return reader.docFreq(term); + /** Expert: Set the Similarity implementation used by this IndexSearcher. 
+ * + */ + public void setSimilarity(Similarity similarity) { + this.similarity = similarity; } - // inherit javadoc - public Document doc(int i) throws CorruptIndexException, IOException { - return reader.document(i); + public Similarity getSimilarity() { + return similarity; } - // inherit javadoc - public Document doc(int i, FieldSelector fieldSelector) throws CorruptIndexException, IOException { - return reader.document(i, fieldSelector); + /** @lucene.internal */ + protected Query wrapFilter(Query query, Filter filter) { + return (filter == null) ? query : new FilteredQuery(query, filter); } + + /** Finds the top n + * hits for query where all results are after a previous + * result (after). + *

      + * By passing the bottom result from a previous page as after, + * this method can be used for efficient 'deep-paging' across potentially + * large result sets. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopDocs searchAfter(ScoreDoc after, Query query, int n) throws IOException { + return search(createNormalizedWeight(query), after, n); + } - // inherit javadoc - public int maxDoc() throws IOException { - return reader.maxDoc(); + /** Finds the top n + * hits for query, applying filter if non-null, + * where all results are after a previous result (after). + *

      + * By passing the bottom result from a previous page as after, + * this method can be used for efficient 'deep-paging' across potentially + * large result sets. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n) throws IOException { + return search(createNormalizedWeight(wrapFilter(query, filter)), after, n); } + + /** Finds the top n + * hits for query. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopDocs search(Query query, int n) + throws IOException { + return search(query, null, n); + } - // inherit javadoc - public TopDocs search(Weight weight, Filter filter, final int nDocs) - throws IOException { - if (nDocs <= 0) // null might be returned from hq.top() below. - throw new IllegalArgumentException("nDocs must be > 0"); + /** Finds the top n + * hits for query, applying filter if non-null. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopDocs search(Query query, Filter filter, int n) + throws IOException { + return search(createNormalizedWeight(wrapFilter(query, filter)), null, n); + } - TopDocCollector collector = new TopDocCollector(nDocs); - search(weight, filter, collector); - return collector.topDocs(); + /** Lower-level search API. + * + *

      {@link Collector#collect(int)} is called for every matching + * document. + * + * @param query to match documents + * @param filter if non-null, used to permit documents to be collected. + * @param results to receive hits + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public void search(Query query, Filter filter, Collector results) + throws IOException { + search(leafContexts, createNormalizedWeight(wrapFilter(query, filter)), results); } - // inherit javadoc - public TopFieldDocs search(Weight weight, Filter filter, final int nDocs, - Sort sort) - throws IOException { + /** Lower-level search API. + * + *

      {@link Collector#collect(int)} is called for every matching document. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public void search(Query query, Collector results) + throws IOException { + search(leafContexts, createNormalizedWeight(query), results); + } + + /** Search implementation with arbitrary sorting. Finds + * the top n hits for query, applying + * filter if non-null, and sorting the hits by the criteria in + * sort. + * + *

      NOTE: this does not compute scores by default; use + * {@link IndexSearcher#search(Query,Filter,int,Sort,boolean,boolean)} to + * control scoring. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopFieldDocs search(Query query, Filter filter, int n, + Sort sort) throws IOException { + return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, false, false); + } - TopFieldDocCollector collector = - new TopFieldDocCollector(reader, sort, nDocs); - search(weight, filter, collector); - return (TopFieldDocs)collector.topDocs(); + /** Search implementation with arbitrary sorting, plus + * control over whether hit scores and max score + * should be computed. Finds + * the top n hits for query, applying + * filter if non-null, and sorting the hits by the criteria in + * sort. If doDocScores is true + * then the score of each hit will be computed and + * returned. If doMaxScore is + * true then the maximum score over all + * collected hits will be computed. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopFieldDocs search(Query query, Filter filter, int n, + Sort sort, boolean doDocScores, boolean doMaxScore) throws IOException { + return search(createNormalizedWeight(wrapFilter(query, filter)), n, sort, doDocScores, doMaxScore); } - // inherit javadoc - public void search(Weight weight, Filter filter, - final HitCollector results) throws IOException { + /** Finds the top n + * hits for query, applying filter if non-null, + * where all results are after a previous result (after). + *

      + * By passing the bottom result from a previous page as after, + * this method can be used for efficient 'deep-paging' across potentially + * large result sets. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort) throws IOException { + if (after != null && !(after instanceof FieldDoc)) { + // TODO: if we fix type safety of TopFieldDocs we can + // remove this + throw new IllegalArgumentException("after must be a FieldDoc; got " + after); + } + return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true, false, false); + } - Scorer scorer = weight.scorer(reader); - if (scorer == null) - return; + /** + * Search implementation with arbitrary sorting and no filter. + * @param query The query to search for + * @param n Return only the top n results + * @param sort The {@link org.apache.lucene.search.Sort} object + * @return The top docs, sorted according to the supplied {@link org.apache.lucene.search.Sort} instance + * @throws IOException if there is a low-level I/O error + */ + public TopFieldDocs search(Query query, int n, + Sort sort) throws IOException { + return search(createNormalizedWeight(query), n, sort, false, false); + } - if (filter == null) { - scorer.score(results); - return; + /** Finds the top n + * hits for query where all results are after a previous + * result (after). + *

      + * By passing the bottom result from a previous page as after, + * this method can be used for efficient 'deep-paging' across potentially + * large result sets. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopDocs searchAfter(ScoreDoc after, Query query, int n, Sort sort) throws IOException { + if (after != null && !(after instanceof FieldDoc)) { + // TODO: if we fix type safety of TopFieldDocs we can + // remove this + throw new IllegalArgumentException("after must be a FieldDoc; got " + after); } + return search(createNormalizedWeight(query), (FieldDoc) after, n, sort, true, false, false); + } - DocIdSetIterator filterDocIdIterator = filter.getDocIdSet(reader).iterator(); // CHECKME: use ConjunctionScorer here? + /** Finds the top n + * hits for query where all results are after a previous + * result (after), allowing control over + * whether hit scores and max score should be computed. + *

      + * By passing the bottom result from a previous page as after, + * this method can be used for efficient 'deep-paging' across potentially + * large result sets. If doDocScores is true + * then the score of each hit will be computed and + * returned. If doMaxScore is + * true then the maximum score over all + * collected hits will be computed. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + public TopDocs searchAfter(ScoreDoc after, Query query, Filter filter, int n, Sort sort, + boolean doDocScores, boolean doMaxScore) throws IOException { + if (after != null && !(after instanceof FieldDoc)) { + // TODO: if we fix type safety of TopFieldDocs we can + // remove this + throw new IllegalArgumentException("after must be a FieldDoc; got " + after); + } + return search(createNormalizedWeight(wrapFilter(query, filter)), (FieldDoc) after, n, sort, true, + doDocScores, doMaxScore); + } + + /** Expert: Low-level search implementation. Finds the top n + * hits for query, applying filter if non-null. + * + *

      Applications should usually call {@link IndexSearcher#search(Query,int)} or + * {@link IndexSearcher#search(Query,Filter,int)} instead. + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + protected TopDocs search(Weight weight, ScoreDoc after, int nDocs) throws IOException { + int limit = reader.maxDoc(); + if (limit == 0) { + limit = 1; + } + if (after != null && after.doc >= limit) { + throw new IllegalArgumentException("after.doc exceeds the number of documents in the reader: after.doc=" + + after.doc + " limit=" + limit); + } + nDocs = Math.min(nDocs, limit); - boolean more = filterDocIdIterator.next() && scorer.skipTo(filterDocIdIterator.doc()); + if (executor == null) { + return search(leafContexts, weight, after, nDocs); + } else { + final HitQueue hq = new HitQueue(nDocs, false); + final Lock lock = new ReentrantLock(); + final ExecutionHelper runner = new ExecutionHelper<>(executor); + + for (int i = 0; i < leafSlices.length; i++) { // search each sub + runner.submit(new SearcherCallableNoSort(lock, this, leafSlices[i], weight, after, nDocs, hq)); + } - while (more) { - int filterDocId = filterDocIdIterator.doc(); - if (filterDocId > scorer.doc() && !scorer.skipTo(filterDocId)) { - more = false; - } else { - int scorerDocId = scorer.doc(); - if (scorerDocId == filterDocId) { // permitted by filter - results.collect(scorerDocId, scorer.score()); - more = filterDocIdIterator.next(); - } else { - more = filterDocIdIterator.skipTo(scorerDocId); + int totalHits = 0; + float maxScore = Float.NEGATIVE_INFINITY; + for (final TopDocs topDocs : runner) { + if(topDocs.totalHits != 0) { + totalHits += topDocs.totalHits; + maxScore = Math.max(maxScore, topDocs.getMaxScore()); } } + + final ScoreDoc[] scoreDocs = new ScoreDoc[hq.size()]; + for (int i = hq.size() - 1; i >= 0; i--) // put docs in array + scoreDocs[i] = hq.pop(); + + return new TopDocs(totalHits, scoreDocs, maxScore); } } + /** Expert: Low-level search implementation. Finds the top n + * hits for query. + * + *

      Applications should usually call {@link IndexSearcher#search(Query,int)} or + * {@link IndexSearcher#search(Query,Filter,int)} instead. + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + protected TopDocs search(List leaves, Weight weight, ScoreDoc after, int nDocs) throws IOException { + // single thread + int limit = reader.maxDoc(); + if (limit == 0) { + limit = 1; + } + nDocs = Math.min(nDocs, limit); + TopScoreDocCollector collector = TopScoreDocCollector.create(nDocs, after, !weight.scoresDocsOutOfOrder()); + search(leaves, weight, collector); + return collector.topDocs(); + } + + /** Expert: Low-level search implementation with arbitrary + * sorting and control over whether hit scores and max + * score should be computed. Finds + * the top n hits for query and sorting the hits + * by the criteria in sort. + * + *

      Applications should usually call {@link + * IndexSearcher#search(Query,Filter,int,Sort)} instead. + * + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + protected TopFieldDocs search(Weight weight, + final int nDocs, Sort sort, + boolean doDocScores, boolean doMaxScore) throws IOException { + return search(weight, null, nDocs, sort, true, doDocScores, doMaxScore); + } + + /** + * Just like {@link #search(Weight, int, Sort, boolean, boolean)}, but you choose + * whether or not the fields in the returned {@link FieldDoc} instances should + * be set by specifying fillFields. + */ + protected TopFieldDocs search(Weight weight, FieldDoc after, int nDocs, + Sort sort, boolean fillFields, + boolean doDocScores, boolean doMaxScore) + throws IOException { + + if (sort == null) throw new NullPointerException("Sort must not be null"); + + int limit = reader.maxDoc(); + if (limit == 0) { + limit = 1; + } + nDocs = Math.min(nDocs, limit); + + if (executor == null) { + // use all leaves here! + return search(leafContexts, weight, after, nDocs, sort, fillFields, doDocScores, doMaxScore); + } else { + final TopFieldCollector topCollector = TopFieldCollector.create(sort, nDocs, + after, + fillFields, + doDocScores, + doMaxScore, + false); + + final Lock lock = new ReentrantLock(); + final ExecutionHelper runner = new ExecutionHelper<>(executor); + for (int i = 0; i < leafSlices.length; i++) { // search each leaf slice + runner.submit( + new SearcherCallableWithSort(lock, this, leafSlices[i], weight, after, nDocs, topCollector, sort, doDocScores, doMaxScore)); + } + int totalHits = 0; + float maxScore = Float.NEGATIVE_INFINITY; + for (final TopFieldDocs topFieldDocs : runner) { + if (topFieldDocs.totalHits != 0) { + totalHits += topFieldDocs.totalHits; + maxScore = Math.max(maxScore, topFieldDocs.getMaxScore()); + } + } + + final TopFieldDocs topDocs = (TopFieldDocs) topCollector.topDocs(); + + return new TopFieldDocs(totalHits, topDocs.scoreDocs, topDocs.fields, topDocs.getMaxScore()); + } + } + + + /** + * Just like {@link #search(Weight, int, Sort, boolean, boolean)}, but you choose + * whether or not the fields in the returned {@link FieldDoc} instances should + * be set by specifying fillFields. + */ + protected TopFieldDocs search(List leaves, Weight weight, FieldDoc after, int nDocs, + Sort sort, boolean fillFields, boolean doDocScores, boolean doMaxScore) throws IOException { + // single thread + int limit = reader.maxDoc(); + if (limit == 0) { + limit = 1; + } + nDocs = Math.min(nDocs, limit); + + TopFieldCollector collector = TopFieldCollector.create(sort, nDocs, after, + fillFields, doDocScores, + doMaxScore, !weight.scoresDocsOutOfOrder()); + search(leaves, weight, collector); + return (TopFieldDocs) collector.topDocs(); + } + + /** + * Lower-level search API. + * + *

      + * {@link Collector#collect(int)} is called for every document.
      + * + *

      + * NOTE: this method executes the searches on all given leaves exclusively. + * To search across all the searchers leaves use {@link #leafContexts}. + * + * @param leaves + * the searchers leaves to execute the searches on + * @param weight + * to match documents + * @param collector + * to receive hits + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + protected void search(List leaves, Weight weight, Collector collector) + throws IOException { + + // TODO: should we make this + // threaded...? the Collector could be sync'd? + // always use single thread: + for (AtomicReaderContext ctx : leaves) { // search each subreader + try { + collector.setNextReader(ctx); + } catch (CollectionTerminatedException e) { + // there is no doc of interest in this reader context + // continue with the following leaf + continue; + } + BulkScorer scorer = weight.bulkScorer(ctx, !collector.acceptsDocsOutOfOrder(), ctx.reader().getLiveDocs()); + if (scorer != null) { + try { + scorer.score(collector); + } catch (CollectionTerminatedException e) { + // collection was terminated prematurely + // continue with the following leaf + } + } + } + } + + /** Expert: called to re-write queries into primitive queries. + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ public Query rewrite(Query original) throws IOException { Query query = original; for (Query rewrittenQuery = query.rewrite(reader); rewrittenQuery != query; @@ -166,7 +640,278 @@ return query; } - public Explanation explain(Weight weight, int doc) throws IOException { - return weight.explain(reader, doc); + /** Returns an Explanation that describes how doc scored against + * query. + * + *

      This is intended to be used in developing Similarity implementations, + * and, for good performance, should not be displayed with every hit. + * Computing an explanation is as expensive as executing the query over the + * entire index. + */ + public Explanation explain(Query query, int doc) throws IOException { + return explain(createNormalizedWeight(query), doc); } + + /** Expert: low-level implementation method + * Returns an Explanation that describes how doc scored against + * weight. + * + *

      This is intended to be used in developing Similarity implementations, + * and, for good performance, should not be displayed with every hit. + * Computing an explanation is as expensive as executing the query over the + * entire index. + *

      Applications should call {@link IndexSearcher#explain(Query, int)}. + * @throws BooleanQuery.TooManyClauses If a query would exceed + * {@link BooleanQuery#getMaxClauseCount()} clauses. + */ + protected Explanation explain(Weight weight, int doc) throws IOException { + int n = ReaderUtil.subIndex(doc, leafContexts); + final AtomicReaderContext ctx = leafContexts.get(n); + int deBasedDoc = doc - ctx.docBase; + + return weight.explain(ctx, deBasedDoc); + } + + /** + * Creates a normalized weight for a top-level {@link Query}. + * The query is rewritten by this method and {@link Query#createWeight} called, + * afterwards the {@link Weight} is normalized. The returned {@code Weight} + * can then directly be used to get a {@link Scorer}. + * @lucene.internal + */ + public Weight createNormalizedWeight(Query query) throws IOException { + query = rewrite(query); + Weight weight = query.createWeight(this); + float v = weight.getValueForNormalization(); + float norm = getSimilarity().queryNorm(v); + if (Float.isInfinite(norm) || Float.isNaN(norm)) { + norm = 1.0f; + } + weight.normalize(norm, 1.0f); + return weight; + } + + /** + * Returns this searchers the top-level {@link IndexReaderContext}. + * @see IndexReader#getContext() + */ + /* sugar for #getReader().getTopReaderContext() */ + public IndexReaderContext getTopReaderContext() { + return readerContext; + } + + /** + * A thread subclass for searching a single searchable + */ + private static final class SearcherCallableNoSort implements Callable { + + private final Lock lock; + private final IndexSearcher searcher; + private final Weight weight; + private final ScoreDoc after; + private final int nDocs; + private final HitQueue hq; + private final LeafSlice slice; + + public SearcherCallableNoSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight, + ScoreDoc after, int nDocs, HitQueue hq) { + this.lock = lock; + this.searcher = searcher; + this.weight = weight; + this.after = after; + this.nDocs = nDocs; + this.hq = hq; + this.slice = slice; + } + + @Override + public TopDocs call() throws IOException { + final TopDocs docs = searcher.search(Arrays.asList(slice.leaves), weight, after, nDocs); + final ScoreDoc[] scoreDocs = docs.scoreDocs; + //it would be so nice if we had a thread-safe insert + lock.lock(); + try { + for (int j = 0; j < scoreDocs.length; j++) { // merge scoreDocs into hq + final ScoreDoc scoreDoc = scoreDocs[j]; + if (scoreDoc == hq.insertWithOverflow(scoreDoc)) { + break; + } + } + } finally { + lock.unlock(); + } + return docs; + } + } + + + /** + * A thread subclass for searching a single searchable + */ + private static final class SearcherCallableWithSort implements Callable { + + private final Lock lock; + private final IndexSearcher searcher; + private final Weight weight; + private final int nDocs; + private final TopFieldCollector hq; + private final Sort sort; + private final LeafSlice slice; + private final FieldDoc after; + private final boolean doDocScores; + private final boolean doMaxScore; + + public SearcherCallableWithSort(Lock lock, IndexSearcher searcher, LeafSlice slice, Weight weight, + FieldDoc after, int nDocs, TopFieldCollector hq, Sort sort, + boolean doDocScores, boolean doMaxScore) { + this.lock = lock; + this.searcher = searcher; + this.weight = weight; + this.nDocs = nDocs; + this.hq = hq; + this.sort = sort; + this.slice = slice; + this.after = after; + this.doDocScores = doDocScores; + this.doMaxScore = doMaxScore; + } + + private final FakeScorer fakeScorer = new 
FakeScorer(); + + @Override + public TopFieldDocs call() throws IOException { + assert slice.leaves.length == 1; + final TopFieldDocs docs = searcher.search(Arrays.asList(slice.leaves), + weight, after, nDocs, sort, true, doDocScores || sort.needsScores(), doMaxScore); + lock.lock(); + try { + final AtomicReaderContext ctx = slice.leaves[0]; + final int base = ctx.docBase; + hq.setNextReader(ctx); + hq.setScorer(fakeScorer); + for(ScoreDoc scoreDoc : docs.scoreDocs) { + fakeScorer.doc = scoreDoc.doc - base; + fakeScorer.score = scoreDoc.score; + hq.collect(scoreDoc.doc-base); + } + + // Carry over maxScore from sub: + if (doMaxScore && docs.getMaxScore() > hq.maxScore) { + hq.maxScore = docs.getMaxScore(); + } + } finally { + lock.unlock(); + } + return docs; + } + } + + /** + * A helper class that wraps a {@link CompletionService} and provides an + * iterable interface to the completed {@link Callable} instances. + * + * @param + * the type of the {@link Callable} return value + */ + private static final class ExecutionHelper implements Iterator, Iterable { + private final CompletionService service; + private int numTasks; + + ExecutionHelper(final Executor executor) { + this.service = new ExecutorCompletionService<>(executor); + } + + @Override + public boolean hasNext() { + return numTasks > 0; + } + + public void submit(Callable task) { + this.service.submit(task); + ++numTasks; + } + + @Override + public T next() { + if(!this.hasNext()) + throw new NoSuchElementException("next() is called but hasNext() returned false"); + try { + return service.take().get(); + } catch (InterruptedException e) { + throw new ThreadInterruptedException(e); + } catch (ExecutionException e) { + throw new RuntimeException(e); + } finally { + --numTasks; + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + + @Override + public Iterator iterator() { + // use the shortcut here - this is only used in a private context + return this; + } + } + + /** + * A class holding a subset of the {@link IndexSearcher}s leaf contexts to be + * executed within a single thread. + * + * @lucene.experimental + */ + public static class LeafSlice { + final AtomicReaderContext[] leaves; + + public LeafSlice(AtomicReaderContext... leaves) { + this.leaves = leaves; + } + } + + @Override + public String toString() { + return "IndexSearcher(" + reader + "; executor=" + executor + ")"; + } + + /** + * Returns {@link TermStatistics} for a term. + * + * This can be overridden for example, to return a term's statistics + * across a distributed collection. + * @lucene.experimental + */ + public TermStatistics termStatistics(Term term, TermContext context) throws IOException { + return new TermStatistics(term.bytes(), context.docFreq(), context.totalTermFreq()); + } + + /** + * Returns {@link CollectionStatistics} for a field. + * + * This can be overridden for example, to return a field's statistics + * across a distributed collection. 
+ * @lucene.experimental + */ + public CollectionStatistics collectionStatistics(String field) throws IOException { + final int docCount; + final long sumTotalTermFreq; + final long sumDocFreq; + + assert field != null; + + Terms terms = MultiFields.getTerms(reader, field); + if (terms == null) { + docCount = 0; + sumTotalTermFreq = 0; + sumDocFreq = 0; + } else { + docCount = terms.getDocCount(); + sumTotalTermFreq = terms.getSumTotalTermFreq(); + sumDocFreq = terms.getSumDocFreq(); + } + return new CollectionStatistics(field, reader.maxDoc(), docCount, sumTotalTermFreq, sumDocFreq); + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/LiveFieldValues.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/MatchAllDocsQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/MatchAllDocsQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/MatchAllDocsQuery.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/MatchAllDocsQuery.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,109 +17,112 @@ * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.Query; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Searcher; -import org.apache.lucene.search.Similarity; -import org.apache.lucene.search.Weight; +import org.apache.lucene.index.Term; import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.Bits; import java.util.Set; +import java.io.IOException; /** * A query that matches all documents. * */ public class MatchAllDocsQuery extends Query { - public MatchAllDocsQuery() { - } - private class MatchAllScorer extends Scorer { - - final IndexReader reader; - int id; - final int maxId; final float score; + private int doc = -1; + private final int maxDoc; + private final Bits liveDocs; - MatchAllScorer(IndexReader reader, Similarity similarity, Weight w) { - super(similarity); - this.reader = reader; - id = -1; - maxId = reader.maxDoc() - 1; - score = w.getValue(); + MatchAllScorer(IndexReader reader, Bits liveDocs, Weight w, float score) { + super(w); + this.liveDocs = liveDocs; + this.score = score; + maxDoc = reader.maxDoc(); } - public Explanation explain(int doc) { - return null; // not called... 
see MatchAllDocsWeight.explain() + @Override + public int docID() { + return doc; } - public int doc() { - return id; - } - - public boolean next() { - while (id < maxId) { - id++; - if (!reader.isDeleted(id)) { - return true; - } + @Override + public int nextDoc() throws IOException { + doc++; + while(liveDocs != null && doc < maxDoc && !liveDocs.get(doc)) { + doc++; } - return false; + if (doc == maxDoc) { + doc = NO_MORE_DOCS; + } + return doc; } - + + @Override public float score() { return score; } - public boolean skipTo(int target) { - id = target - 1; - return next(); + @Override + public int freq() { + return 1; } + @Override + public int advance(int target) throws IOException { + doc = target-1; + return nextDoc(); + } + + @Override + public long cost() { + return maxDoc; + } } - private class MatchAllDocsWeight implements Weight { - private Similarity similarity; + private class MatchAllDocsWeight extends Weight { private float queryWeight; private float queryNorm; - public MatchAllDocsWeight(Searcher searcher) { - this.similarity = searcher.getSimilarity(); + public MatchAllDocsWeight(IndexSearcher searcher) { } + @Override public String toString() { return "weight(" + MatchAllDocsQuery.this + ")"; } + @Override public Query getQuery() { return MatchAllDocsQuery.this; } - public float getValue() { - return queryWeight; - } - - public float sumOfSquaredWeights() { + @Override + public float getValueForNormalization() { queryWeight = getBoost(); return queryWeight * queryWeight; } - public void normalize(float queryNorm) { - this.queryNorm = queryNorm; + @Override + public void normalize(float queryNorm, float topLevelBoost) { + this.queryNorm = queryNorm * topLevelBoost; queryWeight *= this.queryNorm; } - public Scorer scorer(IndexReader reader) { - return new MatchAllScorer(reader, similarity, this); + @Override + public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + return new MatchAllScorer(context.reader(), acceptDocs, this, queryWeight); } - public Explanation explain(IndexReader reader, int doc) { + @Override + public Explanation explain(AtomicReaderContext context, int doc) { // explain query weight Explanation queryExpl = new ComplexExplanation - (true, getValue(), "MatchAllDocsQuery, product of:"); + (true, queryWeight, "MatchAllDocsQuery, product of:"); if (getBoost() != 1.0f) { queryExpl.addDetail(new Explanation(getBoost(),"boost")); } @@ -129,27 +132,32 @@ } } - protected Weight createWeight(Searcher searcher) { + @Override + public Weight createWeight(IndexSearcher searcher) { return new MatchAllDocsWeight(searcher); } - public void extractTerms(Set terms) { + @Override + public void extractTerms(Set terms) { } + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); - buffer.append("MatchAllDocsQuery"); + StringBuilder buffer = new StringBuilder(); + buffer.append("*:*"); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } + @Override public boolean equals(Object o) { if (!(o instanceof MatchAllDocsQuery)) return false; MatchAllDocsQuery other = (MatchAllDocsQuery) o; return this.getBoost() == other.getBoost(); } + @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ 0x1AA71190; } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/MaxNonCompetitiveBoostAttribute.java'. Fisheye: No comparison available. Pass `N' to diff? 
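The reworked MatchAllDocsQuery above now walks documents with nextDoc()/advance() and consults the per-segment liveDocs bits, so deleted documents are skipped at scoring time. Below is a minimal sketch of the query in use against the 4.x API this patch targets; the RAMDirectory, the "id" field and the LUCENE_47 version constant are illustrative assumptions, not part of the diff.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

public class MatchAllDocsDemo {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir,
        new IndexWriterConfig(Version.LUCENE_47, new StandardAnalyzer(Version.LUCENE_47)));
    for (int i = 0; i < 3; i++) {
      Document doc = new Document();
      doc.add(new StringField("id", Integer.toString(i), Store.YES));
      writer.addDocument(doc);
    }
    writer.deleteDocuments(new Term("id", "1")); // the deletion is honored through liveDocs
    writer.close();

    DirectoryReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs hits = searcher.search(new MatchAllDocsQuery(), 10);
    System.out.println(hits.totalHits + " live documents matched (2 expected)");
    reader.close();
    dir.close();
  }
}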
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/MaxNonCompetitiveBoostAttributeImpl.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/MinShouldMatchSumScorer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/MultiCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/MultiPhraseQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/MultiPhraseQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/MultiPhraseQuery.java 17 Aug 2012 14:54:57 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/MultiPhraseQuery.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,11 +20,23 @@ import java.io.IOException; import java.util.*; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.MultipleTermPositions; +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; -import org.apache.lucene.search.Query; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ToStringUtils; /** @@ -35,19 +47,23 @@ * prefix using IndexReader.terms(Term), and use MultiPhraseQuery.add(Term[] * terms) to add them to the query. * - * @version 1.0 */ public class MultiPhraseQuery extends Query { private String field; - private ArrayList termArrays = new ArrayList(); - private ArrayList positions = new ArrayList(); + private ArrayList termArrays = new ArrayList<>(); + private ArrayList positions = new ArrayList<>(); private int slop = 0; /** Sets the phrase slop for this query. * @see PhraseQuery#setSlop(int) */ - public void setSlop(int s) { slop = s; } + public void setSlop(int s) { + if (s < 0) { + throw new IllegalArgumentException("slop value cannot be negative"); + } + slop = s; + } /** Sets the phrase slop for this query. * @see PhraseQuery#getSlop() @@ -67,7 +83,7 @@ public void add(Term[] terms) { int position = 0; if (positions.size() > 0) - position = ((Integer) positions.get(positions.size()-1)).intValue() + 1; + position = positions.get(positions.size()-1).intValue() + 1; add(terms, position); } @@ -76,31 +92,29 @@ * Allows to specify the relative position of terms within the phrase. 
* * @see PhraseQuery#add(Term, int) - * @param terms - * @param position */ public void add(Term[] terms, int position) { if (termArrays.size() == 0) field = terms[0].field(); for (int i = 0; i < terms.length; i++) { - if (terms[i].field() != field) { + if (!terms[i].field().equals(field)) { throw new IllegalArgumentException( "All phrase terms must be in the same field (" + field + "): " + terms[i]); } } termArrays.add(terms); - positions.add(new Integer(position)); + positions.add(Integer.valueOf(position)); } /** - * Returns a List of the terms in the multiphrase. + * Returns a List of the terms in the multiphrase. * Do not modify the List or its contents. */ - public List getTermArrays() { - return Collections.unmodifiableList(termArrays); + public List getTermArrays() { + return Collections.unmodifiableList(termArrays); } /** @@ -109,148 +123,168 @@ public int[] getPositions() { int[] result = new int[positions.size()]; for (int i = 0; i < positions.size(); i++) - result[i] = ((Integer) positions.get(i)).intValue(); + result[i] = positions.get(i).intValue(); return result; } // inherit javadoc - public void extractTerms(Set terms) { - for (Iterator iter = termArrays.iterator(); iter.hasNext();) { - Term[] arr = (Term[])iter.next(); - for (int i=0; i terms) { + for (final Term[] arr : termArrays) { + for (final Term term: arr) { + terms.add(term); } } } - private class MultiPhraseWeight implements Weight { - private Similarity similarity; - private float value; - private float idf; - private float queryNorm; - private float queryWeight; + private class MultiPhraseWeight extends Weight { + private final Similarity similarity; + private final Similarity.SimWeight stats; + private final Map termContexts = new HashMap<>(); - public MultiPhraseWeight(Searcher searcher) + public MultiPhraseWeight(IndexSearcher searcher) throws IOException { - this.similarity = getSimilarity(searcher); - + this.similarity = searcher.getSimilarity(); + final IndexReaderContext context = searcher.getTopReaderContext(); + // compute idf - Iterator i = termArrays.iterator(); - while (i.hasNext()) { - Term[] terms = (Term[])i.next(); - for (int j=0; j allTermStats = new ArrayList<>(); + for(final Term[] terms: termArrays) { + for (Term term: terms) { + TermContext termContext = termContexts.get(term); + if (termContext == null) { + termContext = TermContext.build(context, term); + termContexts.put(term, termContext); + } + allTermStats.add(searcher.termStatistics(term, termContext)); } } + stats = similarity.computeWeight(getBoost(), + searcher.collectionStatistics(field), + allTermStats.toArray(new TermStatistics[allTermStats.size()])); } + @Override public Query getQuery() { return MultiPhraseQuery.this; } - public float getValue() { return value; } - public float sumOfSquaredWeights() { - queryWeight = idf * getBoost(); // compute query weight - return queryWeight * queryWeight; // square it + @Override + public float getValueForNormalization() { + return stats.getValueForNormalization(); } - public void normalize(float queryNorm) { - this.queryNorm = queryNorm; - queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + @Override + public void normalize(float queryNorm, float topLevelBoost) { + stats.normalize(queryNorm, topLevelBoost); } - public Scorer scorer(IndexReader reader) throws IOException { - if (termArrays.size() == 0) // optimize zero-term case - return null; + @Override + public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws 
IOException { + assert !termArrays.isEmpty(); + final AtomicReader reader = context.reader(); + final Bits liveDocs = acceptDocs; + + PhraseQuery.PostingsAndFreq[] postingsFreqs = new PhraseQuery.PostingsAndFreq[termArrays.size()]; - TermPositions[] tps = new TermPositions[termArrays.size()]; - for (int i=0; i 1) - p = new MultipleTermPositions(reader, terms); - else - p = reader.termPositions(terms[0]); - - if (p == null) - return null; - - tps[i] = p; + final Terms fieldTerms = reader.terms(field); + if (fieldTerms == null) { + return null; } - if (slop == 0) - return new ExactPhraseScorer(this, tps, getPositions(), similarity, - reader.norms(field)); - else - return new SloppyPhraseScorer(this, tps, getPositions(), similarity, - slop, reader.norms(field)); - } + // Reuse single TermsEnum below: + final TermsEnum termsEnum = fieldTerms.iterator(null); - public Explanation explain(IndexReader reader, int doc) - throws IOException { - ComplexExplanation result = new ComplexExplanation(); - result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); + for (int pos=0; pos 1) { + postingsEnum = new UnionDocsAndPositionsEnum(liveDocs, context, terms, termContexts, termsEnum); - Explanation boostExpl = new Explanation(getBoost(), "boost"); - if (getBoost() != 1.0f) - queryExpl.addDetail(boostExpl); + // coarse -- this overcounts since a given doc can + // have more than one term: + docFreq = 0; + for(int termIdx=0;termIdx i = termArrays.iterator(); + int lastPos = -1; + boolean first = true; while (i.hasNext()) { - Term[] terms = (Term[])i.next(); + Term[] terms = i.next(); + int position = positions.get(k); + if (first) { + first = false; + } else { + buffer.append(" "); + for (int j=1; j<(position-lastPos); j++) { + buffer.append("? "); + } + } if (terms.length > 1) { buffer.append("("); for (int j = 0; j < terms.length; j++) { @@ -289,8 +337,8 @@ } else { buffer.append(terms[0].text()); } - if (i.hasNext()) - buffer.append(" "); + lastPos = position; + ++k; } buffer.append("\""); @@ -306,21 +354,226 @@ /** Returns true if o is equal to this. */ + @Override public boolean equals(Object o) { if (!(o instanceof MultiPhraseQuery)) return false; MultiPhraseQuery other = (MultiPhraseQuery)o; return this.getBoost() == other.getBoost() && this.slop == other.slop - && this.termArrays.equals(other.termArrays) + && termArraysEquals(this.termArrays, other.termArrays) && this.positions.equals(other.positions); } /** Returns a hash code value for this object.*/ + @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ slop - ^ termArrays.hashCode() + ^ termArraysHashCode() ^ positions.hashCode() ^ 0x4AC65113; } + + // Breakout calculation of the termArrays hashcode + private int termArraysHashCode() { + int hashCode = 1; + for (final Term[] termArray: termArrays) { + hashCode = 31 * hashCode + + (termArray == null ? 0 : Arrays.hashCode(termArray)); + } + return hashCode; + } + + // Breakout calculation of the termArrays equals + private boolean termArraysEquals(List termArrays1, List termArrays2) { + if (termArrays1.size() != termArrays2.size()) { + return false; + } + ListIterator iterator1 = termArrays1.listIterator(); + ListIterator iterator2 = termArrays2.listIterator(); + while (iterator1.hasNext()) { + Term[] termArray1 = iterator1.next(); + Term[] termArray2 = iterator2.next(); + if (!(termArray1 == null ? 
termArray2 == null : Arrays.equals(termArray1, + termArray2))) { + return false; + } + } + return true; + } } + +/** + * Takes the logical union of multiple DocsEnum iterators. + */ + +// TODO: if ever we allow subclassing of the *PhraseScorer +class UnionDocsAndPositionsEnum extends DocsAndPositionsEnum { + + private static final class DocsQueue extends PriorityQueue { + DocsQueue(List docsEnums) throws IOException { + super(docsEnums.size()); + + Iterator i = docsEnums.iterator(); + while (i.hasNext()) { + DocsAndPositionsEnum postings = i.next(); + if (postings.nextDoc() != DocIdSetIterator.NO_MORE_DOCS) { + add(postings); + } + } + } + + @Override + public final boolean lessThan(DocsAndPositionsEnum a, DocsAndPositionsEnum b) { + return a.docID() < b.docID(); + } + } + + private static final class IntQueue { + private int _arraySize = 16; + private int _index = 0; + private int _lastIndex = 0; + private int[] _array = new int[_arraySize]; + + final void add(int i) { + if (_lastIndex == _arraySize) + growArray(); + + _array[_lastIndex++] = i; + } + + final int next() { + return _array[_index++]; + } + + final void sort() { + Arrays.sort(_array, _index, _lastIndex); + } + + final void clear() { + _index = 0; + _lastIndex = 0; + } + + final int size() { + return (_lastIndex - _index); + } + + private void growArray() { + int[] newArray = new int[_arraySize * 2]; + System.arraycopy(_array, 0, newArray, 0, _arraySize); + _array = newArray; + _arraySize *= 2; + } + } + + private int _doc = -1; + private int _freq; + private DocsQueue _queue; + private IntQueue _posList; + private long cost; + + public UnionDocsAndPositionsEnum(Bits liveDocs, AtomicReaderContext context, Term[] terms, Map termContexts, TermsEnum termsEnum) throws IOException { + List docsEnums = new LinkedList<>(); + for (int i = 0; i < terms.length; i++) { + final Term term = terms[i]; + TermState termState = termContexts.get(term).get(context.ord); + if (termState == null) { + // Term doesn't exist in reader + continue; + } + termsEnum.seekExact(term.bytes(), termState); + DocsAndPositionsEnum postings = termsEnum.docsAndPositions(liveDocs, null, DocsEnum.FLAG_NONE); + if (postings == null) { + // term does exist, but has no positions + throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run PhraseQuery (term=" + term.text() + ")"); + } + cost += postings.cost(); + docsEnums.add(postings); + } + + _queue = new DocsQueue(docsEnums); + _posList = new IntQueue(); + } + + @Override + public final int nextDoc() throws IOException { + if (_queue.size() == 0) { + return NO_MORE_DOCS; + } + + // TODO: move this init into positions(): if the search + // doesn't need the positions for this doc then don't + // waste CPU merging them: + _posList.clear(); + _doc = _queue.top().docID(); + + // merge sort all positions together + DocsAndPositionsEnum postings; + do { + postings = _queue.top(); + + final int freq = postings.freq(); + for (int i = 0; i < freq; i++) { + _posList.add(postings.nextPosition()); + } + + if (postings.nextDoc() != NO_MORE_DOCS) { + _queue.updateTop(); + } else { + _queue.pop(); + } + } while (_queue.size() > 0 && _queue.top().docID() == _doc); + + _posList.sort(); + _freq = _posList.size(); + + return _doc; + } + + @Override + public int nextPosition() { + return _posList.next(); + } + + @Override + public int startOffset() { + return -1; + } + + @Override + public int endOffset() { + return -1; + } + + @Override + public BytesRef getPayload() { + return null; 
+ } + + @Override + public final int advance(int target) throws IOException { + while (_queue.top() != null && target > _queue.top().docID()) { + DocsAndPositionsEnum postings = _queue.pop(); + if (postings.advance(target) != NO_MORE_DOCS) { + _queue.add(postings); + } + } + return nextDoc(); + } + + @Override + public final int freq() { + return _freq; + } + + @Override + public final int docID() { + return _doc; + } + + @Override + public long cost() { + return cost; + } +} Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/MultiSearcher.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/MultiTermQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/MultiTermQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/MultiTermQuery.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/MultiTermQuery.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,80 +19,315 @@ import java.io.IOException; +import org.apache.lucene.index.FilteredTermsEnum; // javadocs import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.SingleTermsEnum; // javadocs import org.apache.lucene.index.Term; -import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.AttributeSource; /** - * A {@link Query} that matches documents containing a subset of terms provided - * by a {@link FilteredTermEnum} enumeration. - *

      - * MultiTermQuery is not designed to be used by itself. - *
      - * The reason being that it is not intialized with a {@link FilteredTermEnum} - * enumeration. A {@link FilteredTermEnum} enumeration needs to be provided. - *

      - * For example, {@link WildcardQuery} and {@link FuzzyQuery} extend - * MultiTermQuery to provide {@link WildcardTermEnum} and - * {@link FuzzyTermEnum}, respectively. + * An abstract {@link Query} that matches documents + * containing a subset of terms provided by a {@link + * FilteredTermsEnum} enumeration. + * + *

      This query cannot be used directly; you must subclass + * it and define {@link #getTermsEnum(Terms,AttributeSource)} to provide a {@link + * FilteredTermsEnum} that iterates through the terms to be + * matched. + * + *

      NOTE: if {@link #setRewriteMethod} is either + * {@link #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} or {@link + * #SCORING_BOOLEAN_QUERY_REWRITE}, you may encounter a + * {@link BooleanQuery.TooManyClauses} exception during + * searching, which happens when the number of terms to be + * searched exceeds {@link + * BooleanQuery#getMaxClauseCount()}. Setting {@link + * #setRewriteMethod} to {@link #CONSTANT_SCORE_FILTER_REWRITE} + * prevents this. + * + *

      The recommended rewrite method is {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT}: it doesn't spend CPU + * computing unhelpful scores, and it tries to pick the most + * performant rewrite method given the query. If you + * need scoring (like {@link FuzzyQuery}, use + * {@link TopTermsScoringBooleanQueryRewrite} which uses + * a priority queue to only collect competitive terms + * and not hit this limitation. + * + * Note that org.apache.lucene.queryparser.classic.QueryParser produces + * MultiTermQueries using {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} by default. */ public abstract class MultiTermQuery extends Query { - private Term term; + protected final String field; + protected RewriteMethod rewriteMethod = CONSTANT_SCORE_AUTO_REWRITE_DEFAULT; - /** Constructs a query for terms matching term. */ - public MultiTermQuery(Term term) { - this.term = term; + /** Abstract class that defines how the query is rewritten. */ + public static abstract class RewriteMethod { + public abstract Query rewrite(IndexReader reader, MultiTermQuery query) throws IOException; + /** + * Returns the {@link MultiTermQuery}s {@link TermsEnum} + * @see MultiTermQuery#getTermsEnum(Terms, AttributeSource) + */ + protected TermsEnum getTermsEnum(MultiTermQuery query, Terms terms, AttributeSource atts) throws IOException { + return query.getTermsEnum(terms, atts); // allow RewriteMethod subclasses to pull a TermsEnum from the MTQ } + } - /** Returns the pattern term. */ - public Term getTerm() { return term; } + /** A rewrite method that first creates a private Filter, + * by visiting each term in sequence and marking all docs + * for that term. Matching documents are assigned a + * constant score equal to the query's boost. + * + *

      This method is faster than the BooleanQuery + * rewrite methods when the number of matched terms or + * matched documents is non-trivial. Also, it will never + * hit an errant {@link BooleanQuery.TooManyClauses} + * exception. + * + * @see #setRewriteMethod */ + public static final RewriteMethod CONSTANT_SCORE_FILTER_REWRITE = new RewriteMethod() { + @Override + public Query rewrite(IndexReader reader, MultiTermQuery query) { + Query result = new ConstantScoreQuery(new MultiTermQueryWrapperFilter<>(query)); + result.setBoost(query.getBoost()); + return result; + } + }; - /** Construct the enumeration to be used, expanding the pattern term. */ - protected abstract FilteredTermEnum getEnum(IndexReader reader) - throws IOException; + /** A rewrite method that first translates each term into + * {@link BooleanClause.Occur#SHOULD} clause in a + * BooleanQuery, and keeps the scores as computed by the + * query. Note that typically such scores are + * meaningless to the user, and require non-trivial CPU + * to compute, so it's almost always better to use {@link + * #CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} instead. + * + *

      NOTE: This rewrite method will hit {@link + * BooleanQuery.TooManyClauses} if the number of terms + * exceeds {@link BooleanQuery#getMaxClauseCount}. + * + * @see #setRewriteMethod */ + public final static RewriteMethod SCORING_BOOLEAN_QUERY_REWRITE = ScoringRewrite.SCORING_BOOLEAN_QUERY_REWRITE; + + /** Like {@link #SCORING_BOOLEAN_QUERY_REWRITE} except + * scores are not computed. Instead, each matching + * document receives a constant score equal to the + * query's boost. + * + *

      NOTE: This rewrite method will hit {@link + * BooleanQuery.TooManyClauses} if the number of terms + * exceeds {@link BooleanQuery#getMaxClauseCount}. + * + * @see #setRewriteMethod */ + public final static RewriteMethod CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE = ScoringRewrite.CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE; - public Query rewrite(IndexReader reader) throws IOException { - FilteredTermEnum enumerator = getEnum(reader); - BooleanQuery query = new BooleanQuery(true); - try { - do { - Term t = enumerator.term(); - if (t != null) { - TermQuery tq = new TermQuery(t); // found a match - tq.setBoost(getBoost() * enumerator.difference()); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query - } - } while (enumerator.next()); - } finally { - enumerator.close(); - } - return query; + /** + * A rewrite method that first translates each term into + * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, and keeps the + * scores as computed by the query. + * + *

      + * This rewrite method only uses the top scoring terms so it will not overflow + * the boolean max clause count. It is the default rewrite method for + * {@link FuzzyQuery}. + * + * @see #setRewriteMethod + */ + public static final class TopTermsScoringBooleanQueryRewrite extends TopTermsRewrite { + + /** + * Create a TopTermsScoringBooleanQueryRewrite for + * at most size terms. + *

      + * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than + * size, then it will be used instead. + */ + public TopTermsScoringBooleanQueryRewrite(int size) { + super(size); } + + @Override + protected int getMaxSize() { + return BooleanQuery.getMaxClauseCount(); + } + + @Override + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); + } + + @Override + protected void addClause(BooleanQuery topLevel, Term term, int docCount, float boost, TermContext states) { + final TermQuery tq = new TermQuery(term, states); + tq.setBoost(boost); + topLevel.add(tq, BooleanClause.Occur.SHOULD); + } + } + + /** + * A rewrite method that first translates each term into + * {@link BooleanClause.Occur#SHOULD} clause in a BooleanQuery, but the scores + * are only computed as the boost. + *

      + * This rewrite method only uses the top scoring terms so it will not overflow + * the boolean max clause count. + * + * @see #setRewriteMethod + */ + public static final class TopTermsBoostOnlyBooleanQueryRewrite extends TopTermsRewrite { + + /** + * Create a TopTermsBoostOnlyBooleanQueryRewrite for + * at most size terms. + *

      + * NOTE: if {@link BooleanQuery#getMaxClauseCount} is smaller than + * size, then it will be used instead. + */ + public TopTermsBoostOnlyBooleanQueryRewrite(int size) { + super(size); + } + + @Override + protected int getMaxSize() { + return BooleanQuery.getMaxClauseCount(); + } + + @Override + protected BooleanQuery getTopLevelQuery() { + return new BooleanQuery(true); + } + + @Override + protected void addClause(BooleanQuery topLevel, Term term, int docFreq, float boost, TermContext states) { + final Query q = new ConstantScoreQuery(new TermQuery(term, states)); + q.setBoost(boost); + topLevel.add(q, BooleanClause.Occur.SHOULD); + } + } + + /** A rewrite method that tries to pick the best + * constant-score rewrite method based on term and + * document counts from the query. If both the number of + * terms and documents is small enough, then {@link + * #CONSTANT_SCORE_BOOLEAN_QUERY_REWRITE} is used. + * Otherwise, {@link #CONSTANT_SCORE_FILTER_REWRITE} is + * used. + */ + public static class ConstantScoreAutoRewrite extends org.apache.lucene.search.ConstantScoreAutoRewrite {} - /** Prints a user-readable version of this query. */ - public String toString(String field) { - StringBuffer buffer = new StringBuffer(); - if (!term.field().equals(field)) { - buffer.append(term.field()); - buffer.append(":"); - } - buffer.append(term.text()); - buffer.append(ToStringUtils.boost(getBoost())); - return buffer.toString(); + /** Read-only default instance of {@link + * ConstantScoreAutoRewrite}, with {@link + * ConstantScoreAutoRewrite#setTermCountCutoff} set to + * {@link + * ConstantScoreAutoRewrite#DEFAULT_TERM_COUNT_CUTOFF} + * and {@link + * ConstantScoreAutoRewrite#setDocCountPercent} set to + * {@link + * ConstantScoreAutoRewrite#DEFAULT_DOC_COUNT_PERCENT}. + * Note that you cannot alter the configuration of this + * instance; you'll need to create a private instance + * instead. */ + public final static RewriteMethod CONSTANT_SCORE_AUTO_REWRITE_DEFAULT = new ConstantScoreAutoRewrite() { + @Override + public void setTermCountCutoff(int count) { + throw new UnsupportedOperationException("Please create a private instance"); } - public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof MultiTermQuery)) return false; + @Override + public void setDocCountPercent(double percent) { + throw new UnsupportedOperationException("Please create a private instance"); + } + }; - final MultiTermQuery multiTermQuery = (MultiTermQuery) o; + /** + * Constructs a query matching terms that cannot be represented with a single + * Term. + */ + public MultiTermQuery(final String field) { + if (field == null) { + throw new IllegalArgumentException("field must not be null"); + } + this.field = field; + } - if (!term.equals(multiTermQuery.term)) return false; + /** Returns the field name for this query */ + public final String getField() { return field; } - return getBoost() == multiTermQuery.getBoost(); - } + /** Construct the enumeration to be used, expanding the + * pattern term. This method should only be called if + * the field exists (ie, implementations can assume the + * field does exist). This method should not return null + * (should instead return {@link TermsEnum#EMPTY} if no + * terms match). The TermsEnum must already be + * positioned to the first matching term. + * The given {@link AttributeSource} is passed by the {@link RewriteMethod} to + * provide attributes, the rewrite method uses to inform about e.g. maximum competitive boosts. 
+ * This is currently only used by {@link TopTermsRewrite} + */ + protected abstract TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException; - public int hashCode() { - return term.hashCode() + Float.floatToRawIntBits(getBoost()); + /** Convenience method, if no attributes are needed: + * This simply passes empty attributes and is equal to: + * getTermsEnum(terms, new AttributeSource()) + */ + protected final TermsEnum getTermsEnum(Terms terms) throws IOException { + return getTermsEnum(terms, new AttributeSource()); + } + + /** + * To rewrite to a simpler form, instead return a simpler + * enum from {@link #getTermsEnum(Terms, AttributeSource)}. For example, + * to rewrite to a single term, return a {@link SingleTermsEnum} + */ + @Override + public final Query rewrite(IndexReader reader) throws IOException { + return rewriteMethod.rewrite(reader, this); + } + + /** + * @see #setRewriteMethod + */ + public RewriteMethod getRewriteMethod() { + return rewriteMethod; + } + + /** + * Sets the rewrite method to be used when executing the + * query. You can use one of the four core methods, or + * implement your own subclass of {@link RewriteMethod}. */ + public void setRewriteMethod(RewriteMethod method) { + rewriteMethod = method; + } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Float.floatToIntBits(getBoost()); + result = prime * result + rewriteMethod.hashCode(); + if (field != null) result = prime * result + field.hashCode(); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + MultiTermQuery other = (MultiTermQuery) obj; + if (Float.floatToIntBits(getBoost()) != Float.floatToIntBits(other.getBoost())) + return false; + if (!rewriteMethod.equals(other.rewriteMethod)) { + return false; } + return (other.field == null ? field == null : other.field.equals(field)); + } + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/MultiTermQueryWrapperFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/NGramPhraseQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/NonMatchingScorer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/NumericRangeFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/NumericRangeQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ParallelMultiSearcher.java'. Fisheye: No comparison available. Pass `N' to diff? 
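The rewrite-method constants documented in the MultiTermQuery javadoc above control how a multi-term query is expanded before searching. A short sketch of choosing one on a concrete subclass follows; PrefixQuery and the hypothetical "body" field stand in for any MultiTermQuery.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;

public class RewriteMethodDemo {
  // Chooses how the prefix "app*" is rewritten before searching the "body" field.
  static Query appPrefix(boolean needScores) {
    PrefixQuery q = new PrefixQuery(new Term("body", "app"));
    if (needScores) {
      // Keep per-term scores but expand to at most the 50 best terms,
      // so BooleanQuery.TooManyClauses can never be hit:
      q.setRewriteMethod(new MultiTermQuery.TopTermsScoringBooleanQueryRewrite(50));
    } else {
      // Constant-score filter rewrite: no scoring work, safe for huge term sets:
      q.setRewriteMethod(MultiTermQuery.CONSTANT_SCORE_FILTER_REWRITE);
    }
    return q; // left unset, CONSTANT_SCORE_AUTO_REWRITE_DEFAULT would apply
  }
}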
Index: 3rdParty_sources/lucene/org/apache/lucene/search/PhrasePositions.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/PhrasePositions.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/PhrasePositions.java 17 Aug 2012 14:54:57 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/PhrasePositions.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -24,44 +24,42 @@ * Position of a term in a document that takes into account the term offset within the phrase. */ final class PhrasePositions { - int doc; // current doc - int position; // position in doc - int count; // remaining pos in this doc - int offset; // position in phrase - TermPositions tp; // stream of positions - PhrasePositions next; // used to make lists - boolean repeats; // there's other pp for same term (e.g. query="1st word 2nd word"~1) + int doc; // current doc + int position; // position in doc + int count; // remaining pos in this doc + int offset; // position in phrase + final int ord; // unique across all PhrasePositions instances + final DocsAndPositionsEnum postings; // stream of docs & positions + PhrasePositions next; // used to make lists + int rptGroup = -1; // >=0 indicates that this is a repeating PP + int rptInd; // index in the rptGroup + final Term[] terms; // for repetitions initialization - PhrasePositions(TermPositions t, int o) { - tp = t; + PhrasePositions(DocsAndPositionsEnum postings, int o, int ord, Term[] terms) { + this.postings = postings; offset = o; + this.ord = ord; + this.terms = terms; } - final boolean next() throws IOException { // increments to next doc - if (!tp.next()) { - tp.close(); // close stream - doc = Integer.MAX_VALUE; // sentinel value + final boolean next() throws IOException { // increments to next doc + doc = postings.nextDoc(); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { return false; } - doc = tp.doc(); - position = 0; return true; } final boolean skipTo(int target) throws IOException { - if (!tp.skipTo(target)) { - tp.close(); // close stream - doc = Integer.MAX_VALUE; // sentinel value + doc = postings.advance(target); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { return false; } - doc = tp.doc(); - position = 0; return true; } - final void firstPosition() throws IOException { - count = tp.freq(); // read first pos + count = postings.freq(); // read first pos nextPosition(); } @@ -72,10 +70,20 @@ * have exactly the same position. 
*/ final boolean nextPosition() throws IOException { - if (count-- > 0) { // read subsequent pos's - position = tp.nextPosition() - offset; + if (count-- > 0) { // read subsequent pos's + position = postings.nextPosition() - offset; return true; } else return false; } + + /** for debug purposes */ + @Override + public String toString() { + String s = "d:"+doc+" o:"+offset+" p:"+position+" c:"+count; + if (rptGroup >=0 ) { + s += " rpt:"+rptGroup+",i"+rptInd; + } + return s; + } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/PhraseQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/PhraseQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/PhraseQuery.java 17 Aug 2012 14:54:57 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/PhraseQuery.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,12 +18,25 @@ */ import java.io.IOException; -import java.util.Set; import java.util.ArrayList; +import java.util.Arrays; +import java.util.Set; -import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.DocsEnum; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a particular sequence of terms. @@ -33,8 +46,8 @@ */ public class PhraseQuery extends Query { private String field; - private ArrayList terms = new ArrayList(4); - private ArrayList positions = new ArrayList(4); + private ArrayList terms = new ArrayList<>(4); + private ArrayList positions = new ArrayList<>(4); private int maxPosition = 0; private int slop = 0; @@ -55,7 +68,12 @@ results are sorted by exactness.

      The slop is zero by default, requiring exact matches.*/ - public void setSlop(int s) { slop = s; } + public void setSlop(int s) { + if (s < 0) { + throw new IllegalArgumentException("slop value cannot be negative"); + } + slop = s; + } /** Returns the slop. See setSlop(). */ public int getSlop() { return slop; } @@ -66,7 +84,7 @@ public void add(Term term) { int position = 0; if(positions.size() > 0) - position = ((Integer) positions.get(positions.size()-1)).intValue() + 1; + position = positions.get(positions.size()-1).intValue() + 1; add(term, position); } @@ -77,23 +95,22 @@ * This allows e.g. phrases with more than one term at the same position * or phrases with gaps (e.g. in connection with stopwords). * - * @param term - * @param position */ public void add(Term term, int position) { - if (terms.size() == 0) - field = term.field(); - else if (term.field() != field) - throw new IllegalArgumentException("All phrase terms must be in the same field: " + term); + if (terms.size() == 0) { + field = term.field(); + } else if (!term.field().equals(field)) { + throw new IllegalArgumentException("All phrase terms must be in the same field: " + term); + } - terms.add(term); - positions.add(new Integer(position)); - if (position > maxPosition) maxPosition = position; + terms.add(term); + positions.add(Integer.valueOf(position)); + if (position > maxPosition) maxPosition = position; } /** Returns the set of terms in this phrase. */ public Term[] getTerms() { - return (Term[])terms.toArray(new Term[0]); + return terms.toArray(new Term[0]); } /** @@ -102,161 +119,222 @@ public int[] getPositions() { int[] result = new int[positions.size()]; for(int i = 0; i < positions.size(); i++) - result[i] = ((Integer) positions.get(i)).intValue(); + result[i] = positions.get(i).intValue(); return result; } - private class PhraseWeight implements Weight { - private Similarity similarity; - private float value; - private float idf; - private float queryNorm; - private float queryWeight; + @Override + public Query rewrite(IndexReader reader) throws IOException { + if (terms.isEmpty()) { + BooleanQuery bq = new BooleanQuery(); + bq.setBoost(getBoost()); + return bq; + } else if (terms.size() == 1) { + TermQuery tq = new TermQuery(terms.get(0)); + tq.setBoost(getBoost()); + return tq; + } else + return super.rewrite(reader); + } - public PhraseWeight(Searcher searcher) - throws IOException { - this.similarity = getSimilarity(searcher); + static class PostingsAndFreq implements Comparable { + final DocsAndPositionsEnum postings; + final int docFreq; + final int position; + final Term[] terms; + final int nTerms; // for faster comparisons - idf = similarity.idf(terms, searcher); + public PostingsAndFreq(DocsAndPositionsEnum postings, int docFreq, int position, Term... terms) { + this.postings = postings; + this.docFreq = docFreq; + this.position = position; + nTerms = terms==null ? 
0 : terms.length; + if (nTerms>0) { + if (terms.length==1) { + this.terms = terms; + } else { + Term[] terms2 = new Term[terms.length]; + System.arraycopy(terms, 0, terms2, 0, terms.length); + Arrays.sort(terms2); + this.terms = terms2; + } + } else { + this.terms = null; + } } - public String toString() { return "weight(" + PhraseQuery.this + ")"; } + @Override + public int compareTo(PostingsAndFreq other) { + if (docFreq != other.docFreq) { + return docFreq - other.docFreq; + } + if (position != other.position) { + return position - other.position; + } + if (nTerms != other.nTerms) { + return nTerms - other.nTerms; + } + if (nTerms == 0) { + return 0; + } + for (int i=0; i queryTerms) { queryTerms.addAll(terms); } /** Prints a user-readable version of this query. */ + @Override public String toString(String f) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); if (field != null && !field.equals(f)) { buffer.append(field); buffer.append(":"); @@ -265,12 +343,12 @@ buffer.append("\""); String[] pieces = new String[maxPosition + 1]; for (int i = 0; i < terms.size(); i++) { - int pos = ((Integer)positions.get(i)).intValue(); + int pos = positions.get(i).intValue(); String s = pieces[pos]; if (s == null) { - s = ((Term)terms.get(i)).text(); + s = (terms.get(i)).text(); } else { - s = s + "|" + ((Term)terms.get(i)).text(); + s = s + "|" + (terms.get(i)).text(); } pieces[pos] = s; } @@ -298,6 +376,7 @@ } /** Returns true iff o is equal to this. */ + @Override public boolean equals(Object o) { if (!(o instanceof PhraseQuery)) return false; @@ -309,6 +388,7 @@ } /** Returns a hash code value for this object.*/ + @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ slop Index: 3rdParty_sources/lucene/org/apache/lucene/search/PhraseQueue.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/PhraseQueue.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/PhraseQueue.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/PhraseQueue.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,22 +19,27 @@ import org.apache.lucene.util.PriorityQueue; -final class PhraseQueue extends PriorityQueue { +final class PhraseQueue extends PriorityQueue { PhraseQueue(int size) { - initialize(size); + super(size); } - protected final boolean lessThan(Object o1, Object o2) { - PhrasePositions pp1 = (PhrasePositions)o1; - PhrasePositions pp2 = (PhrasePositions)o2; + @Override + protected final boolean lessThan(PhrasePositions pp1, PhrasePositions pp2) { if (pp1.doc == pp2.doc) if (pp1.position == pp2.position) // same doc and pp.position, so decide by actual term positions. // rely on: pp.position == tp.position - offset. - return pp1.offset < pp2.offset; - else + if (pp1.offset == pp2.offset) { + return pp1.ord < pp2.ord; + } else { + return pp1.offset < pp2.offset; + } + else { return pp1.position < pp2.position; - else + } + else { return pp1.doc < pp2.doc; + } } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/PhraseScorer.java'. Fisheye: No comparison available. 
Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/PositiveScoresOnlyCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/PrefixFilter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/PrefixFilter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/PrefixFilter.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/PrefixFilter.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,106 +17,31 @@ * limitations under the License. */ -import org.apache.lucene.search.Filter; -import org.apache.lucene.util.OpenBitSet; import org.apache.lucene.index.Term; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.TermDocs; -import java.util.BitSet; -import java.io.IOException; - /** - * - * @version $Id$ + * A Filter that restricts search results to values that have a matching prefix in a given + * field. */ -public class PrefixFilter extends Filter { - protected final Term prefix; +public class PrefixFilter extends MultiTermQueryWrapperFilter { public PrefixFilter(Term prefix) { - this.prefix = prefix; + super(new PrefixQuery(prefix)); } - public Term getPrefix() { return prefix; } + public Term getPrefix() { return query.getPrefix(); } - /** - * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. - */ - public BitSet bits(IndexReader reader) throws IOException { - final BitSet bitSet = new BitSet(reader.maxDoc()); - new PrefixGenerator(prefix) { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; - } - - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final OpenBitSet bitSet = new OpenBitSet(reader.maxDoc()); - new PrefixGenerator(prefix) { - public void handleDoc(int doc) { - bitSet.set(doc); - } - }.generate(reader); - return bitSet; - } - /** Prints a user-readable version of this query. */ + @Override public String toString () { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("PrefixFilter("); - buffer.append(prefix.toString()); + buffer.append(getPrefix().toString()); buffer.append(")"); return buffer.toString(); } -} -// keep this protected until I decide if it's a good way -// to separate id generation from collection (or should -// I just reuse hitcollector???) 
-interface IdGenerator { - public void generate(IndexReader reader) throws IOException; - public void handleDoc(int doc); } -abstract class PrefixGenerator implements IdGenerator { - protected final Term prefix; - PrefixGenerator(Term prefix) { - this.prefix = prefix; - } - - public void generate(IndexReader reader) throws IOException { - TermEnum enumerator = reader.terms(prefix); - TermDocs termDocs = reader.termDocs(); - - try { - - String prefixText = prefix.text(); - String prefixField = prefix.field(); - do { - Term term = enumerator.term(); - if (term != null && - term.text().startsWith(prefixText) && - term.field() == prefixField) // interned comparison - { - termDocs.seek(term); - while (termDocs.next()) { - handleDoc(termDocs.doc()); - } - } else { - break; - } - } while (enumerator.next()); - } finally { - termDocs.close(); - enumerator.close(); - } - } -} - - - Index: 3rdParty_sources/lucene/org/apache/lucene/search/PrefixQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/PrefixQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/PrefixQuery.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/PrefixQuery.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,55 +19,47 @@ import java.io.IOException; +import org.apache.lucene.index.TermsEnum; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermEnum; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Terms; +import org.apache.lucene.util.AttributeSource; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing terms with a specified prefix. A PrefixQuery - * is built by QueryParser for input like app*. */ -public class PrefixQuery extends Query { + * is built by QueryParser for input like app*. + * + *

      This query uses the {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * rewrite method. */ +public class PrefixQuery extends MultiTermQuery { private Term prefix; /** Constructs a query for terms starting with prefix. */ public PrefixQuery(Term prefix) { + super(prefix.field()); this.prefix = prefix; } /** Returns the prefix of this query. */ public Term getPrefix() { return prefix; } - - public Query rewrite(IndexReader reader) throws IOException { - BooleanQuery query = new BooleanQuery(true); - TermEnum enumerator = reader.terms(prefix); - try { - String prefixText = prefix.text(); - String prefixField = prefix.field(); - do { - Term term = enumerator.term(); - if (term != null && - term.text().startsWith(prefixText) && - term.field() == prefixField) // interned comparison - { - TermQuery tq = new TermQuery(term); // found a match - tq.setBoost(getBoost()); // set the boost - query.add(tq, BooleanClause.Occur.SHOULD); // add to query - //System.out.println("added " + term); - } else { - break; - } - } while (enumerator.next()); - } finally { - enumerator.close(); + + @Override + protected TermsEnum getTermsEnum(Terms terms, AttributeSource atts) throws IOException { + TermsEnum tenum = terms.iterator(null); + + if (prefix.bytes().length == 0) { + // no prefix -- match all terms for this field: + return tenum; } - return query; + return new PrefixTermsEnum(tenum, prefix.bytes()); } /** Prints a user-readable version of this query. */ + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); - if (!prefix.field().equals(field)) { - buffer.append(prefix.field()); + StringBuilder buffer = new StringBuilder(); + if (!getField().equals(field)) { + buffer.append(getField()); buffer.append(":"); } buffer.append(prefix.text()); @@ -76,17 +68,29 @@ return buffer.toString(); } - /** Returns true iff o is equal to this. */ - public boolean equals(Object o) { - if (!(o instanceof PrefixQuery)) - return false; - PrefixQuery other = (PrefixQuery)o; - return (this.getBoost() == other.getBoost()) - && this.prefix.equals(other.prefix); + @Override + public int hashCode() { + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((prefix == null) ? 0 : prefix.hashCode()); + return result; } - /** Returns a hash code value for this object.*/ - public int hashCode() { - return Float.floatToIntBits(getBoost()) ^ prefix.hashCode() ^ 0x6634D93C; + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + PrefixQuery other = (PrefixQuery) obj; + if (prefix == null) { + if (other.prefix != null) + return false; + } else if (!prefix.equals(other.prefix)) + return false; + return true; } + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/PrefixTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? 
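The PhraseQuery changes above add validation of the slop (it may no longer be negative) while keeping the documented behaviour that more exact matches score higher than sloppier ones. A minimal sketch of a sloppy phrase over a hypothetical "body" field, using only the add/setSlop API shown in this diff:

import org.apache.lucene.index.Term;
import org.apache.lucene.search.PhraseQuery;

public class SloppyPhraseDemo {
  // "quick fox" with slop 2 also matches "quick brown fox"; exact matches rank first.
  static PhraseQuery quickFox() {
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("body", "quick"));   // position 0
    pq.add(new Term("body", "fox"));     // position 1
    pq.setSlop(2);                       // 0 (the default) would require adjacency
    return pq;
  }

  // Positions can also be set explicitly, e.g. to leave a gap for a removed stopword:
  static PhraseQuery quickGapFox() {
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("body", "quick"), 0);
    pq.add(new Term("body", "fox"), 2);  // gap at position 1
    return pq;
  }
}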
Index: 3rdParty_sources/lucene/org/apache/lucene/search/Query.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/Query.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/Query.java 17 Aug 2012 14:54:57 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/Query.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,32 +19,32 @@ import java.io.IOException; -import java.util.HashSet; -import java.util.Iterator; import java.util.Set; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; /** The abstract base class for queries.

      Instantiable subclasses are:

      • {@link TermQuery} -
      • {@link MultiTermQuery}
      • {@link BooleanQuery}
      • {@link WildcardQuery}
      • {@link PhraseQuery}
      • {@link PrefixQuery}
      • {@link MultiPhraseQuery}
      • {@link FuzzyQuery} -
      • {@link RangeQuery} -
      • {@link org.apache.lucene.search.spans.SpanQuery} +
      • {@link RegexpQuery} +
      • {@link TermRangeQuery} +
      • {@link NumericRangeQuery} +
      • {@link ConstantScoreQuery} +
      • {@link DisjunctionMaxQuery} +
      • {@link MatchAllDocsQuery}
      -

      A parser for queries is contained in: -

        -
      • {@link org.apache.lucene.queryParser.QueryParser QueryParser} -
      +

      See also the family of {@link org.apache.lucene.search.spans Span Queries} + and additional queries available in the Queries module */ -public abstract class Query implements java.io.Serializable, Cloneable { +public abstract class Query implements Cloneable { private float boost = 1.0f; // query boost factor /** Sets the boost for this query clause to b. Documents @@ -61,147 +61,73 @@ /** Prints a query to a string, with field assumed to be the * default field and omitted. - *

      The representation used is one that is supposed to be readable - * by {@link org.apache.lucene.queryParser.QueryParser QueryParser}. However, - * there are the following limitations: - *

        - *
      • If the query was created by the parser, the printed - * representation may not be exactly what was parsed. For example, - * characters that need to be escaped will be represented without - * the required backslash.
      • - *
      • Some of the more complicated queries (e.g. span queries) - * don't have a representation that can be parsed by QueryParser.
      • - *
      */ public abstract String toString(String field); /** Prints a query to a string. */ + @Override public String toString() { return toString(""); } - /** Expert: Constructs an appropriate Weight implementation for this query. - * - *

      Only implemented by primitive queries, which re-write to themselves. + /** + * Expert: Constructs an appropriate Weight implementation for this query. + * + *

      + * Only implemented by primitive queries, which re-write to themselves. */ - protected Weight createWeight(Searcher searcher) throws IOException { - throw new UnsupportedOperationException(); + public Weight createWeight(IndexSearcher searcher) throws IOException { + throw new UnsupportedOperationException("Query " + this + " does not implement createWeight"); } - /** Expert: Constructs and initializes a Weight for a top-level query. */ - public Weight weight(Searcher searcher) - throws IOException { - Query query = searcher.rewrite(this); - Weight weight = query.createWeight(searcher); - float sum = weight.sumOfSquaredWeights(); - float norm = getSimilarity(searcher).queryNorm(sum); - weight.normalize(norm); - return weight; - } - /** Expert: called to re-write queries into primitive queries. For example, * a PrefixQuery will be rewritten into a BooleanQuery that consists * of TermQuerys. */ public Query rewrite(IndexReader reader) throws IOException { return this; } - - /** Expert: called when re-writing queries under MultiSearcher. - * - * Create a single query suitable for use by all subsearchers (in 1-1 - * correspondence with queries). This is an optimization of the OR of - * all queries. We handle the common optimization cases of equal - * queries and overlapping clauses of boolean OR queries (as generated - * by MultiTermQuery.rewrite() and RangeQuery.rewrite()). - * Be careful overriding this method as queries[0] determines which - * method will be called and is not necessarily of the same type as - * the other queries. - */ - public Query combine(Query[] queries) { - HashSet uniques = new HashSet(); - for (int i = 0; i < queries.length; i++) { - Query query = queries[i]; - BooleanClause[] clauses = null; - // check if we can split the query into clauses - boolean splittable = (query instanceof BooleanQuery); - if(splittable){ - BooleanQuery bq = (BooleanQuery) query; - splittable = bq.isCoordDisabled(); - clauses = bq.getClauses(); - for (int j = 0; splittable && j < clauses.length; j++) { - splittable = (clauses[j].getOccur() == BooleanClause.Occur.SHOULD); - } - } - if(splittable){ - for (int j = 0; j < clauses.length; j++) { - uniques.add(clauses[j].getQuery()); - } - } else { - uniques.add(query); - } - } - // optimization: if we have just one query, just return it - if(uniques.size() == 1){ - return (Query)uniques.iterator().next(); - } - Iterator it = uniques.iterator(); - BooleanQuery result = new BooleanQuery(true); - while (it.hasNext()) - result.add((Query) it.next(), BooleanClause.Occur.SHOULD); - return result; - } - + /** - * Expert: adds all terms occuring in this query to the terms set. Only + * Expert: adds all terms occurring in this query to the terms set. Only * works if this query is in its {@link #rewrite rewritten} form. * * @throws UnsupportedOperationException if this query is not yet rewritten */ - public void extractTerms(Set terms) { + public void extractTerms(Set terms) { // needs to be implemented by query subclasses throw new UnsupportedOperationException(); } - - /** Expert: merges the clauses of a set of BooleanQuery's into a single - * BooleanQuery. - * - *

      A utility for use by {@link #combine(Query[])} implementations. - */ - public static Query mergeBooleanQueries(Query[] queries) { - HashSet allClauses = new HashSet(); - for (int i = 0; i < queries.length; i++) { - BooleanClause[] clauses = ((BooleanQuery)queries[i]).getClauses(); - for (int j = 0; j < clauses.length; j++) { - allClauses.add(clauses[j]); - } - } - - boolean coordDisabled = - queries.length==0? false : ((BooleanQuery)queries[0]).isCoordDisabled(); - BooleanQuery result = new BooleanQuery(coordDisabled); - Iterator i = allClauses.iterator(); - while (i.hasNext()) { - result.add((BooleanClause)i.next()); - } - return result; - } - - /** Expert: Returns the Similarity implementation to be used for this query. - * Subclasses may override this method to specify their own Similarity - * implementation, perhaps one that delegates through that of the Searcher. - * By default the Searcher's Similarity implementation is returned.*/ - public Similarity getSimilarity(Searcher searcher) { - return searcher.getSimilarity(); - } - /** Returns a clone of this query. */ - public Object clone() { + @Override + public Query clone() { try { return (Query)super.clone(); } catch (CloneNotSupportedException e) { throw new RuntimeException("Clone not supported: " + e.getMessage()); } } + + @Override + public int hashCode() { + final int prime = 31; + int result = 1; + result = prime * result + Float.floatToIntBits(boost); + return result; + } + + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (obj == null) + return false; + if (getClass() != obj.getClass()) + return false; + Query other = (Query) obj; + if (Float.floatToIntBits(boost) != Float.floatToIntBits(other.boost)) + return false; + return true; + } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/QueryFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/QueryRescorer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/QueryTermVector.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/QueryWrapperFilter.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/QueryWrapperFilter.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/QueryWrapperFilter.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/QueryWrapperFilter.java 16 Dec 2014 11:31:48 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,68 +18,64 @@ */ import java.io.IOException; -import java.util.BitSet; -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.util.OpenBitSet; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.util.Bits; /** * Constrains search results to only match those which also match a provided * query. * - *

      This could be used, for example, with a {@link RangeQuery} on a suitably + *

      This could be used, for example, with a {@link NumericRangeQuery} on a suitably * formatted date field to implement date filtering. One could re-use a single - * QueryFilter that matches, e.g., only documents modified within the last - * week. The QueryFilter and RangeQuery would only need to be reconstructed - * once per day. - * - * @version $Id$ + * CachingWrapperFilter(QueryWrapperFilter) that matches, e.g., only documents modified + * within the last week. This would only need to be reconstructed once per day. */ public class QueryWrapperFilter extends Filter { - private Query query; + private final Query query; /** Constructs a filter which only matches documents matching * query. */ public QueryWrapperFilter(Query query) { + if (query == null) + throw new NullPointerException("Query may not be null"); this.query = query; } - - /** - * @deprecated Use {@link #getDocIdSet(IndexReader)} instead. - */ - public BitSet bits(IndexReader reader) throws IOException { - final BitSet bits = new BitSet(reader.maxDoc()); - - new IndexSearcher(reader).search(query, new HitCollector() { - public final void collect(int doc, float score) { - bits.set(doc); // set bit for hit - } - }); - return bits; - } - public DocIdSet getDocIdSet(IndexReader reader) throws IOException { - final OpenBitSet bits = new OpenBitSet(reader.maxDoc()); + /** returns the inner Query */ + public final Query getQuery() { + return query; + } - new IndexSearcher(reader).search(query, new HitCollector() { - public final void collect(int doc, float score) { - bits.set(doc); // set bit for hit + @Override + public DocIdSet getDocIdSet(final AtomicReaderContext context, final Bits acceptDocs) throws IOException { + // get a private context that is used to rewrite, createWeight and score eventually + final AtomicReaderContext privateContext = context.reader().getContext(); + final Weight weight = new IndexSearcher(privateContext).createNormalizedWeight(query); + return new DocIdSet() { + @Override + public DocIdSetIterator iterator() throws IOException { + return weight.scorer(privateContext, acceptDocs); } - }); - return bits; + @Override + public boolean isCacheable() { return false; } + }; } + @Override public String toString() { return "QueryWrapperFilter(" + query + ")"; } + @Override public boolean equals(Object o) { if (!(o instanceof QueryWrapperFilter)) return false; return this.query.equals(((QueryWrapperFilter)o).query); } + @Override public int hashCode() { return query.hashCode() ^ 0x923F64B9; } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/RangeFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/RangeQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ReferenceManager.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/RegexpQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/RemoteCachingWrapperFilter.java'. Fisheye: No comparison available. Pass `N' to diff? 
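The QueryWrapperFilter change above turns an arbitrary Query into a Filter whose DocIdSet is produced by the query's own weight/scorer. A minimal usage sketch of the pattern the javadoc describes (assumes a Lucene 4.x IndexSearcher named `searcher`; the "modified" and "body" field names are hypothetical and not part of the diff):

    import java.io.IOException;
    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.*;

    // Reuse a date-range query as a cached filter, as suggested in the javadoc above.
    TopDocs searchRecent(IndexSearcher searcher, long lastWeek, long now) throws IOException {
      Query dateRange = NumericRangeQuery.newLongRange("modified", lastWeek, now, true, true);
      // CachingWrapperFilter(QueryWrapperFilter) lets the filter be rebuilt once per day
      Filter filter = new CachingWrapperFilter(new QueryWrapperFilter(dateRange));
      return searcher.search(new TermQuery(new Term("body", "lucene")), filter, 10);
    }

The filter instance itself carries no per-search state, so it can be shared across many different user queries while only the wrapped date range changes.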
Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/RemoteSearchable.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/ReqExclScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/ReqExclScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/ReqExclScorer.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/ReqExclScorer.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,48 +18,44 @@ */ import java.io.IOException; +import java.util.Collection; +import java.util.Collections; - -/** A Scorer for queries with a required subscorer and an excluding (prohibited) subscorer. +/** A Scorer for queries with a required subscorer + * and an excluding (prohibited) sub DocIdSetIterator. *
      - * This Scorer implements {@link Scorer#skipTo(int)}, + * This Scorer implements {@link Scorer#advance(int)}, * and it uses the skipTo() on the given scorers. */ -public class ReqExclScorer extends Scorer { - private Scorer reqScorer, exclScorer; +class ReqExclScorer extends Scorer { + private Scorer reqScorer; + private DocIdSetIterator exclDisi; + private int doc = -1; /** Construct a ReqExclScorer. * @param reqScorer The scorer that must match, except where - * @param exclScorer indicates exclusion. + * @param exclDisi indicates exclusion. */ - public ReqExclScorer( - Scorer reqScorer, - Scorer exclScorer) { - super(null); // No similarity used. + public ReqExclScorer(Scorer reqScorer, DocIdSetIterator exclDisi) { + super(reqScorer.weight); this.reqScorer = reqScorer; - this.exclScorer = exclScorer; + this.exclDisi = exclDisi; } - private boolean firstTime = true; - - public boolean next() throws IOException { - if (firstTime) { - if (! exclScorer.next()) { - exclScorer = null; // exhausted at start - } - firstTime = false; - } + @Override + public int nextDoc() throws IOException { if (reqScorer == null) { - return false; + return doc; } - if (! reqScorer.next()) { + doc = reqScorer.nextDoc(); + if (doc == NO_MORE_DOCS) { reqScorer = null; // exhausted, nothing left - return false; + return doc; } - if (exclScorer == null) { - return true; // reqScorer.next() already returned true + if (exclDisi == null) { + return doc; } - return toNonExcluded(); + return doc = toNonExcluded(); } /** Advance to non excluded doc. @@ -73,73 +69,68 @@ * Advances reqScorer a non excluded required doc, if any. * @return true iff there is a non excluded required doc. */ - private boolean toNonExcluded() throws IOException { - int exclDoc = exclScorer.doc(); + private int toNonExcluded() throws IOException { + int exclDoc = exclDisi.docID(); + int reqDoc = reqScorer.docID(); // may be excluded do { - int reqDoc = reqScorer.doc(); // may be excluded if (reqDoc < exclDoc) { - return true; // reqScorer advanced to before exclScorer, ie. not excluded + return reqDoc; // reqScorer advanced to before exclScorer, ie. not excluded } else if (reqDoc > exclDoc) { - if (! exclScorer.skipTo(reqDoc)) { - exclScorer = null; // exhausted, no more exclusions - return true; + exclDoc = exclDisi.advance(reqDoc); + if (exclDoc == NO_MORE_DOCS) { + exclDisi = null; // exhausted, no more exclusions + return reqDoc; } - exclDoc = exclScorer.doc(); if (exclDoc > reqDoc) { - return true; // not excluded + return reqDoc; // not excluded } } - } while (reqScorer.next()); + } while ((reqDoc = reqScorer.nextDoc()) != NO_MORE_DOCS); reqScorer = null; // exhausted, nothing left - return false; + return NO_MORE_DOCS; } - public int doc() { - return reqScorer.doc(); // reqScorer may be null when next() or skipTo() already return false + @Override + public int docID() { + return doc; } /** Returns the score of the current document matching the query. - * Initially invalid, until {@link #next()} is called the first time. + * Initially invalid, until {@link #nextDoc()} is called the first time. * @return The score of the required scorer. */ + @Override public float score() throws IOException { return reqScorer.score(); // reqScorer may be null when next() or skipTo() already return false } - /** Skips to the first match beyond the current whose document number is - * greater than or equal to a given target. - *
      When this method is used the {@link #explain(int)} method should not be used. - * @param target The target document number. - * @return true iff there is such a match. - */ - public boolean skipTo(int target) throws IOException { - if (firstTime) { - firstTime = false; - if (! exclScorer.skipTo(target)) { - exclScorer = null; // exhausted - } - } + @Override + public int freq() throws IOException { + return reqScorer.freq(); + } + + @Override + public Collection getChildren() { + return Collections.singleton(new ChildScorer(reqScorer, "MUST")); + } + + @Override + public int advance(int target) throws IOException { if (reqScorer == null) { - return false; + return doc = NO_MORE_DOCS; } - if (exclScorer == null) { - return reqScorer.skipTo(target); + if (exclDisi == null) { + return doc = reqScorer.advance(target); } - if (! reqScorer.skipTo(target)) { + if (reqScorer.advance(target) == NO_MORE_DOCS) { reqScorer = null; - return false; + return doc = NO_MORE_DOCS; } - return toNonExcluded(); + return doc = toNonExcluded(); } - public Explanation explain(int doc) throws IOException { - Explanation res = new Explanation(); - if (exclScorer.skipTo(doc) && (exclScorer.doc() == doc)) { - res.setDescription("excluded"); - } else { - res.setDescription("not excluded"); - res.addDetail(reqScorer.explain(doc)); - } - return res; + @Override + public long cost() { + return reqScorer.cost(); } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/ReqOptSumScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/ReqOptSumScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/ReqOptSumScorer.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/ReqOptSumScorer.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,5 +1,5 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,18 +17,20 @@ */ import java.io.IOException; +import java.util.ArrayList; +import java.util.Collection; /** A Scorer for queries with a required part and an optional part. * Delays skipTo() on the optional part until a score() is needed. *
      - * This Scorer implements {@link Scorer#skipTo(int)}. + * This Scorer implements {@link Scorer#advance(int)}. */ -public class ReqOptSumScorer extends Scorer { +class ReqOptSumScorer extends Scorer { /** The scorers passed from the constructor. * These are set to null as soon as their next() or skipTo() returns false. */ - private Scorer reqScorer; - private Scorer optScorer; + protected Scorer reqScorer; + protected Scorer optScorer; /** Construct a ReqOptScorer. * @param reqScorer The required scorer. This must match. @@ -38,61 +40,69 @@ Scorer reqScorer, Scorer optScorer) { - super(null); // No similarity used. + super(reqScorer.weight); + assert reqScorer != null; + assert optScorer != null; this.reqScorer = reqScorer; this.optScorer = optScorer; } - private boolean firstTimeOptScorer = true; - - public boolean next() throws IOException { - return reqScorer.next(); + @Override + public int nextDoc() throws IOException { + return reqScorer.nextDoc(); } - - public boolean skipTo(int target) throws IOException { - return reqScorer.skipTo(target); + + @Override + public int advance(int target) throws IOException { + return reqScorer.advance(target); } - - public int doc() { - return reqScorer.doc(); + + @Override + public int docID() { + return reqScorer.docID(); } - + /** Returns the score of the current document matching the query. - * Initially invalid, until {@link #next()} is called the first time. + * Initially invalid, until {@link #nextDoc()} is called the first time. * @return The score of the required scorer, eventually increased by the score * of the optional scorer when it also matches the current document. */ + @Override public float score() throws IOException { - int curDoc = reqScorer.doc(); + // TODO: sum into a double and cast to float if we ever send required clauses to BS1 + int curDoc = reqScorer.docID(); float reqScore = reqScorer.score(); - if (firstTimeOptScorer) { - firstTimeOptScorer = false; - if (! optScorer.skipTo(curDoc)) { - optScorer = null; - return reqScore; - } - } else if (optScorer == null) { + if (optScorer == null) { return reqScore; - } else if ((optScorer.doc() < curDoc) && (! optScorer.skipTo(curDoc))) { + } + + int optScorerDoc = optScorer.docID(); + if (optScorerDoc < curDoc && (optScorerDoc = optScorer.advance(curDoc)) == NO_MORE_DOCS) { optScorer = null; return reqScore; } - // assert (optScorer != null) && (optScorer.doc() >= curDoc); - return (optScorer.doc() == curDoc) - ? reqScore + optScorer.score() - : reqScore; + + return optScorerDoc == curDoc ? reqScore + optScorer.score() : reqScore; } - /** Explain the score of a document. - * @todo Also show the total score. - * See BooleanScorer.explain() on how to do this. - */ - public Explanation explain(int doc) throws IOException { - Explanation res = new Explanation(); - res.setDescription("required, optional"); - res.addDetail(reqScorer.explain(doc)); - res.addDetail(optScorer.explain(doc)); - return res; + @Override + public int freq() throws IOException { + // we might have deferred advance() + score(); + return (optScorer != null && optScorer.docID() == reqScorer.docID()) ? 
2 : 1; } + + @Override + public Collection getChildren() { + ArrayList children = new ArrayList<>(2); + children.add(new ChildScorer(reqScorer, "MUST")); + children.add(new ChildScorer(optScorer, "SHOULD")); + return children; + } + + @Override + public long cost() { + return reqScorer.cost(); + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/Rescorer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ScoreCachingWrappingScorer.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/ScoreDoc.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/ScoreDoc.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/ScoreDoc.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/ScoreDoc.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,20 +17,35 @@ * limitations under the License. */ -/** Expert: Returned by low-level search implementations. - * @see TopDocs */ -public class ScoreDoc implements java.io.Serializable { - /** Expert: The score of this document for the query. */ +/** Holds one hit in {@link TopDocs}. */ + +public class ScoreDoc { + + /** The score of this document for the query. */ public float score; - /** Expert: A hit document's number. - * @see Searcher#doc(int) - */ + /** A hit document's number. + * @see IndexSearcher#doc(int) */ public int doc; - /** Expert: Constructs a ScoreDoc. */ + /** Only set by {@link TopDocs#merge} */ + public int shardIndex; + + /** Constructs a ScoreDoc. */ public ScoreDoc(int doc, float score) { + this(doc, score, -1); + } + + /** Constructs a ScoreDoc. */ + public ScoreDoc(int doc, float score, int shardIndex) { this.doc = doc; this.score = score; + this.shardIndex = shardIndex; } + + // A convenience method for debugging. + @Override + public String toString() { + return "doc=" + doc + " score=" + score + " shardIndex=" + shardIndex; + } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ScoreDocComparator.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/Scorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/Scorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/Scorer.java 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/Scorer.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -18,75 +18,85 @@ */ import java.io.IOException; +import java.util.Collection; +import java.util.Collections; +import org.apache.lucene.index.DocsEnum; + /** * Expert: Common scoring functionality for different types of queries. * *

      - * A Scorer either iterates over documents matching a - * query in increasing order of doc Id, or provides an explanation of - * the score for a query for a given document. + * A Scorer iterates over documents matching a + * query in increasing order of doc Id. *

      *

      * Document scores are computed using a given Similarity * implementation. *

      - * @see BooleanQuery#setAllowDocsOutOfOrder + * + *

      NOTE: The values Float.Nan, + * Float.NEGATIVE_INFINITY and Float.POSITIVE_INFINITY are + * not valid scores. Certain collectors (eg {@link + * TopScoreDocCollector}) will not properly collect hits + * with these scores. */ -public abstract class Scorer extends DocIdSetIterator { - private Similarity similarity; +public abstract class Scorer extends DocsEnum { + /** the Scorer's parent Weight. in some cases this may be null */ + // TODO can we clean this up? + protected final Weight weight; - /** Constructs a Scorer. - * @param similarity The Similarity implementation used by this scorer. + /** + * Constructs a Scorer + * @param weight The scorers Weight. */ - protected Scorer(Similarity similarity) { - this.similarity = similarity; + protected Scorer(Weight weight) { + this.weight = weight; } - /** Returns the Similarity implementation used by this scorer. */ - public Similarity getSimilarity() { - return this.similarity; - } - - /** Scores and collects all matching documents. - * @param hc The collector to which all matching documents are passed through - * {@link HitCollector#collect(int, float)}. - *
      When this method is used the {@link #explain(int)} method should not be used. - */ - public void score(HitCollector hc) throws IOException { - while (next()) { - hc.collect(doc(), score()); - } - } - - /** Expert: Collects matching documents in a range. Hook for optimization. - * Note that {@link #next()} must be called once before this method is called - * for the first time. - * @param hc The collector to which all matching documents are passed through - * {@link HitCollector#collect(int, float)}. - * @param max Do not score documents past this. - * @return true if more matching documents may remain. - */ - protected boolean score(HitCollector hc, int max) throws IOException { - while (doc() < max) { - hc.collect(doc(), score()); - if (!next()) - return false; - } - return true; - } - /** Returns the score of the current document matching the query. - * Initially invalid, until {@link #next()} or {@link #skipTo(int)} - * is called the first time. + * Initially invalid, until {@link #nextDoc()} or {@link #advance(int)} + * is called the first time, or when called from within + * {@link Collector#collect}. */ public abstract float score() throws IOException; - - /** Returns an explanation of the score for a document. - *
      When this method is used, the {@link #next()}, {@link #skipTo(int)} and - * {@link #score(HitCollector)} methods should not be used. - * @param doc The document number for the explanation. + + /** returns parent Weight + * @lucene.experimental */ - public abstract Explanation explain(int doc) throws IOException; - + public Weight getWeight() { + return weight; + } + + /** Returns child sub-scorers + * @lucene.experimental */ + public Collection getChildren() { + return Collections.emptyList(); + } + + /** A child Scorer and its relationship to its parent. + * the meaning of the relationship depends upon the parent query. + * @lucene.experimental */ + public static class ChildScorer { + /** + * Child Scorer. (note this is typically a direct child, and may + * itself also have children). + */ + public final Scorer child; + /** + * An arbitrary string relating this scorer to the parent. + */ + public final String relationship; + + /** + * Creates a new ChildScorer node with the specified relationship. + *

      + * The relationship can be any be any string that makes sense to + * the parent Scorer. + */ + public ChildScorer(Scorer child, String relationship) { + this.child = child; + this.relationship = relationship; + } + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/ScoringRewrite.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/Searchable.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/Searcher.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SearcherFactory.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SearcherLifetimeManager.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SearcherManager.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/Similarity.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SimilarityDelegator.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/SloppyPhraseScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/SloppyPhraseScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/SloppyPhraseScorer.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/SloppyPhraseScorer.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,200 +17,589 @@ * limitations under the License. */ -import org.apache.lucene.index.TermPositions; - import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; import java.util.HashMap; +import java.util.HashSet; +import java.util.LinkedHashMap; -final class SloppyPhraseScorer extends PhraseScorer { - private int slop; - private PhrasePositions repeats[]; - private PhrasePositions tmpPos[]; // for flipping repeating pps. 
- private boolean checkedRepeats; +import org.apache.lucene.index.Term; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.FixedBitSet; - SloppyPhraseScorer(Weight weight, TermPositions[] tps, int[] offsets, Similarity similarity, - int slop, byte[] norms) { - super(weight, tps, offsets, similarity, norms); - this.slop = slop; - } +final class SloppyPhraseScorer extends Scorer { + private PhrasePositions min, max; - /** - * Score a candidate doc for all slop-valid position-combinations (matches) - * encountered while traversing/hopping the PhrasePositions. - *
      The score contribution of a match depends on the distance: - *
      - highest score for distance=0 (exact match). - *
      - score gets lower as distance gets higher. - *
      Example: for query "a b"~2, a document "x a b a y" can be scored twice: - * once for "a b" (distance=0), and once for "b a" (distance=2). - *
      Pssibly not all valid combinations are encountered, because for efficiency - * we always propagate the least PhrasePosition. This allows to base on - * PriorityQueue and move forward faster. - * As result, for example, document "a b c b a" - * would score differently for queries "a b c"~4 and "c b a"~4, although - * they really are equivalent. - * Similarly, for doc "a b c b a f g", query "c b"~2 - * would get same score as "g f"~2, although "c b"~2 could be matched twice. - * We may want to fix this in the future (currently not, for performance reasons). - */ - protected final float phraseFreq() throws IOException { - int end = initPhrasePositions(); - - float freq = 0.0f; - boolean done = (end<0); - while (!done) { - PhrasePositions pp = (PhrasePositions) pq.pop(); - int start = pp.position; - int next = ((PhrasePositions) pq.top()).position; + private float sloppyFreq; //phrase frequency in current doc as computed by phraseFreq(). - boolean tpsDiffer = true; - for (int pos = start; pos <= next || !tpsDiffer; pos = pp.position) { - if (pos<=next && tpsDiffer) - start = pos; // advance pp to min window - if (!pp.nextPosition()) { - done = true; // ran out of a term -- done - break; - } - PhrasePositions pp2 = null; - tpsDiffer = !pp.repeats || (pp2 = termPositionsDiffer(pp))==null; - if (pp2!=null && pp2!=pp) { - pp = flip(pp,pp2); // flip pp to pp2 - } - } + private final Similarity.SimScorer docScorer; + + private final int slop; + private final int numPostings; + private final PhraseQueue pq; // for advancing min position + + private int end; // current largest phrase position - int matchLength = end - start; - if (matchLength <= slop) - freq += getSimilarity().sloppyFreq(matchLength); // score match + private boolean hasRpts; // flag indicating that there are repetitions (as checked in first candidate doc) + private boolean checkedRpts; // flag to only check for repetitions in first candidate doc + private boolean hasMultiTermRpts; // + private PhrasePositions[][] rptGroups; // in each group are PPs that repeats each other (i.e. same term), sorted by (query) offset + private PhrasePositions[] rptStack; // temporary stack for switching colliding repeating pps + + private int numMatches; + private final long cost; + + SloppyPhraseScorer(Weight weight, PhraseQuery.PostingsAndFreq[] postings, + int slop, Similarity.SimScorer docScorer) { + super(weight); + this.docScorer = docScorer; + this.slop = slop; + this.numPostings = postings==null ? 0 : postings.length; + pq = new PhraseQueue(postings.length); + // min(cost) + cost = postings[0].postings.cost(); + // convert tps to a list of phrase positions. + // note: phrase-position differs from term-position in that its position + // reflects the phrase offset: pp.pos = tp.pos - offset. + // this allows to easily identify a matching (exact) phrase + // when all PhrasePositions have exactly the same position. 
+ if (postings.length > 0) { + min = new PhrasePositions(postings[0].postings, postings[0].position, 0, postings[0].terms); + max = min; + max.doc = -1; + for (int i = 1; i < postings.length; i++) { + PhrasePositions pp = new PhrasePositions(postings[i].postings, postings[i].position, i, postings[i].terms); + max.next = pp; + max = pp; + max.doc = -1; + } + max.next = min; // make it cyclic for easier manipulation + } + } - if (pp.position > end) - end = pp.position; - pq.put(pp); // restore pq + /** + * Score a candidate doc for all slop-valid position-combinations (matches) + * encountered while traversing/hopping the PhrasePositions. + *
      The score contribution of a match depends on the distance: + *
      - highest score for distance=0 (exact match). + *
      - score gets lower as distance gets higher. + *
      Example: for query "a b"~2, a document "x a b a y" can be scored twice: + * once for "a b" (distance=0), and once for "b a" (distance=2). + *
      Possibly not all valid combinations are encountered, because for efficiency + * we always propagate the least PhrasePosition. This allows to base on + * PriorityQueue and move forward faster. + * As result, for example, document "a b c b a" + * would score differently for queries "a b c"~4 and "c b a"~4, although + * they really are equivalent. + * Similarly, for doc "a b c b a f g", query "c b"~2 + * would get same score as "g f"~2, although "c b"~2 could be matched twice. + * We may want to fix this in the future (currently not, for performance reasons). + */ + private float phraseFreq() throws IOException { + if (!initPhrasePositions()) { + return 0.0f; + } + float freq = 0.0f; + numMatches = 0; + PhrasePositions pp = pq.pop(); + int matchLength = end - pp.position; + int next = pq.top().position; + while (advancePP(pp)) { + if (hasRpts && !advanceRpts(pp)) { + break; // pps exhausted + } + if (pp.position > next) { // done minimizing current match-length + if (matchLength <= slop) { + freq += docScorer.computeSlopFactor(matchLength); // score match + numMatches++; + } + pq.add(pp); + pp = pq.pop(); + next = pq.top().position; + matchLength = end - pp.position; + } else { + int matchLength2 = end - pp.position; + if (matchLength2 < matchLength) { + matchLength = matchLength2; } + } + } + if (matchLength <= slop) { + freq += docScorer.computeSlopFactor(matchLength); // score match + numMatches++; + } + return freq; + } - return freq; + /** advance a PhrasePosition and update 'end', return false if exhausted */ + private boolean advancePP(PhrasePositions pp) throws IOException { + if (!pp.nextPosition()) { + return false; } - - // flip pp2 and pp in the queue: pop until finding pp2, insert back all but pp2, insert pp back. - // assumes: pp!=pp2, pp2 in pq, pp not in pq. - // called only when there are repeating pps. - private PhrasePositions flip(PhrasePositions pp, PhrasePositions pp2) { - int n=0; - PhrasePositions pp3; - //pop until finding pp2 - while ((pp3=(PhrasePositions)pq.pop()) != pp2) { - tmpPos[n++] = pp3; + if (pp.position > end) { + end = pp.position; + } + return true; + } + + /** pp was just advanced. If that caused a repeater collision, resolve by advancing the lesser + * of the two colliding pps. Note that there can only be one collision, as by the initialization + * there were no collisions before pp was advanced. */ + private boolean advanceRpts(PhrasePositions pp) throws IOException { + if (pp.rptGroup < 0) { + return true; // not a repeater + } + PhrasePositions[] rg = rptGroups[pp.rptGroup]; + FixedBitSet bits = new FixedBitSet(rg.length); // for re-queuing after collisions are resolved + int k0 = pp.rptInd; + int k; + while((k=collide(pp)) >= 0) { + pp = lesser(pp, rg[k]); // always advance the lesser of the (only) two colliding pps + if (!advancePP(pp)) { + return false; // exhausted } - //insert back all but pp2 - for (n--; n>=0; n--) { - pq.insert(tmpPos[n]); + if (k != k0) { // careful: mark only those currently in the queue + bits = FixedBitSet.ensureCapacity(bits, k); + bits.set(k); // mark that pp2 need to be re-queued } - //insert pp back - pq.put(pp); - return pp2; } + // collisions resolved, now re-queue + // empty (partially) the queue until seeing all pps advanced for resolving collisions + int n = 0; + // TODO would be good if we can avoid calling cardinality() in each iteration! 
+ int numBits = bits.length(); // larges bit we set + while (bits.cardinality() > 0) { + PhrasePositions pp2 = pq.pop(); + rptStack[n++] = pp2; + if (pp2.rptGroup >= 0 + && pp2.rptInd < numBits // this bit may not have been set + && bits.get(pp2.rptInd)) { + bits.clear(pp2.rptInd); + } + } + // add back to queue + for (int i=n-1; i>=0; i--) { + pq.add(rptStack[i]); + } + return true; + } - /** - * Init PhrasePositions in place. - * There is a one time initialization for this scorer: - *
      - Put in repeats[] each pp that has another pp with same position in the doc. - *
      - Also mark each such pp by pp.repeats = true. - *
      Later can consult with repeats[] in termPositionsDiffer(pp), making that check efficient. - * In particular, this allows to score queries with no repetitions with no overhead due to this computation. - *
      - Example 1 - query with no repetitions: "ho my"~2 - *
      - Example 2 - query with repetitions: "ho my my"~2 - *
      - Example 3 - query with repetitions: "my ho my"~2 - *
      Init per doc w/repeats in query, includes propagating some repeating pp's to avoid false phrase detection. - * @return end (max position), or -1 if any term ran out (i.e. done) - * @throws IOException - */ - private int initPhrasePositions() throws IOException { - int end = 0; - - // no repeats at all (most common case is also the simplest one) - if (checkedRepeats && repeats==null) { - // build queue from list - pq.clear(); - for (PhrasePositions pp = first; pp != null; pp = pp.next) { - pp.firstPosition(); - if (pp.position > end) - end = pp.position; - pq.put(pp); // build pq from list + /** compare two pps, but only by position and offset */ + private PhrasePositions lesser(PhrasePositions pp, PhrasePositions pp2) { + if (pp.position < pp2.position || + (pp.position == pp2.position && pp.offset < pp2.offset)) { + return pp; + } + return pp2; + } + + /** index of a pp2 colliding with pp, or -1 if none */ + private int collide(PhrasePositions pp) { + int tpPos = tpPos(pp); + PhrasePositions[] rg = rptGroups[pp.rptGroup]; + for (int i=0; i + *

    • Check if there are repetitions + *
    • If there are, find groups of repetitions. + *
    + * Examples: + *
      + *
    1. no repetitions: "ho my"~2 + *
    2. repetitions: "ho my my"~2 + *
    3. repetitions: "my ho my"~2 + *
    + * @return false if PPs are exhausted (and so current doc will not be a match) + */ + private boolean initPhrasePositions() throws IOException { + end = Integer.MIN_VALUE; + if (!checkedRpts) { + return initFirstTime(); + } + if (!hasRpts) { + initSimple(); + return true; // PPs available + } + return initComplex(); + } + + /** no repeats: simplest case, and most common. It is important to keep this piece of the code simple and efficient */ + private void initSimple() throws IOException { + //System.err.println("initSimple: doc: "+min.doc); + pq.clear(); + // position pps and build queue from list + for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max + pp.firstPosition(); + if (pp.position > end) { + end = pp.position; + } + pq.add(pp); + } + } + + /** with repeats: not so simple. */ + private boolean initComplex() throws IOException { + //System.err.println("initComplex: doc: "+min.doc); + placeFirstPositions(); + if (!advanceRepeatGroups()) { + return false; // PPs exhausted + } + fillQueue(); + return true; // PPs available + } + + /** move all PPs to their first position */ + private void placeFirstPositions() throws IOException { + for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max + pp.firstPosition(); + } + } + + /** Fill the queue (all pps are already placed */ + private void fillQueue() { + pq.clear(); + for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max + if (pp.position > end) { + end = pp.position; + } + pq.add(pp); + } + } + + /** At initialization (each doc), each repetition group is sorted by (query) offset. + * This provides the start condition: no collisions. + *

    Case 1: no multi-term repeats
    + * It is sufficient to advance each pp in the group by one less than its group index. + * So lesser pp is not advanced, 2nd one advance once, 3rd one advanced twice, etc. + *

    Case 2: multi-term repeats
    + * + * @return false if PPs are exhausted. + */ + private boolean advanceRepeatGroups() throws IOException { + for (PhrasePositions[] rg: rptGroups) { + if (hasMultiTermRpts) { + // more involved, some may not collide + int incr; + for (int i=0; i= 0) { + PhrasePositions pp2 = lesser(pp, rg[k]); + if (!advancePP(pp2)) { // at initialization always advance pp with higher offset + return false; // exhausted } - return end; - } - - // position the pp's - for (PhrasePositions pp = first; pp != null; pp = pp.next) - pp.firstPosition(); - - // one time initializatin for this scorer - if (!checkedRepeats) { - checkedRepeats = true; - // check for repeats - HashMap m = null; - for (PhrasePositions pp = first; pp != null; pp = pp.next) { - int tpPos = pp.position + pp.offset; - for (PhrasePositions pp2 = pp.next; pp2 != null; pp2 = pp2.next) { - int tpPos2 = pp2.position + pp2.offset; - if (tpPos2 == tpPos) { - if (m == null) - m = new HashMap(); - pp.repeats = true; - pp2.repeats = true; - m.put(pp,null); - m.put(pp2,null); - } - } + if (pp2.rptInd < i) { // should not happen? + incr = 0; + break; } - if (m!=null) - repeats = (PhrasePositions[]) m.keySet().toArray(new PhrasePositions[0]); + } } - - // with repeats must advance some repeating pp's so they all start with differing tp's - if (repeats!=null) { - for (int i = 0; i < repeats.length; i++) { - PhrasePositions pp = repeats[i]; - PhrasePositions pp2; - while ((pp2 = termPositionsDiffer(pp)) != null) { - if (!pp2.nextPosition()) // out of pps that do not differ, advance the pp with higher offset - return -1; // ran out of a term -- done - } + } else { + // simpler, we know exactly how much to advance + for (int j=1; j end) - end = pp.position; - pq.put(pp); // build pq from list + } + } + return true; // PPs available + } + + /** initialize with checking for repeats. Heavy work, but done only for the first candidate doc.

    + * If there are repetitions, check if multi-term postings (MTP) are involved.

    + * Without MTP, once PPs are placed in the first candidate doc, repeats (and groups) are visible.
    + * With MTP, a more complex check is needed, up-front, as there may be "hidden collisions".
    + * For example P1 has {A,B}, P1 has {B,C}, and the first doc is: "A C B". At start, P1 would point + * to "A", p2 to "C", and it will not be identified that P1 and P2 are repetitions of each other.

    + * The more complex initialization has two parts:
    + * (1) identification of repetition groups.
    + * (2) advancing repeat groups at the start of the doc.
    + * For (1), a possible solution is to just create a single repetition group, + * made of all repeating pps. But this would slow down the check for collisions, + * as all pps would need to be checked. Instead, we compute "connected regions" + * on the bipartite graph of postings and terms. + */ + private boolean initFirstTime() throws IOException { + //System.err.println("initFirstTime: doc: "+min.doc); + checkedRpts = true; + placeFirstPositions(); + + LinkedHashMap rptTerms = repeatingTerms(); + hasRpts = !rptTerms.isEmpty(); + + if (hasRpts) { + rptStack = new PhrasePositions[numPostings]; // needed with repetitions + ArrayList> rgs = gatherRptGroups(rptTerms); + sortRptGroups(rgs); + if (!advanceRepeatGroups()) { + return false; // PPs exhausted + } + } + + fillQueue(); + return true; // PPs available + } + + /** sort each repetition group by (query) offset. + * Done only once (at first doc) and allows to initialize faster for each doc. */ + private void sortRptGroups(ArrayList> rgs) { + rptGroups = new PhrasePositions[rgs.size()][]; + Comparator cmprtr = new Comparator() { + @Override + public int compare(PhrasePositions pp1, PhrasePositions pp2) { + return pp1.offset - pp2.offset; + } + }; + for (int i=0; i> gatherRptGroups(LinkedHashMap rptTerms) throws IOException { + PhrasePositions[] rpp = repeatingPPs(rptTerms); + ArrayList> res = new ArrayList<>(); + if (!hasMultiTermRpts) { + // simpler - no multi-terms - can base on positions in first doc + for (int i=0; i=0) continue; // already marked as a repetition + int tpPos = tpPos(pp); + for (int j=i+1; j=0 // already marked as a repetition + || pp2.offset == pp.offset // not a repetition: two PPs are originally in same offset in the query! + || tpPos(pp2) != tpPos) { // not a repetition + continue; + } + // a repetition + int g = pp.rptGroup; + if (g < 0) { + g = res.size(); + pp.rptGroup = g; + ArrayList rl = new ArrayList<>(2); + rl.add(pp); + res.add(rl); + } + pp2.rptGroup = g; + res.get(g).add(pp2); } + } + } else { + // more involved - has multi-terms + ArrayList> tmp = new ArrayList<>(); + ArrayList bb = ppTermsBitSets(rpp, rptTerms); + unionTermGroups(bb); + HashMap tg = termGroups(rptTerms, bb); + HashSet distinctGroupIDs = new HashSet<>(tg.values()); + for (int i=0; i()); + } + for (PhrasePositions pp : rpp) { + for (Term t: pp.terms) { + if (rptTerms.containsKey(t)) { + int g = tg.get(t); + tmp.get(g).add(pp); + assert pp.rptGroup==-1 || pp.rptGroup==g; + pp.rptGroup = g; + } + } + } + for (HashSet hs : tmp) { + res.add(new ArrayList<>(hs)); + } + } + return res; + } - if (repeats!=null) { - tmpPos = new PhrasePositions[pq.size()]; + /** Actual position in doc of a PhrasePosition, relies on that position = tpPos - offset) */ + private final int tpPos(PhrasePositions pp) { + return pp.position + pp.offset; + } + + /** find repeating terms and assign them ordinal values */ + private LinkedHashMap repeatingTerms() { + LinkedHashMap tord = new LinkedHashMap<>(); + HashMap tcnt = new HashMap<>(); + for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max + for (Term t : pp.terms) { + Integer cnt0 = tcnt.get(t); + Integer cnt = cnt0==null ? new Integer(1) : new Integer(1+cnt0.intValue()); + tcnt.put(t, cnt); + if (cnt==2) { + tord.put(t,tord.size()); } - return end; + } } + return tord; + } - /** - * We disallow two pp's to have the same TermPosition, thereby verifying multiple occurrences - * in the query of the same word would go elsewhere in the matched doc. 
- * @return null if differ (i.e. valid) otherwise return the higher offset PhrasePositions - * out of the first two PPs found to not differ. - */ - private PhrasePositions termPositionsDiffer(PhrasePositions pp) { - // efficiency note: a more efficient implementation could keep a map between repeating - // pp's, so that if pp1a, pp1b, pp1c are repeats term1, and pp2a, pp2b are repeats - // of term2, pp2a would only be checked against pp2b but not against pp1a, pp1b, pp1c. - // However this would complicate code, for a rather rare case, so choice is to compromise here. - int tpPos = pp.position + pp.offset; - for (int i = 0; i < repeats.length; i++) { - PhrasePositions pp2 = repeats[i]; - if (pp2 == pp) - continue; - int tpPos2 = pp2.position + pp2.offset; - if (tpPos2 == tpPos) - return pp.offset > pp2.offset ? pp : pp2; // do not differ: return the one with higher offset. + /** find repeating pps, and for each, if has multi-terms, update this.hasMultiTermRpts */ + private PhrasePositions[] repeatingPPs(HashMap rptTerms) { + ArrayList rp = new ArrayList<>(); + for (PhrasePositions pp=min,prev=null; prev!=max; pp=(prev=pp).next) { // iterate cyclic list: done once handled max + for (Term t : pp.terms) { + if (rptTerms.containsKey(t)) { + rp.add(pp); + hasMultiTermRpts |= (pp.terms.length > 1); + break; } - return null; + } } + return rp.toArray(new PhrasePositions[0]); + } + + /** bit-sets - for each repeating pp, for each of its repeating terms, the term ordinal values is set */ + private ArrayList ppTermsBitSets(PhrasePositions[] rpp, HashMap tord) { + ArrayList bb = new ArrayList<>(rpp.length); + for (PhrasePositions pp : rpp) { + FixedBitSet b = new FixedBitSet(tord.size()); + Integer ord; + for (Term t: pp.terms) { + if ((ord=tord.get(t))!=null) { + b.set(ord); + } + } + bb.add(b); + } + return bb; + } + + /** union (term group) bit-sets until they are disjoint (O(n^^2)), and each group have different terms */ + private void unionTermGroups(ArrayList bb) { + int incr; + for (int i=0; i termGroups(LinkedHashMap tord, ArrayList bb) throws IOException { + HashMap tg = new HashMap<>(); + Term[] t = tord.keySet().toArray(new Term[0]); + for (int i=0; i0) { +// t[0] = pq.pop(); +// ps.println(" " + 0 + " " + t[0]); +// for (int i=1; i=0; i--) { +// pq.add(t[i]); +// } +// } +// } + + private boolean advanceMin(int target) throws IOException { + if (!min.skipTo(target)) { + max.doc = NO_MORE_DOCS; // for further calls to docID() + return false; + } + min = min.next; // cyclic + max = max.next; // cyclic + return true; + } + + @Override + public int docID() { + return max.doc; + } + + @Override + public int nextDoc() throws IOException { + return advance(max.doc + 1); // advance to the next doc after #docID() + } + + @Override + public float score() { + return docScorer.score(max.doc, sloppyFreq); + } + + @Override + public int advance(int target) throws IOException { + assert target > docID(); + do { + if (!advanceMin(target)) { + return NO_MORE_DOCS; + } + while (min.doc < max.doc) { + if (!advanceMin(max.doc)) { + return NO_MORE_DOCS; + } + } + // found a doc with all of the terms + sloppyFreq = phraseFreq(); // check for phrase + target = min.doc + 1; // next target in case sloppyFreq is still 0 + } while (sloppyFreq == 0f); + + // found a match + return max.doc; + } + + @Override + public long cost() { + return cost; + } + + @Override + public String toString() { return "scorer(" + weight + ")"; } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/Sort.java 
=================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/Sort.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/Sort.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/Sort.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,7 +17,8 @@ * limitations under the License. */ -import java.io.Serializable; +import java.io.IOException; +import java.util.Arrays; /** @@ -95,15 +96,13 @@ *

    Created: Feb 12, 2004 10:53:57 AM * * @since lucene 1.4 - * @version $Id$ */ -public class Sort -implements Serializable { +public class Sort { /** * Represents sorting by computed relevance. Using this sort criteria returns * the same results as calling - * {@link Searcher#search(Query) Searcher#search()}without a sort criteria, + * {@link IndexSearcher#search(Query,int) IndexSearcher#search()}without a sort criteria, * only with slightly more overhead. */ public static final Sort RELEVANCE = new Sort(); @@ -116,90 +115,30 @@ /** * Sorts by computed relevance. This is the same sort criteria as calling - * {@link Searcher#search(Query) Searcher#search()}without a sort criteria, + * {@link IndexSearcher#search(Query,int) IndexSearcher#search()}without a sort criteria, * only with slightly more overhead. */ public Sort() { - this(new SortField[] { SortField.FIELD_SCORE, SortField.FIELD_DOC }); + this(SortField.FIELD_SCORE); } - /** - * Sorts by the terms in field then by index order (document - * number). The type of value in field is determined - * automatically. - * - * @see SortField#AUTO - */ - public Sort(String field) { - setSort(field, false); - } - - /** - * Sorts possibly in reverse by the terms in field then by - * index order (document number). The type of value in field is - * determined automatically. - * - * @see SortField#AUTO - */ - public Sort(String field, boolean reverse) { - setSort(field, reverse); - } - - /** - * Sorts in succession by the terms in each field. The type of value in - * field is determined automatically. - * - * @see SortField#AUTO - */ - public Sort(String[] fields) { - setSort(fields); - } - /** Sorts by the criteria in the given SortField. */ public Sort(SortField field) { setSort(field); } /** Sorts in succession by the criteria in each SortField. */ - public Sort(SortField[] fields) { + public Sort(SortField... fields) { setSort(fields); } - /** - * Sets the sort to the terms in field then by index order - * (document number). - */ - public final void setSort(String field) { - setSort(field, false); - } - - /** - * Sets the sort to the terms in field possibly in reverse, - * then by index order (document number). - */ - public void setSort(String field, boolean reverse) { - SortField[] nfields = new SortField[] { - new SortField(field, SortField.AUTO, reverse), SortField.FIELD_DOC }; - fields = nfields; - } - - /** Sets the sort to the terms in each field in succession. */ - public void setSort(String[] fieldnames) { - final int n = fieldnames.length; - SortField[] nfields = new SortField[n]; - for (int i = 0; i < n; ++i) { - nfields[i] = new SortField(fieldnames[i], SortField.AUTO); - } - fields = nfields; - } - /** Sets the sort to the given criteria. */ public void setSort(SortField field) { this.fields = new SortField[] { field }; } /** Sets the sort to the given criteria in succession. */ - public void setSort(SortField[] fields) { + public void setSort(SortField... fields) { this.fields = fields; } @@ -211,8 +150,33 @@ return fields; } + /** + * Rewrites the SortFields in this Sort, returning a new Sort if any of the fields + * changes during their rewriting. 
+ * + * @param searcher IndexSearcher to use in the rewriting + * @return {@code this} if the Sort/Fields have not changed, or a new Sort if there + * is a change + * @throws IOException Can be thrown by the rewriting + * @lucene.experimental + */ + public Sort rewrite(IndexSearcher searcher) throws IOException { + boolean changed = false; + + SortField[] rewrittenSortFields = new SortField[fields.length]; + for (int i = 0; i < fields.length; i++) { + rewrittenSortFields[i] = fields[i].rewrite(searcher); + if (fields[i] != rewrittenSortFields[i]) { + changed = true; + } + } + + return (changed) ? new Sort(rewrittenSortFields) : this; + } + + @Override public String toString() { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); for (int i = 0; i < fields.length; i++) { buffer.append(fields[i].toString()); @@ -222,4 +186,30 @@ return buffer.toString(); } + + /** Returns true if o is equal to this. */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof Sort)) return false; + final Sort other = (Sort)o; + return Arrays.equals(this.fields, other.fields); + } + + /** Returns a hash code value for this object. */ + @Override + public int hashCode() { + return 0x45aaf665 + Arrays.hashCode(fields); + } + + /** Returns true if the relevance score is needed to sort documents. */ + public boolean needsScores() { + for (SortField sortField : fields) { + if (sortField.needsScores()) { + return true; + } + } + return false; + } + } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SortComparator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SortComparatorSource.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/SortField.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/SortField.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/SortField.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/SortField.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,116 +17,109 @@ * limitations under the License. */ -import java.io.Serializable; -import java.util.Locale; +import java.io.IOException; +import java.util.Comparator; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.StringHelper; + /** * Stores information about how to sort documents by terms in an individual * field. Fields must be indexed in order to sort by them. * *

    Created: Feb 11, 2004 1:25:29 PM * * @since lucene 1.4 - * @version $Id$ * @see Sort */ -public class SortField -implements Serializable { +public class SortField { - /** Sort by document score (relevancy). Sort values are Float and higher - * values are at the front. */ - public static final int SCORE = 0; + /** + * Specifies the type of the terms to be sorted, or special types such as CUSTOM + */ + public static enum Type { - /** Sort by document number (index order). Sort values are Integer and lower - * values are at the front. */ - public static final int DOC = 1; + /** Sort by document score (relevance). Sort values are Float and higher + * values are at the front. */ + SCORE, - /** Guess type of sort based on field contents. A regular expression is used - * to look at the first term indexed for the field and determine if it - * represents an integer number, a floating point number, or just arbitrary - * string characters. */ - public static final int AUTO = 2; + /** Sort by document number (index order). Sort values are Integer and lower + * values are at the front. */ + DOC, - /** Sort using term values as Strings. Sort values are String and lower - * values are at the front. */ - public static final int STRING = 3; + /** Sort using term values as Strings. Sort values are String and lower + * values are at the front. */ + STRING, - /** Sort using term values as encoded Integers. Sort values are Integer and - * lower values are at the front. */ - public static final int INT = 4; + /** Sort using term values as encoded Integers. Sort values are Integer and + * lower values are at the front. */ + INT, - /** Sort using term values as encoded Floats. Sort values are Float and - * lower values are at the front. */ - public static final int FLOAT = 5; + /** Sort using term values as encoded Floats. Sort values are Float and + * lower values are at the front. */ + FLOAT, - /** Sort using term values as encoded Longs. Sort values are Long and - * lower values are at the front. */ - public static final int LONG = 6; + /** Sort using term values as encoded Longs. Sort values are Long and + * lower values are at the front. */ + LONG, - /** Sort using term values as encoded Doubles. Sort values are Double and - * lower values are at the front. */ - public static final int DOUBLE = 7; + /** Sort using term values as encoded Doubles. Sort values are Double and + * lower values are at the front. */ + DOUBLE, - /** - * Sort using term values as encoded Shorts. Sort values are shorts and lower values are at the front - */ - public static final int SHORT = 8; + /** Sort using term values as encoded Shorts. Sort values are Short and + * lower values are at the front. */ + @Deprecated + SHORT, + /** Sort using a custom Comparator. Sort values are any Comparable and + * sorting is done according to natural order. */ + CUSTOM, - /** Sort using a custom Comparator. Sort values are any Comparable and - * sorting is done according to natural order. */ - public static final int CUSTOM = 9; - /** - * Sort using term values as encoded bytes. Sort values are bytes and lower values are at the front - */ - public static final int BYTE = 10; + /** Sort using term values as encoded Bytes. Sort values are Byte and + * lower values are at the front. */ + @Deprecated + BYTE, + /** Sort using term values as Strings, but comparing by + * value (using String.compareTo) for all comparisons. + * This is typically slower than {@link #STRING}, which + * uses ordinals to do the sorting. 
*/ + STRING_VAL, - // IMPLEMENTATION NOTE: the FieldCache.STRING_INDEX is in the same "namespace" - // as the above static int values. Any new values must not have the same value - // as FieldCache.STRING_INDEX. + /** Sort use byte[] index values. */ + BYTES, + /** Force rewriting of SortField using {@link SortField#rewrite(IndexSearcher)} + * before it can be used for sorting */ + REWRITEABLE + } - /** Represents sorting by document score (relevancy). */ - public static final SortField FIELD_SCORE = new SortField (null, SCORE); + /** Represents sorting by document score (relevance). */ + public static final SortField FIELD_SCORE = new SortField(null, Type.SCORE); /** Represents sorting by document number (index order). */ - public static final SortField FIELD_DOC = new SortField (null, DOC); + public static final SortField FIELD_DOC = new SortField(null, Type.DOC); - private String field; - private int type = AUTO; // defaults to determining type dynamically - private Locale locale; // defaults to "natural order" (no Locale) + private Type type; // defaults to determining type dynamically boolean reverse = false; // defaults to natural order - private SortComparatorSource factory; + private FieldCache.Parser parser; - /** Creates a sort by terms in the given field where the type of term value - * is determined dynamically ({@link #AUTO AUTO}). - * @param field Name of field to sort by, cannot be null. - */ - public SortField (String field) { - this.field = field.intern(); - } + // Used for CUSTOM sort + private FieldComparatorSource comparatorSource; - /** Creates a sort, possibly in reverse, by terms in the given field where - * the type of term value is determined dynamically ({@link #AUTO AUTO}). - * @param field Name of field to sort by, cannot be null. - * @param reverse True if natural order should be reversed. - */ - public SortField (String field, boolean reverse) { - this.field = field.intern(); - this.reverse = reverse; - } + // Used for 'sortMissingFirst/Last' + public Object missingValue = null; /** Creates a sort by terms in the given field with the type of term * values explicitly given. * @param field Name of field to sort by. Can be null if * type is SCORE or DOC. * @param type Type of values in the terms. */ - public SortField (String field, int type) { - this.field = (field != null) ? field.intern() : field; - this.type = type; + public SortField(String field, Type type) { + initFieldType(field, type); } /** Creates a sort, possibly in reverse, by terms in the given field with the @@ -136,57 +129,113 @@ * @param type Type of values in the terms. * @param reverse True if natural order should be reversed. */ - public SortField (String field, int type, boolean reverse) { - this.field = (field != null) ? field.intern() : field; - this.type = type; + public SortField(String field, Type type, boolean reverse) { + initFieldType(field, type); this.reverse = reverse; } - /** Creates a sort by terms in the given field sorted - * according to the given locale. - * @param field Name of field to sort by, cannot be null. - * @param locale Locale of values in the field. + /** Creates a sort by terms in the given field, parsed + * to numeric values using a custom {@link FieldCache.Parser}. + * @param field Name of field to sort by. Must not be null. + * @param parser Instance of a {@link FieldCache.Parser}, + * which must subclass one of the existing numeric + * parsers from {@link FieldCache}. Sort type is inferred + * by testing which numeric parser the parser subclasses. 
+ * @throws IllegalArgumentException if the parser fails to + * subclass an existing numeric parser, or field is null */ - public SortField (String field, Locale locale) { - this.field = field.intern(); - this.type = STRING; - this.locale = locale; + public SortField(String field, FieldCache.Parser parser) { + this(field, parser, false); } - /** Creates a sort, possibly in reverse, by terms in the given field sorted - * according to the given locale. - * @param field Name of field to sort by, cannot be null. - * @param locale Locale of values in the field. + /** Creates a sort, possibly in reverse, by terms in the given field, parsed + * to numeric values using a custom {@link FieldCache.Parser}. + * @param field Name of field to sort by. Must not be null. + * @param parser Instance of a {@link FieldCache.Parser}, + * which must subclass one of the existing numeric + * parsers from {@link FieldCache}. Sort type is inferred + * by testing which numeric parser the parser subclasses. + * @param reverse True if natural order should be reversed. + * @throws IllegalArgumentException if the parser fails to + * subclass an existing numeric parser, or field is null */ - public SortField (String field, Locale locale, boolean reverse) { - this.field = field.intern(); - this.type = STRING; - this.locale = locale; + public SortField(String field, FieldCache.Parser parser, boolean reverse) { + if (parser instanceof FieldCache.IntParser) initFieldType(field, Type.INT); + else if (parser instanceof FieldCache.FloatParser) initFieldType(field, Type.FLOAT); + else if (parser instanceof FieldCache.ShortParser) initFieldType(field, Type.SHORT); + else if (parser instanceof FieldCache.ByteParser) initFieldType(field, Type.BYTE); + else if (parser instanceof FieldCache.LongParser) initFieldType(field, Type.LONG); + else if (parser instanceof FieldCache.DoubleParser) initFieldType(field, Type.DOUBLE); + else { + throw new IllegalArgumentException("Parser instance does not subclass existing numeric parser from FieldCache (got " + parser + ")"); + } + this.reverse = reverse; + this.parser = parser; } + /** Pass this to {@link #setMissingValue} to have missing + * string values sort first. */ + public final static Object STRING_FIRST = new Object() { + @Override + public String toString() { + return "SortField.STRING_FIRST"; + } + }; + + /** Pass this to {@link #setMissingValue} to have missing + * string values sort last. */ + public final static Object STRING_LAST = new Object() { + @Override + public String toString() { + return "SortField.STRING_LAST"; + } + }; + + public void setMissingValue(Object missingValue) { + if (type == Type.STRING || type == Type.STRING_VAL) { + if (missingValue != STRING_FIRST && missingValue != STRING_LAST) { + throw new IllegalArgumentException("For STRING type, missing value must be either STRING_FIRST or STRING_LAST"); + } + } else if (type != Type.BYTE && type != Type.SHORT && type != Type.INT && type != Type.FLOAT && type != Type.LONG && type != Type.DOUBLE) { + throw new IllegalArgumentException("Missing value only works for numeric or STRING types"); + } + this.missingValue = missingValue; + } + /** Creates a sort with a custom comparison function. * @param field Name of field to sort by; cannot be null. * @param comparator Returns a comparator for sorting hits. */ - public SortField (String field, SortComparatorSource comparator) { - this.field = (field != null) ? 
field.intern() : field; - this.type = CUSTOM; - this.factory = comparator; + public SortField(String field, FieldComparatorSource comparator) { + initFieldType(field, Type.CUSTOM); + this.comparatorSource = comparator; } /** Creates a sort, possibly in reverse, with a custom comparison function. * @param field Name of field to sort by; cannot be null. * @param comparator Returns a comparator for sorting hits. * @param reverse True if natural order should be reversed. */ - public SortField (String field, SortComparatorSource comparator, boolean reverse) { - this.field = (field != null) ? field.intern() : field; - this.type = CUSTOM; + public SortField(String field, FieldComparatorSource comparator, boolean reverse) { + initFieldType(field, Type.CUSTOM); this.reverse = reverse; - this.factory = comparator; + this.comparatorSource = comparator; } + // Sets field & type, and ensures field is not NULL unless + // type is SCORE or DOC + private void initFieldType(String field, Type type) { + this.type = type; + if (field == null) { + if (type != Type.SCORE && type != Type.DOC) { + throw new IllegalArgumentException("field can only be null when type is SCORE or DOC"); + } + } else { + this.field = field; + } + } + /** Returns the name of the field. Could return null * if the sort is by SCORE or DOC. * @return Name of field, possibly null. @@ -196,18 +245,18 @@ } /** Returns the type of contents in the field. - * @return One of the constants SCORE, DOC, AUTO, STRING, INT or FLOAT. + * @return One of the constants SCORE, DOC, STRING, INT or FLOAT. */ - public int getType() { + public Type getType() { return type; } - /** Returns the Locale by which term values are interpreted. - * May return null if no Locale was specified. - * @return Locale, or null. + /** Returns the instance of a {@link FieldCache} parser that fits to the given sort type. + * May return null if no parser was specified. Sorting is using the default parser then. + * @return An instance of a {@link FieldCache} parser, or null. */ - public Locale getLocale() { - return locale; + public FieldCache.Parser getParser() { + return parser; } /** Returns whether the sort should be reversed. 
@@ -217,31 +266,192 @@ return reverse; } - public SortComparatorSource getFactory() { - return factory; + /** Returns the {@link FieldComparatorSource} used for + * custom sorting + */ + public FieldComparatorSource getComparatorSource() { + return comparatorSource; } + @Override public String toString() { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); switch (type) { - case SCORE: buffer.append(""); - break; + case SCORE: + buffer.append(""); + break; - case DOC: buffer.append(""); - break; + case DOC: + buffer.append(""); + break; + case STRING: + buffer.append(""); + break; + + case STRING_VAL: + buffer.append(""); + break; + + case BYTE: + buffer.append(""); + break; + + case SHORT: + buffer.append(""); + break; + + case INT: + buffer.append(""); + break; + + case LONG: + buffer.append(""); + break; + + case FLOAT: + buffer.append(""); + break; + + case DOUBLE: + buffer.append(""); + break; + case CUSTOM: - buffer.append("'); - break; + buffer.append("'); + break; + + case REWRITEABLE: + buffer.append(""); + break; default: - buffer.append('\"').append(field).append('\"'); - break; + buffer.append(""); + break; } - if (locale != null) buffer.append('(').append(locale).append(')'); if (reverse) buffer.append('!'); + if (missingValue != null) { + buffer.append(" missingValue="); + buffer.append(missingValue); + } return buffer.toString(); } + + /** Returns true if o is equal to this. If a + * {@link FieldComparatorSource} or {@link + * FieldCache.Parser} was provided, it must properly + * implement equals (unless a singleton is always used). */ + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (!(o instanceof SortField)) return false; + final SortField other = (SortField)o; + return ( + StringHelper.equals(other.field, this.field) + && other.type == this.type + && other.reverse == this.reverse + && (other.comparatorSource == null ? this.comparatorSource == null : other.comparatorSource.equals(this.comparatorSource)) + ); + } + + /** Returns true if o is equal to this. If a + * {@link FieldComparatorSource} or {@link + * FieldCache.Parser} was provided, it must properly + * implement hashCode (unless a singleton is always + * used). */ + @Override + public int hashCode() { + int hash = type.hashCode() ^ 0x346565dd + Boolean.valueOf(reverse).hashCode() ^ 0xaf5998bb; + if (field != null) hash += field.hashCode()^0xff5685dd; + if (comparatorSource != null) hash += comparatorSource.hashCode(); + return hash; + } + + private Comparator bytesComparator = BytesRef.getUTF8SortedAsUnicodeComparator(); + + public void setBytesComparator(Comparator b) { + bytesComparator = b; + } + + public Comparator getBytesComparator() { + return bytesComparator; + } + + /** Returns the {@link FieldComparator} to use for + * sorting. + * + * @lucene.experimental + * + * @param numHits number of top hits the queue will store + * @param sortPos position of this SortField within {@link + * Sort}. The comparator is primary if sortPos==0, + * secondary if sortPos==1, etc. Some comparators can + * optimize themselves when they are the primary sort. 
+ * @return {@link FieldComparator} to use when sorting + */ + public FieldComparator getComparator(final int numHits, final int sortPos) throws IOException { + + switch (type) { + case SCORE: + return new FieldComparator.RelevanceComparator(numHits); + + case DOC: + return new FieldComparator.DocComparator(numHits); + + case INT: + return new FieldComparator.IntComparator(numHits, field, parser, (Integer) missingValue); + + case FLOAT: + return new FieldComparator.FloatComparator(numHits, field, parser, (Float) missingValue); + + case LONG: + return new FieldComparator.LongComparator(numHits, field, parser, (Long) missingValue); + + case DOUBLE: + return new FieldComparator.DoubleComparator(numHits, field, parser, (Double) missingValue); + + case BYTE: + return new FieldComparator.ByteComparator(numHits, field, parser, (Byte) missingValue); + + case SHORT: + return new FieldComparator.ShortComparator(numHits, field, parser, (Short) missingValue); + + case CUSTOM: + assert comparatorSource != null; + return comparatorSource.newComparator(field, numHits, sortPos, reverse); + + case STRING: + return new FieldComparator.TermOrdValComparator(numHits, field, missingValue == STRING_LAST); + + case STRING_VAL: + return new FieldComparator.TermValComparator(numHits, field, missingValue == STRING_LAST); + + case REWRITEABLE: + throw new IllegalStateException("SortField needs to be rewritten through Sort.rewrite(..) and SortField.rewrite(..)"); + + default: + throw new IllegalStateException("Illegal sort type: " + type); + } + } + + /** + * Rewrites this SortField, returning a new SortField if a change is made. + * Subclasses should override this define their rewriting behavior when this + * SortField is of type {@link SortField.Type#REWRITEABLE} + * + * @param searcher IndexSearcher to use during rewriting + * @return New rewritten SortField, or {@code this} if nothing has changed. + * @throws IOException Can be thrown by the rewriting + * @lucene.experimental + */ + public SortField rewrite(IndexSearcher searcher) throws IOException { + return this; + } + + /** Whether the relevance score is needed to sort documents. */ + public boolean needsScores() { + return type == Type.SCORE; + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SortRescorer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SortedNumericSelector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SortedNumericSortField.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SpanFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SpanFilterResult.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/SpanQueryFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TermCollectingRewrite.java'. Fisheye: No comparison available. Pass `N' to diff? 
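For orientation between the SortField diff above and the TermQuery diff below, here is a minimal usage sketch of the revised sorting API it introduces (the enum-based SortField.Type, the varargs Sort constructor, and IndexSearcher.search(Query, int, Sort)). It is illustrative only: the index directory and the field names "price" and "category" are assumed, and in this Lucene version the sort field must be indexed so the FieldCache-based comparators can read its values.

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopFieldDocs;
import org.apache.lucene.store.Directory;

public class SortedSearchExample {
  /** Returns the ten cheapest "book" hits; assumes "price" is indexed as a numeric (long) field. */
  static TopFieldDocs tenCheapestBooks(Directory indexDir) throws IOException {
    DirectoryReader reader = DirectoryReader.open(indexDir);
    try {
      IndexSearcher searcher = new IndexSearcher(reader);
      // Primary criterion: "price" ascending as a LONG; tie-break on index order.
      Sort sort = new Sort(new SortField("price", SortField.Type.LONG), SortField.FIELD_DOC);
      return searcher.search(new TermQuery(new Term("category", "book")), 10, sort);
    } finally {
      reader.close();
    }
  }
}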
Index: 3rdParty_sources/lucene/org/apache/lucene/search/TermQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/TermQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/TermQuery.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/TermQuery.java 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -20,139 +20,170 @@ import java.io.IOException; import java.util.Set; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.index.AtomicReader; +import org.apache.lucene.index.IndexReaderContext; +import org.apache.lucene.index.ReaderUtil; import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermDocs; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; /** A Query that matches documents containing a term. This may be combined with other terms with a {@link BooleanQuery}. */ public class TermQuery extends Query { - private Term term; + private final Term term; + private final int docFreq; + private final TermContext perReaderTermState; - private class TermWeight implements Weight { - private Similarity similarity; - private float value; - private float idf; - private float queryNorm; - private float queryWeight; + final class TermWeight extends Weight { + private final Similarity similarity; + private final Similarity.SimWeight stats; + private final TermContext termStates; - public TermWeight(Searcher searcher) + public TermWeight(IndexSearcher searcher, TermContext termStates) throws IOException { - this.similarity = getSimilarity(searcher); - idf = similarity.idf(term, searcher); // compute idf + assert termStates != null : "TermContext must not be null"; + this.termStates = termStates; + this.similarity = searcher.getSimilarity(); + this.stats = similarity.computeWeight( + getBoost(), + searcher.collectionStatistics(term.field()), + searcher.termStatistics(term, termStates)); } + @Override public String toString() { return "weight(" + TermQuery.this + ")"; } + @Override public Query getQuery() { return TermQuery.this; } - public float getValue() { return value; } - public float sumOfSquaredWeights() { - queryWeight = idf * getBoost(); // compute query weight - return queryWeight * queryWeight; // square it + @Override + public float getValueForNormalization() { + return stats.getValueForNormalization(); } - public void normalize(float queryNorm) { - this.queryNorm = queryNorm; - queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + @Override + public void normalize(float queryNorm, float topLevelBoost) { + stats.normalize(queryNorm, topLevelBoost); } - public Scorer scorer(IndexReader reader) throws IOException { - TermDocs termDocs = reader.termDocs(term); - - if (termDocs == null) + @Override + public Scorer 
scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + assert termStates.topReaderContext == ReaderUtil.getTopLevelContext(context) : "The top-reader used to create Weight (" + termStates.topReaderContext + ") is not the same as the current reader's top-reader (" + ReaderUtil.getTopLevelContext(context); + final TermsEnum termsEnum = getTermsEnum(context); + if (termsEnum == null) { return null; - - return new TermScorer(this, termDocs, similarity, - reader.norms(term.field())); + } + DocsEnum docs = termsEnum.docs(acceptDocs, null); + assert docs != null; + return new TermScorer(this, docs, similarity.simScorer(stats, context)); } - - public Explanation explain(IndexReader reader, int doc) - throws IOException { - - ComplexExplanation result = new ComplexExplanation(); - result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); - - Explanation idfExpl = - new Explanation(idf, "idf(docFreq=" + reader.docFreq(term) + - ", numDocs=" + reader.numDocs() + ")"); - - // explain query weight - Explanation queryExpl = new Explanation(); - queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:"); - - Explanation boostExpl = new Explanation(getBoost(), "boost"); - if (getBoost() != 1.0f) - queryExpl.addDetail(boostExpl); - queryExpl.addDetail(idfExpl); - - Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm"); - queryExpl.addDetail(queryNormExpl); - - queryExpl.setValue(boostExpl.getValue() * - idfExpl.getValue() * - queryNormExpl.getValue()); - - result.addDetail(queryExpl); - - // explain field weight - String field = term.field(); - ComplexExplanation fieldExpl = new ComplexExplanation(); - fieldExpl.setDescription("fieldWeight("+term+" in "+doc+ - "), product of:"); - - Explanation tfExpl = scorer(reader).explain(doc); - fieldExpl.addDetail(tfExpl); - fieldExpl.addDetail(idfExpl); - - Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); - float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f; - fieldNormExpl.setValue(fieldNorm); - fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); - fieldExpl.addDetail(fieldNormExpl); - - fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch())); - fieldExpl.setValue(tfExpl.getValue() * - idfExpl.getValue() * - fieldNormExpl.getValue()); - - result.addDetail(fieldExpl); - result.setMatch(fieldExpl.getMatch()); - - // combine them - result.setValue(queryExpl.getValue() * fieldExpl.getValue()); - - if (queryExpl.getValue() == 1.0f) - return fieldExpl; - - return result; + + /** + * Returns a {@link TermsEnum} positioned at this weights Term or null if + * the term does not exist in the given context + */ + private TermsEnum getTermsEnum(AtomicReaderContext context) throws IOException { + final TermState state = termStates.get(context.ord); + if (state == null) { // term is not present in that reader + assert termNotInReader(context.reader(), term) : "no termstate found but term exists in reader term=" + term; + return null; + } + //System.out.println("LD=" + reader.getLiveDocs() + " set?=" + (reader.getLiveDocs() != null ? 
reader.getLiveDocs().get(0) : "null")); + final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null); + termsEnum.seekExact(term.bytes(), state); + return termsEnum; } + + private boolean termNotInReader(AtomicReader reader, Term term) throws IOException { + // only called from assert + //System.out.println("TQ.termNotInReader reader=" + reader + " term=" + field + ":" + bytes.utf8ToString()); + return reader.docFreq(term) == 0; + } + + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + Scorer scorer = scorer(context, context.reader().getLiveDocs()); + if (scorer != null) { + int newDoc = scorer.advance(doc); + if (newDoc == doc) { + float freq = scorer.freq(); + SimScorer docScorer = similarity.simScorer(stats, context); + ComplexExplanation result = new ComplexExplanation(); + result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); + Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "termFreq=" + freq)); + result.addDetail(scoreExplanation); + result.setValue(scoreExplanation.getValue()); + result.setMatch(true); + return result; + } + } + return new ComplexExplanation(false, 0.0f, "no matching term"); + } } /** Constructs a query for the term t. */ public TermQuery(Term t) { + this(t, -1); + } + + /** Expert: constructs a TermQuery that will use the + * provided docFreq instead of looking up the docFreq + * against the searcher. */ + public TermQuery(Term t, int docFreq) { term = t; + this.docFreq = docFreq; + perReaderTermState = null; } + + /** Expert: constructs a TermQuery that will use the + * provided docFreq instead of looking up the docFreq + * against the searcher. */ + public TermQuery(Term t, TermContext states) { + assert states != null; + term = t; + docFreq = states.docFreq(); + perReaderTermState = states; + } /** Returns the term of this query. */ public Term getTerm() { return term; } - protected Weight createWeight(Searcher searcher) throws IOException { - return new TermWeight(searcher); + @Override + public Weight createWeight(IndexSearcher searcher) throws IOException { + final IndexReaderContext context = searcher.getTopReaderContext(); + final TermContext termState; + if (perReaderTermState == null || perReaderTermState.topReaderContext != context) { + // make TermQuery single-pass if we don't have a PRTS or if the context differs! + termState = TermContext.build(context, term); + } else { + // PRTS was pre-build for this IS + termState = this.perReaderTermState; + } + + // we must not ignore the given docFreq - if set use the given value (lie) + if (docFreq != -1) + termState.setDocFreq(docFreq); + + return new TermWeight(searcher, termState); } - public void extractTerms(Set terms) { + @Override + public void extractTerms(Set terms) { terms.add(getTerm()); } /** Prints a user-readable version of this query. */ + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); if (!term.field().equals(field)) { buffer.append(term.field()); buffer.append(":"); @@ -163,6 +194,7 @@ } /** Returns true iff o is equal to this. 
*/ + @Override public boolean equals(Object o) { if (!(o instanceof TermQuery)) return false; @@ -172,6 +204,7 @@ } /** Returns a hash code value for this object.*/ + @Override public int hashCode() { return Float.floatToIntBits(getBoost()) ^ term.hashCode(); } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TermRangeFilter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TermRangeQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TermRangeTermsEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/TermScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/TermScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/TermScorer.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/TermScorer.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,171 +19,78 @@ import java.io.IOException; -import org.apache.lucene.index.TermDocs; +import org.apache.lucene.index.DocsEnum; +import org.apache.lucene.search.similarities.Similarity; /** Expert: A Scorer for documents matching a Term. */ final class TermScorer extends Scorer { - private Weight weight; - private TermDocs termDocs; - private byte[] norms; - private float weightValue; - private int doc; - - private final int[] docs = new int[32]; // buffered doc numbers - private final int[] freqs = new int[32]; // buffered term freqs - private int pointer; - private int pointerMax; - - private static final int SCORE_CACHE_SIZE = 32; - private float[] scoreCache = new float[SCORE_CACHE_SIZE]; - - /** Construct a TermScorer. - * @param weight The weight of the Term in the query. - * @param td An iterator over the documents matching the Term. - * @param similarity The Similarity implementation to be used for score computations. - * @param norms The field norms of the document fields for the Term. + private final DocsEnum docsEnum; + private final Similarity.SimScorer docScorer; + + /** + * Construct a TermScorer. + * + * @param weight + * The weight of the Term in the query. + * @param td + * An iterator over the documents matching the Term. + * @param docScorer + * The Similarity.SimScorer implementation + * to be used for score computations. 
*/ - TermScorer(Weight weight, TermDocs td, Similarity similarity, - byte[] norms) { - super(similarity); - this.weight = weight; - this.termDocs = td; - this.norms = norms; - this.weightValue = weight.getValue(); - - for (int i = 0; i < SCORE_CACHE_SIZE; i++) - scoreCache[i] = getSimilarity().tf(i) * weightValue; + TermScorer(Weight weight, DocsEnum td, Similarity.SimScorer docScorer) { + super(weight); + this.docScorer = docScorer; + this.docsEnum = td; } - public void score(HitCollector hc) throws IOException { - next(); - score(hc, Integer.MAX_VALUE); + @Override + public int docID() { + return docsEnum.docID(); } - protected boolean score(HitCollector c, int end) throws IOException { - Similarity similarity = getSimilarity(); // cache sim in local - float[] normDecoder = Similarity.getNormDecoder(); - while (doc < end) { // for docs in window - int f = freqs[pointer]; - float score = // compute tf(f)*weight - f < SCORE_CACHE_SIZE // check cache - ? scoreCache[f] // cache hit - : similarity.tf(f)*weightValue; // cache miss - - score *= normDecoder[norms[doc] & 0xFF]; // normalize for field - - c.collect(doc, score); // collect score - - if (++pointer >= pointerMax) { - pointerMax = termDocs.read(docs, freqs); // refill buffers - if (pointerMax != 0) { - pointer = 0; - } else { - termDocs.close(); // close stream - doc = Integer.MAX_VALUE; // set to sentinel value - return false; - } - } - doc = docs[pointer]; - } - return true; + @Override + public int freq() throws IOException { + return docsEnum.freq(); } - /** Returns the current document number matching the query. - * Initially invalid, until {@link #next()} is called the first time. + /** + * Advances to the next document matching the query.
    + * + * @return the document matching the query or NO_MORE_DOCS if there are no more documents. */ - public int doc() { return doc; } - - /** Advances to the next document matching the query. - *
    The iterator over the matching documents is buffered using - * {@link TermDocs#read(int[],int[])}. - * @return true iff there is another document matching the query. - */ - public boolean next() throws IOException { - pointer++; - if (pointer >= pointerMax) { - pointerMax = termDocs.read(docs, freqs); // refill buffer - if (pointerMax != 0) { - pointer = 0; - } else { - termDocs.close(); // close stream - doc = Integer.MAX_VALUE; // set to sentinel value - return false; - } - } - doc = docs[pointer]; - return true; + @Override + public int nextDoc() throws IOException { + return docsEnum.nextDoc(); } - - public float score() { - int f = freqs[pointer]; - float raw = // compute tf(f)*weight - f < SCORE_CACHE_SIZE // check cache - ? scoreCache[f] // cache hit - : getSimilarity().tf(f)*weightValue; // cache miss - - return raw * Similarity.decodeNorm(norms[doc]); // normalize for field + + @Override + public float score() throws IOException { + assert docID() != NO_MORE_DOCS; + return docScorer.score(docsEnum.docID(), docsEnum.freq()); } - /** Skips to the first match beyond the current whose document number is - * greater than or equal to a given target. - *
    The implementation uses {@link TermDocs#skipTo(int)}. - * @param target The target document number. - * @return true iff there is such a match. + /** + * Advances to the first match beyond the current whose document number is + * greater than or equal to a given target.
    + * The implementation uses {@link DocsEnum#advance(int)}. + * + * @param target + * The target document number. + * @return the matching document or NO_MORE_DOCS if none exist. */ - public boolean skipTo(int target) throws IOException { - // first scan in cache - for (pointer++; pointer < pointerMax; pointer++) { - if (docs[pointer] >= target) { - doc = docs[pointer]; - return true; - } - } - - // not found in cache, seek underlying stream - boolean result = termDocs.skipTo(target); - if (result) { - pointerMax = 1; - pointer = 0; - docs[pointer] = doc = termDocs.doc(); - freqs[pointer] = termDocs.freq(); - } else { - doc = Integer.MAX_VALUE; - } - return result; + @Override + public int advance(int target) throws IOException { + return docsEnum.advance(target); } - - /** Returns an explanation of the score for a document. - *
    When this method is used, the {@link #next()} method - * and the {@link #score(HitCollector)} method should not be used. - * @param doc The document number for the explanation. - */ - public Explanation explain(int doc) throws IOException { - TermQuery query = (TermQuery)weight.getQuery(); - Explanation tfExplanation = new Explanation(); - int tf = 0; - while (pointer < pointerMax) { - if (docs[pointer] == doc) - tf = freqs[pointer]; - pointer++; - } - if (tf == 0) { - if (termDocs.skipTo(doc)) - { - if (termDocs.doc() == doc) - { - tf = termDocs.freq(); - } - } - } - termDocs.close(); - tfExplanation.setValue(getSimilarity().tf(tf)); - tfExplanation.setDescription("tf(termFreq("+query.getTerm()+")="+tf+")"); - - return tfExplanation; + + @Override + public long cost() { + return docsEnum.cost(); } /** Returns a string representation of this TermScorer. */ + @Override public String toString() { return "scorer(" + weight + ")"; } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TermStatistics.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TimeLimitedCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TimeLimitingCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TopDocCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/TopDocs.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/TopDocs.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/TopDocs.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/TopDocs.java 16 Dec 2014 11:31:49 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,32 +17,263 @@ * limitations under the License. */ -/** Expert: Returned by low-level search implementations. - * @see Searcher#search(Query,Filter,int) */ -public class TopDocs implements java.io.Serializable { - /** Expert: The total number of hits for the query. - * @see Hits#length() - */ +import org.apache.lucene.util.PriorityQueue; + +import java.io.IOException; + +/** Represents hits returned by {@link + * IndexSearcher#search(Query,Filter,int)} and {@link + * IndexSearcher#search(Query,int)}. */ +public class TopDocs { + + /** The total number of hits for the query. */ public int totalHits; - /** Expert: The top hits for the query. */ + + /** The top hits for the query. */ public ScoreDoc[] scoreDocs; - /** Expert: Stores the maximum score value encountered, needed for normalizing. */ + + /** Stores the maximum score value encountered, needed for normalizing. */ private float maxScore; - /** Expert: Returns the maximum score value encountered. */ + /** + * Returns the maximum score value encountered. Note that in case + * scores are not tracked, this returns {@link Float#NaN}. 
+ */ public float getMaxScore() { - return maxScore; + return maxScore; } - /** Expert: Sets the maximum score value encountered. */ + /** Sets the maximum score value encountered. */ public void setMaxScore(float maxScore) { - this.maxScore=maxScore; + this.maxScore = maxScore; } - - /** Expert: Constructs a TopDocs.*/ + + /** Constructs a TopDocs with a default maxScore=Float.NaN. */ + TopDocs(int totalHits, ScoreDoc[] scoreDocs) { + this(totalHits, scoreDocs, Float.NaN); + } + public TopDocs(int totalHits, ScoreDoc[] scoreDocs, float maxScore) { this.totalHits = totalHits; this.scoreDocs = scoreDocs; this.maxScore = maxScore; } + + // Refers to one hit: + private static class ShardRef { + // Which shard (index into shardHits[]): + final int shardIndex; + + // Which hit within the shard: + int hitIndex; + + public ShardRef(int shardIndex) { + this.shardIndex = shardIndex; + } + + @Override + public String toString() { + return "ShardRef(shardIndex=" + shardIndex + " hitIndex=" + hitIndex + ")"; + } + }; + + // Specialized MergeSortQueue that just merges by + // relevance score, descending: + private static class ScoreMergeSortQueue extends PriorityQueue { + final ScoreDoc[][] shardHits; + + public ScoreMergeSortQueue(TopDocs[] shardHits) { + super(shardHits.length); + this.shardHits = new ScoreDoc[shardHits.length][]; + for(int shardIDX=0;shardIDX secondScore) { + return true; + } else { + // Tie break: earlier shard wins + if (first.shardIndex < second.shardIndex) { + return true; + } else if (first.shardIndex > second.shardIndex) { + return false; + } else { + // Tie break in same shard: resolve however the + // shard had resolved it: + assert first.hitIndex != second.hitIndex; + return first.hitIndex < second.hitIndex; + } + } + } + } + + @SuppressWarnings({"rawtypes","unchecked"}) + private static class MergeSortQueue extends PriorityQueue { + // These are really FieldDoc instances: + final ScoreDoc[][] shardHits; + final FieldComparator[] comparators; + final int[] reverseMul; + + public MergeSortQueue(Sort sort, TopDocs[] shardHits) throws IOException { + super(shardHits.length); + this.shardHits = new ScoreDoc[shardHits.length][]; + for(int shardIDX=0;shardIDX second.shardIndex) { + //System.out.println(" return tb false"); + return false; + } else { + // Tie break in same shard: resolve however the + // shard had resolved it: + //System.out.println(" return tb " + (first.hitIndex < second.hitIndex)); + assert first.hitIndex != second.hitIndex; + return first.hitIndex < second.hitIndex; + } + } + } + + /** Returns a new TopDocs, containing topN results across + * the provided TopDocs, sorting by the specified {@link + * Sort}. Each of the TopDocs must have been sorted by + * the same Sort, and sort field values must have been + * filled (ie, fillFields=true must be + * passed to {@link + * TopFieldCollector#create}. + * + *

    Pass sort=null to merge sort by score descending. + * + * @lucene.experimental */ + public static TopDocs merge(Sort sort, int topN, TopDocs[] shardHits) throws IOException { + return merge(sort, 0, topN, shardHits); + } + + /** + * Same as {@link #merge(Sort, int, TopDocs[])} but also slices the result at the same time based + * on the provided start and size. The return TopDocs will always have a scoreDocs with length of at most size. + */ + public static TopDocs merge(Sort sort, int start, int size, TopDocs[] shardHits) throws IOException { + final PriorityQueue queue; + if (sort == null) { + queue = new ScoreMergeSortQueue(shardHits); + } else { + queue = new MergeSortQueue(sort, shardHits); + } + + int totalHitCount = 0; + int availHitCount = 0; + float maxScore = Float.MIN_VALUE; + for(int shardIDX=0;shardIDX 0) { + availHitCount += shard.scoreDocs.length; + queue.add(new ShardRef(shardIDX)); + maxScore = Math.max(maxScore, shard.getMaxScore()); + //System.out.println(" maxScore now " + maxScore + " vs " + shard.getMaxScore()); + } + } + + if (availHitCount == 0) { + maxScore = Float.NaN; + } + + final ScoreDoc[] hits; + if (availHitCount <= start) { + hits = new ScoreDoc[0]; + } else { + hits = new ScoreDoc[Math.min(size, availHitCount - start)]; + int requestedResultWindow = start + size; + int numIterOnHits = Math.min(availHitCount, requestedResultWindow); + int hitUpto = 0; + while (hitUpto < numIterOnHits) { + assert queue.size() > 0; + ShardRef ref = queue.pop(); + final ScoreDoc hit = shardHits[ref.shardIndex].scoreDocs[ref.hitIndex++]; + hit.shardIndex = ref.shardIndex; + if (hitUpto >= start) { + hits[hitUpto - start] = hit; + } + + //System.out.println(" hitUpto=" + hitUpto); + //System.out.println(" doc=" + hits[hitUpto].doc + " score=" + hits[hitUpto].score); + + hitUpto++; + + if (ref.hitIndex < shardHits[ref.shardIndex].scoreDocs.length) { + // Not done with this these TopDocs yet: + queue.add(ref); + } + } + } + + if (sort == null) { + return new TopDocs(totalHitCount, hits, maxScore); + } else { + return new TopFieldDocs(totalHitCount, hits, sort.getSort(), maxScore); + } + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TopDocsCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TopFieldCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TopFieldDocCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/TopFieldDocs.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/TopFieldDocs.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/TopFieldDocs.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/TopFieldDocs.java 16 Dec 2014 11:31:47 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,29 +18,22 @@ */ -/** - * Expert: Returned by low-level sorted search implementations. - * - *

    Created: Feb 12, 2004 8:58:46 AM - * - * @since lucene 1.4 - * @version $Id$ - * @see Searcher#search(Query,Filter,int,Sort) +/** Represents hits returned by {@link + * IndexSearcher#search(Query,Filter,int,Sort)}. */ -public class TopFieldDocs -extends TopDocs { +public class TopFieldDocs extends TopDocs { - /** The fields which were used to sort results by. */ - public SortField[] fields; + /** The fields which were used to sort results by. */ + public SortField[] fields; - /** Creates one of these objects. - * @param totalHits Total number of hits for the query. - * @param scoreDocs The top hits for the query. - * @param fields The sort criteria used to find the top hits. - * @param maxScore The maximum score encountered. - */ - TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) { - super (totalHits, scoreDocs, maxScore); - this.fields = fields; - } + /** Creates one of these objects. + * @param totalHits Total number of hits for the query. + * @param scoreDocs The top hits for the query. + * @param fields The sort criteria used to find the top hits. + * @param maxScore The maximum score encountered. + */ + public TopFieldDocs (int totalHits, ScoreDoc[] scoreDocs, SortField[] fields, float maxScore) { + super (totalHits, scoreDocs, maxScore); + this.fields = fields; + } } \ No newline at end of file Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TopScoreDocCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TopTermsRewrite.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/TotalHitCountCollector.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/Weight.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/Weight.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/Weight.java 17 Aug 2012 14:54:55 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/Weight.java 16 Dec 2014 11:31:48 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,44 +19,194 @@ import java.io.IOException; -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReader; // javadocs +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReaderContext; // javadocs +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.util.Bits; -/** Expert: Calculate query weights and build query scorers. +/** + * Expert: Calculate query weights and build query scorers. *

    - * The purpose of Weight is to make it so that searching does not modify - * a Query, so that a Query instance can be reused.
    - * Searcher dependent state of the query should reside in the Weight.
    - * IndexReader dependent state should reside in the Scorer. + * The purpose of {@link Weight} is to ensure searching does not modify a + * {@link Query}, so that a {@link Query} instance can be reused.
    + * {@link IndexSearcher} dependent state of the query should reside in the + * {@link Weight}.
    + * {@link AtomicReader} dependent state should reside in the {@link Scorer}. *

    + * Since {@link Weight} creates {@link Scorer} instances for a given + * {@link AtomicReaderContext} ({@link #scorer(AtomicReaderContext, Bits)}) + * callers must maintain the relationship between the searcher's top-level + * {@link IndexReaderContext} and the context used to create a {@link Scorer}. + *

    * A Weight is used in the following way: *

      - *
    1. A Weight is constructed by a top-level query, - * given a Searcher ({@link Query#createWeight(Searcher)}). - *
    2. The {@link #sumOfSquaredWeights()} method is called - * on the Weight to compute - * the query normalization factor {@link Similarity#queryNorm(float)} - * of the query clauses contained in the query. - *
    3. The query normalization factor is passed to {@link #normalize(float)}. - * At this point the weighting is complete. - *
    4. A Scorer is constructed by {@link #scorer(IndexReader)}. + *
    5. A Weight is constructed by a top-level query, given a + * IndexSearcher ({@link Query#createWeight(IndexSearcher)}). + *
    6. The {@link #getValueForNormalization()} method is called on the + * Weight to compute the query normalization factor + * {@link Similarity#queryNorm(float)} of the query clauses contained in the + * query. + *
    7. The query normalization factor is passed to {@link #normalize(float, float)}. At + * this point the weighting is complete. + *
    8. A Scorer is constructed by + * {@link #scorer(AtomicReaderContext, Bits)}. *
    + * + * @since 2.9 */ -public interface Weight extends java.io.Serializable { +public abstract class Weight { + + /** + * An explanation of the score computation for the named document. + * + * @param context the readers context to create the {@link Explanation} for. + * @param doc the document's id relative to the given context's reader + * @return an Explanation for the score + * @throws IOException if an {@link IOException} occurs + */ + public abstract Explanation explain(AtomicReaderContext context, int doc) throws IOException; + /** The query that this concerns. */ - Query getQuery(); + public abstract Query getQuery(); + + /** The value for normalization of contained query clauses (e.g. sum of squared weights). */ + public abstract float getValueForNormalization() throws IOException; - /** The weight for this query. */ - float getValue(); + /** Assigns the query normalization factor and boost from parent queries to this. */ + public abstract void normalize(float norm, float topLevelBoost); - /** The sum of squared weights of contained query clauses. */ - float sumOfSquaredWeights() throws IOException; + /** + * Returns a {@link Scorer} which scores documents in/out-of order according + * to scoreDocsInOrder. + *

    + * NOTE: even if scoreDocsInOrder is false, it is + * recommended to check whether the returned Scorer indeed scores + * documents out of order (i.e., call {@link #scoresDocsOutOfOrder()}), as + * some Scorer implementations will always return documents + * in-order.
    + * NOTE: null can be returned if no documents will be scored by this + * query. + * + * @param context + * the {@link AtomicReaderContext} for which to return the {@link Scorer}. + * @param acceptDocs + * Bits that represent the allowable docs to match (typically deleted docs + * but possibly filtering other documents) + * + * @return a {@link Scorer} which scores documents in/out-of order. + * @throws IOException if there is a low-level I/O error + */ + public abstract Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException; - /** Assigns the query normalization factor to this. */ - void normalize(float norm); + /** + * Optional method, to return a {@link BulkScorer} to + * score the query and send hits to a {@link Collector}. + * Only queries that have a different top-level approach + * need to override this; the default implementation + * pulls a normal {@link Scorer} and iterates and + * collects the resulting hits. + * + * @param context + * the {@link AtomicReaderContext} for which to return the {@link Scorer}. + * @param scoreDocsInOrder + * specifies whether in-order scoring of documents is required. Note + * that if set to false (i.e., out-of-order scoring is required), + * this method can return whatever scoring mode it supports, as every + * in-order scorer is also an out-of-order one. However, an + * out-of-order scorer may not support {@link Scorer#nextDoc()} + * and/or {@link Scorer#advance(int)}, therefore it is recommended to + * request an in-order scorer if use of these + * methods is required. + * @param acceptDocs + * Bits that represent the allowable docs to match (typically deleted docs + * but possibly filtering other documents) + * + * @return a {@link BulkScorer} which scores documents and + * passes them to a collector. + * @throws IOException if there is a low-level I/O error + */ + public BulkScorer bulkScorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs) throws IOException { - /** Constructs a scorer for this. */ - Scorer scorer(IndexReader reader) throws IOException; + Scorer scorer = scorer(context, acceptDocs); + if (scorer == null) { + // No docs match + return null; + } - /** An explanation of the score computation for the named document. */ - Explanation explain(IndexReader reader, int doc) throws IOException; + // This impl always scores docs in order, so we can + // ignore scoreDocsInOrder: + return new DefaultBulkScorer(scorer); + } + + /** Just wraps a Scorer and performs top scoring using it. */ + static class DefaultBulkScorer extends BulkScorer { + private final Scorer scorer; + + public DefaultBulkScorer(Scorer scorer) { + if (scorer == null) { + throw new NullPointerException(); + } + this.scorer = scorer; + } + + @Override + public boolean score(Collector collector, int max) throws IOException { + // TODO: this may be sort of weird, when we are + // embedded in a BooleanScorer, because we are + // called for every chunk of 2048 documents. But, + // then, scorer is a FakeScorer in that case, so any + // Collector doing something "interesting" in + // setScorer will be forced to use BS2 anyways: + collector.setScorer(scorer); + if (max == DocIdSetIterator.NO_MORE_DOCS) { + scoreAll(collector, scorer); + return false; + } else { + int doc = scorer.docID(); + if (doc < 0) { + doc = scorer.nextDoc(); + } + return scoreRange(collector, scorer, doc, max); + } + } + + /** Specialized method to bulk-score a range of hits; we + * separate this from {@link #scoreAll} to help out + * hotspot. 
+ * See LUCENE-5487 */ + static boolean scoreRange(Collector collector, Scorer scorer, int currentDoc, int end) throws IOException { + while (currentDoc < end) { + collector.collect(currentDoc); + currentDoc = scorer.nextDoc(); + } + return currentDoc != DocIdSetIterator.NO_MORE_DOCS; + } + + /** Specialized method to bulk-score all hits; we + * separate this from {@link #scoreRange} to help out + * hotspot. + * See LUCENE-5487 */ + static void scoreAll(Collector collector, Scorer scorer) throws IOException { + int doc; + while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) { + collector.collect(doc); + } + } + } + + /** + * Returns true iff this implementation scores docs only out of order. This + * method is used in conjunction with {@link Collector}'s + * {@link Collector#acceptsDocsOutOfOrder() acceptsDocsOutOfOrder} and + * {@link #bulkScorer(AtomicReaderContext, boolean, Bits)} to + * create a matching {@link Scorer} instance for a given {@link Collector}, or + * vice versa. + *

    + * NOTE: the default implementation returns false, i.e. + * the Scorer scores documents in-order. + */ + public boolean scoresDocsOutOfOrder() { + return false; + } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/WildcardQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/WildcardQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/WildcardQuery.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/WildcardQuery.java 16 Dec 2014 11:31:48 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,43 +17,100 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import java.util.ArrayList; +import java.util.List; + import org.apache.lucene.index.Term; -import java.io.IOException; +import org.apache.lucene.util.ToStringUtils; +import org.apache.lucene.util.automaton.Automata; +import org.apache.lucene.util.automaton.Operations; +import org.apache.lucene.util.automaton.Automaton; /** Implements the wildcard search query. Supported wildcards are *, which * matches any character sequence (including the empty one), and ?, - * which matches any single character. Note this query can be slow, as it + * which matches any single character. '\' is the escape character. + *

    + * Note this query can be slow, as it * needs to iterate over many terms. In order to prevent extremely slow WildcardQueries, - * a Wildcard term should not start with one of the wildcards * or - * ?. + * a Wildcard term should not start with the wildcard * * - * @see WildcardTermEnum + *

    This query uses the {@link + * MultiTermQuery#CONSTANT_SCORE_AUTO_REWRITE_DEFAULT} + * rewrite method. + * + * @see AutomatonQuery */ -public class WildcardQuery extends MultiTermQuery { - private boolean termContainsWildcard; - - public WildcardQuery(Term term) { - super(term); - this.termContainsWildcard = (term.text().indexOf('*') != -1) || (term.text().indexOf('?') != -1); - } +public class WildcardQuery extends AutomatonQuery { + /** String equality with support for wildcards */ + public static final char WILDCARD_STRING = '*'; - protected FilteredTermEnum getEnum(IndexReader reader) throws IOException { - return new WildcardTermEnum(reader, getTerm()); - } + /** Char equality with support for wildcards */ + public static final char WILDCARD_CHAR = '?'; - public boolean equals(Object o) { - if (o instanceof WildcardQuery) - return super.equals(o); - - return false; + /** Escape character */ + public static final char WILDCARD_ESCAPE = '\\'; + + /** + * Constructs a query for terms matching term. + */ + public WildcardQuery(Term term) { + super(term, toAutomaton(term)); } - public Query rewrite(IndexReader reader) throws IOException { - if (this.termContainsWildcard) { - return super.rewrite(reader); + /** + * Convert Lucene wildcard syntax into an automaton. + * @lucene.internal + */ + @SuppressWarnings("fallthrough") + public static Automaton toAutomaton(Term wildcardquery) { + List automata = new ArrayList<>(); + + String wildcardText = wildcardquery.text(); + + for (int i = 0; i < wildcardText.length();) { + final int c = wildcardText.codePointAt(i); + int length = Character.charCount(c); + switch(c) { + case WILDCARD_STRING: + automata.add(Automata.makeAnyString()); + break; + case WILDCARD_CHAR: + automata.add(Automata.makeAnyChar()); + break; + case WILDCARD_ESCAPE: + // add the next codepoint instead, if it exists + if (i + length < wildcardText.length()) { + final int nextChar = wildcardText.codePointAt(i + length); + length += Character.charCount(nextChar); + automata.add(Automata.makeChar(nextChar)); + break; + } // else fallthru, lenient parsing with a trailing \ + default: + automata.add(Automata.makeChar(c)); } - - return new TermQuery(getTerm()); + i += length; + } + + return Operations.concatenate(automata); } + + /** + * Returns the pattern term. + */ + public Term getTerm() { + return term; + } + + /** Prints a user-readable version of this query. */ + @Override + public String toString(String field) { + StringBuilder buffer = new StringBuilder(); + if (!getField().equals(field)) { + buffer.append(getField()); + buffer.append(":"); + } + buffer.append(term.text()); + buffer.append(ToStringUtils.boost(getBoost())); + return buffer.toString(); + } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/WildcardTermEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/package.html 17 Aug 2012 14:54:56 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/package.html 16 Dec 2014 11:31:50 -0000 1.1.2.1 @@ -18,8 +18,6 @@ - - Code to search indices. @@ -29,346 +27,559 @@

    1. Search Basics
    2. The Query Classes
    3. -
    4. Changing the Scoring
    5. +
    6. Scoring: Introduction
    7. +
    8. Scoring: Basics
    9. +
    10. Changing the Scoring
    11. +
    12. Appendix: Search Algorithm

    - -

    Search

    -

    -Search over indices. -Applications usually call {@link -org.apache.lucene.search.Searcher#search(Query)} or {@link -org.apache.lucene.search.Searcher#search(Query,Filter)}. + +

    Search Basics

    +

    +Lucene offers a wide variety of {@link org.apache.lucene.search.Query} implementations, most of which are in +this package, its subpackages ({@link org.apache.lucene.search.spans spans}, {@link org.apache.lucene.search.payloads payloads}), +or the queries module. These implementations can be combined in a wide +variety of ways to provide complex querying capabilities along with information about where matches took place in the document +collection. The Query Classes section below highlights some of the more important Query classes. For details +on implementing your own Query class, see Custom Queries -- Expert Level below. +

    +

    +To perform a search, applications usually call {@link +org.apache.lucene.search.IndexSearcher#search(Query,int)} or {@link +org.apache.lucene.search.IndexSearcher#search(Query,Filter,int)}. +
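As an illustrative sketch (not from the Lucene sources), a complete search might look like the following; the index location, field names, and query term are hypothetical, and the usual org.apache.lucene imports are assumed:

    // Open an existing index and run a simple TermQuery over it.
    Directory dir = FSDirectory.open(new File("/path/to/index"));   // hypothetical index location
    IndexReader reader = DirectoryReader.open(dir);
    IndexSearcher searcher = new IndexSearcher(reader);

    TopDocs topDocs = searcher.search(new TermQuery(new Term("body", "lucene")), 10);
    for (ScoreDoc sd : topDocs.scoreDocs) {
      Document hit = searcher.doc(sd.doc);       // load the stored fields of the matching document
      System.out.println(sd.score + " " + hit.get("title"));
    }

    reader.close();
    dir.close();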

    +

+Once a Query has been created and submitted to the {@link org.apache.lucene.search.IndexSearcher IndexSearcher}, the scoring +process begins. After some infrastructure setup, control finally passes to the {@link org.apache.lucene.search.Weight Weight} +implementation and its {@link org.apache.lucene.search.Scorer Scorer} or {@link org.apache.lucene.search.BulkScorer BulkScorer} +instances. See the Algorithm section for more notes on the process. +

    +

    + +

    Query Classes

    - TermQuery + {@link org.apache.lucene.search.TermQuery TermQuery}

    Of the various implementations of - Query, the - TermQuery - is the easiest to understand and the most often used in applications. A TermQuery matches all the documents that contain the + {@link org.apache.lucene.search.Query Query}, the + {@link org.apache.lucene.search.TermQuery TermQuery} + is the easiest to understand and the most often used in applications. A + {@link org.apache.lucene.search.TermQuery TermQuery} matches all the documents that contain the specified - Term, + {@link org.apache.lucene.index.Term Term}, which is a word that occurs in a certain - Field. - Thus, a TermQuery identifies and scores all - Documents that have a Field with the specified string in it. - Constructing a TermQuery + {@link org.apache.lucene.document.Field Field}. + Thus, a {@link org.apache.lucene.search.TermQuery TermQuery} identifies and scores all + {@link org.apache.lucene.document.Document Document}s that have a + {@link org.apache.lucene.document.Field Field} with the specified string in it. + Constructing a {@link org.apache.lucene.search.TermQuery TermQuery} is as simple as: -

    +    
             TermQuery tq = new TermQuery(new Term("fieldName", "term"));
    -    
    In this example, the Query identifies all Documents that have the Field named "fieldName" +
    In this example, the {@link org.apache.lucene.search.Query Query} identifies all + {@link org.apache.lucene.document.Document Document}s that have the + {@link org.apache.lucene.document.Field Field} named "fieldName" containing the word "term".

    - BooleanQuery + {@link org.apache.lucene.search.BooleanQuery BooleanQuery}

    Things start to get interesting when one combines multiple - TermQuery instances into a BooleanQuery. - A BooleanQuery contains multiple - BooleanClauses, - where each clause contains a sub-query (Query - instance) and an operator (from BooleanClause.Occur) + {@link org.apache.lucene.search.TermQuery TermQuery} instances into a + {@link org.apache.lucene.search.BooleanQuery BooleanQuery}. + A {@link org.apache.lucene.search.BooleanQuery BooleanQuery} contains multiple + {@link org.apache.lucene.search.BooleanClause BooleanClause}s, + where each clause contains a sub-query ({@link org.apache.lucene.search.Query Query} + instance) and an operator (from + {@link org.apache.lucene.search.BooleanClause.Occur BooleanClause.Occur}) describing how that sub-query is combined with the other clauses:

      -
    1. SHOULD — Use this operator when a clause can occur in the result set, but is not required. +

    2. {@link org.apache.lucene.search.BooleanClause.Occur#SHOULD SHOULD} — Use this operator when a clause can occur in the result set, but is not required. If a query is made up of all SHOULD clauses, then every document in the result set matches at least one of these clauses.

    3. -
    4. MUST — Use this operator when a clause is required to occur in the result set. Every +

    5. {@link org.apache.lucene.search.BooleanClause.Occur#MUST MUST} — Use this operator when a clause is required to occur in the result set. Every document in the result set will match all such clauses.

    6. -
    7. MUST NOT — Use this operator when a +

    8. {@link org.apache.lucene.search.BooleanClause.Occur#MUST_NOT MUST NOT} — Use this operator when a clause must not occur in the result set. No document in the result set will match any such clauses.

Boolean queries are constructed by adding two or more - BooleanClause - instances. If too many clauses are added, a TooManyClauses + {@link org.apache.lucene.search.BooleanClause BooleanClause} + instances. If too many clauses are added, a {@link org.apache.lucene.search.BooleanQuery.TooManyClauses TooManyClauses} exception will be thrown during searching. This most often occurs - when a Query - is rewritten into a BooleanQuery with many - TermQuery clauses, - for example by WildcardQuery. + when a {@link org.apache.lucene.search.Query Query} + is rewritten into a {@link org.apache.lucene.search.BooleanQuery BooleanQuery} with many + {@link org.apache.lucene.search.TermQuery TermQuery} clauses, + for example by {@link org.apache.lucene.search.WildcardQuery WildcardQuery}. The default setting for the maximum number of clauses is 1024, but this can be changed via the - static method setMaxClauseCount - in BooleanQuery. + static method {@link org.apache.lucene.search.BooleanQuery#setMaxClauseCount(int)}.
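As a hedged sketch (not from the Lucene sources), the three operators above could be combined as follows, with hypothetical field names:

    BooleanQuery bq = new BooleanQuery();
    bq.add(new TermQuery(new Term("title", "lucene")),  BooleanClause.Occur.MUST);      // required
    bq.add(new TermQuery(new Term("body", "scoring")),  BooleanClause.Occur.SHOULD);    // optional, improves the score
    bq.add(new TermQuery(new Term("body", "obsolete")), BooleanClause.Occur.MUST_NOT);  // must be absent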

    Phrases

    Another common search is to find documents containing certain phrases. This - is handled two different ways: + is handled three different ways:

    1. -

      PhraseQuery +

      {@link org.apache.lucene.search.PhraseQuery PhraseQuery} — Matches a sequence of - Terms. - PhraseQuery uses a slop factor to determine - how many positions may occur between any two terms in the phrase and still be considered a match.

      + {@link org.apache.lucene.index.Term Term}s. + {@link org.apache.lucene.search.PhraseQuery PhraseQuery} uses a slop factor to determine + how many positions may occur between any two terms in the phrase and still be considered a match. + The slop is 0 by default, meaning the phrase must match exactly.

    2. -

      SpanNearQuery +

      {@link org.apache.lucene.search.MultiPhraseQuery MultiPhraseQuery} + — A more general form of PhraseQuery that accepts multiple Terms + for a position in the phrase. For example, this can be used to perform phrase queries that also + incorporate synonyms. +

    3. +
    4. +

      {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} — Matches a sequence of other - SpanQuery - instances. SpanNearQuery allows for + {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} + instances. {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} allows for much more - complicated phrase queries since it is constructed from other SpanQuery - instances, instead of only TermQuery + complicated phrase queries since it is constructed from other + {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} + instances, instead of only {@link org.apache.lucene.search.TermQuery TermQuery} instances.

    +
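The snippet below is an illustrative sketch (not from the Lucene sources) of the first two phrase variants listed above, using hypothetical field and term values:

    // PhraseQuery: "quick fox" exactly, or with setSlop(1) also "quick brown fox".
    PhraseQuery pq = new PhraseQuery();
    pq.add(new Term("body", "quick"));
    pq.add(new Term("body", "fox"));
    pq.setSlop(1);

    // MultiPhraseQuery: several alternative terms at one position, e.g. simple synonyms.
    MultiPhraseQuery mpq = new MultiPhraseQuery();
    mpq.add(new Term("body", "quick"));
    mpq.add(new Term[] { new Term("body", "fox"), new Term("body", "foxes") });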

    - RangeQuery + {@link org.apache.lucene.search.TermRangeQuery TermRangeQuery}

    The - RangeQuery + {@link org.apache.lucene.search.TermRangeQuery TermRangeQuery} matches all documents that occur in the exclusive range of a lower - Term + {@link org.apache.lucene.index.Term Term} and an upper - Term. + {@link org.apache.lucene.index.Term Term} + according to {@link org.apache.lucene.index.TermsEnum#getComparator TermsEnum.getComparator()}. It is not intended + for numerical ranges; use {@link org.apache.lucene.search.NumericRangeQuery NumericRangeQuery} instead. + For example, one could find all documents - that have terms beginning with the letters a through c. This type of Query is frequently used to - find - documents that occur in a specific date range. + that have terms beginning with the letters a through c.

    +

    - PrefixQuery, - WildcardQuery + {@link org.apache.lucene.search.NumericRangeQuery NumericRangeQuery}

    +

The + {@link org.apache.lucene.search.NumericRangeQuery NumericRangeQuery} + matches all documents that occur in a numeric range. + For NumericRangeQuery to work, you must index the values + using one of the numeric fields ({@link org.apache.lucene.document.IntField IntField}, + {@link org.apache.lucene.document.LongField LongField}, {@link org.apache.lucene.document.FloatField FloatField}, + or {@link org.apache.lucene.document.DoubleField DoubleField}). +
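For illustration only (not from the Lucene sources), indexing and querying a numeric field might look like this, assuming a hypothetical "price" field and an existing Document named doc:

    // Index time: store the value in a numeric field on the document.
    doc.add(new IntField("price", 42, Field.Store.NO));

    // Search time: match all documents whose price lies in the inclusive range [10, 100].
    Query priceRange = NumericRangeQuery.newIntRange("price", 10, 100, true, true);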

    + +

    + {@link org.apache.lucene.search.PrefixQuery PrefixQuery}, + {@link org.apache.lucene.search.WildcardQuery WildcardQuery}, + {@link org.apache.lucene.search.RegexpQuery RegexpQuery} +

    +

    While the - PrefixQuery + {@link org.apache.lucene.search.PrefixQuery PrefixQuery} has a different implementation, it is essentially a special case of the - WildcardQuery. - The PrefixQuery allows an application - to identify all documents with terms that begin with a certain string. The WildcardQuery generalizes this by allowing + {@link org.apache.lucene.search.WildcardQuery WildcardQuery}. + The {@link org.apache.lucene.search.PrefixQuery PrefixQuery} allows an application + to identify all documents with terms that begin with a certain string. The + {@link org.apache.lucene.search.WildcardQuery WildcardQuery} generalizes this by allowing for the use of * (matches 0 or more characters) and ? (matches exactly one character) wildcards. - Note that the WildcardQuery can be quite slow. Also + Note that the {@link org.apache.lucene.search.WildcardQuery WildcardQuery} can be quite slow. Also note that - WildcardQuery should + {@link org.apache.lucene.search.WildcardQuery WildcardQuery} should not start with * and ?, as these are extremely slow. - To remove this protection and allow a wildcard at the beginning of a term, see method - setAllowLeadingWildcard in - QueryParser. + Some QueryParsers may not allow this by default, but provide a setAllowLeadingWildcard method + to remove that protection. + The {@link org.apache.lucene.search.RegexpQuery RegexpQuery} is even more general than WildcardQuery, + allowing an application to identify all documents with terms that match a regular expression pattern.
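As a brief illustrative sketch (not from the Lucene sources), with a hypothetical "title" field:

    Query prefix   = new PrefixQuery(new Term("title", "luc"));         // matches luc*
    Query wildcard = new WildcardQuery(new Term("title", "lu?ene*"));   // ? = exactly one character, * = any sequence
    Query regexp   = new RegexpQuery(new Term("title", "luc[a-z]+"));   // regular-expression syntax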

    - FuzzyQuery + {@link org.apache.lucene.search.FuzzyQuery FuzzyQuery}

    A - FuzzyQuery + {@link org.apache.lucene.search.FuzzyQuery FuzzyQuery} matches documents that contain terms similar to the specified term. Similarity is determined using Levenshtein (edit) distance. This type of query can be useful when accounting for spelling variations in the collection.
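A minimal illustrative sketch (not from the Lucene sources), assuming a hypothetical "title" field:

    // Matches terms within two edits of "lucene", e.g. "lucine" or "luzene".
    Query fuzzy = new FuzzyQuery(new Term("title", "lucene"), 2);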

    - -

    Changing Similarity

    -

    Chances are DefaultSimilarity is sufficient for all - your searching needs. - However, in some applications it may be necessary to customize your Similarity implementation. For instance, some - applications do not need to - distinguish between shorter and longer documents (see a "fair" similarity).

    -

    To change Similarity, one must do so for both indexing and - searching, and the changes must happen before - either of these actions take place. Although in theory there is nothing stopping you from changing mid-stream, it - just isn't well-defined what is going to happen. + +

    Scoring — Introduction

    +

    Lucene scoring is the heart of why we all love Lucene. It is blazingly fast and it hides + almost all of the complexity from the user. In a nutshell, it works. At least, that is, + until it doesn't work, or doesn't work as one would expect it to work. Then we are left + digging into Lucene internals or asking for help on + java-user@lucene.apache.org to figure out + why a document with five of our query terms scores lower than a different document with + only one of the query terms.

    +

    While this document won't answer your specific scoring issues, it will, hopefully, point you + to the places that can help you figure out the what and why of Lucene scoring. +

    +

    Lucene scoring supports a number of pluggable information retrieval + models, including: +

    + These models can be plugged in via the {@link org.apache.lucene.search.similarities Similarity API}, + and offer extension hooks and parameters for tuning. In general, Lucene first finds the documents + that need to be scored based on boolean logic in the Query specification, and then ranks this subset of + matching documents via the retrieval model. For some valuable references on VSM and IR in general refer to + Lucene Wiki IR references. +

    +

    The rest of this document will cover Scoring basics and explain how to + change your {@link org.apache.lucene.search.similarities.Similarity Similarity}. Next, it will cover + ways you can customize the lucene internals in + Custom Queries -- Expert Level, which gives details on + implementing your own {@link org.apache.lucene.search.Query Query} class and related functionality. + Finally, we will finish up with some reference material in the Appendix. +

    -

    To make this change, implement your own Similarity (likely - you'll want to simply subclass - DefaultSimilarity) and then use the new - class by calling - IndexWriter.setSimilarity - before indexing and - Searcher.setSimilarity - before searching. + + +

    Scoring — Basics

    +

Scoring is very much dependent on the way documents are indexed, so it is important to understand + indexing (see the Lucene overview + before continuing with this section). Be sure to use the useful + {@link org.apache.lucene.search.IndexSearcher#explain(org.apache.lucene.search.Query, int) IndexSearcher.explain(Query, doc)} + to understand how the score for a certain matching document was + computed.
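For illustration (not from the Lucene sources), assuming an existing searcher, query, and a hit's document id:

    Explanation explanation = searcher.explain(query, scoreDoc.doc);
    System.out.println(explanation.toString());   // prints the tree of factors that produced the score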

    Generally, the Query determines which documents match (a binary + decision), while the Similarity determines how to assign scores to + the matching documents. +

    +

    Fields and Documents

    +

    In Lucene, the objects we are scoring are {@link org.apache.lucene.document.Document Document}s. + A Document is a collection of {@link org.apache.lucene.document.Field Field}s. Each Field has + {@link org.apache.lucene.document.FieldType semantics} about how it is created and stored + ({@link org.apache.lucene.document.FieldType#tokenized() tokenized}, + {@link org.apache.lucene.document.FieldType#stored() stored}, etc). It is important to note that + Lucene scoring works on Fields and then combines the results to return Documents. This is + important because two Documents with the exact same content, but one having the content in two + Fields and the other in one Field may return different scores for the same query due to length + normalization. +
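As an illustrative sketch (not from the Lucene sources) of building a Document from Fields, with hypothetical field names and content:

    Document doc = new Document();
    doc.add(new TextField("title", "Lucene scoring basics", Field.Store.YES));   // tokenized and stored
    doc.add(new TextField("body", "full text of the document", Field.Store.NO)); // tokenized, not stored
    doc.add(new StringField("id", "42", Field.Store.YES));                       // indexed as a single token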

    +

    Score Boosting

    +

    Lucene allows influencing search results by "boosting" at different times: +

      +
    • Index-time boost by calling + {@link org.apache.lucene.document.Field#setBoost(float) Field.setBoost()} before a document is + added to the index.
    • +
    • Query-time boost by setting a boost on a query clause, calling + {@link org.apache.lucene.search.Query#setBoost(float) Query.setBoost()}.
    • +
    +

    +

    Indexing time boosts are pre-processed for storage efficiency and written to + storage for a field as follows: +

      +
    • All boosts of that field (i.e. all boosts under the same field name in that doc) are + multiplied.
    • +
    • The boost is then encoded into a normalization value by the Similarity + object at index-time: {@link org.apache.lucene.search.similarities.Similarity#computeNorm computeNorm()}. + The actual encoding depends upon the Similarity implementation, but note that most + use a lossy encoding (such as multiplying the boost with document length or similar, packed + into a single byte!).
    • +
    • Decoding of any index-time normalization values and integration into the document's score is also performed + at search time by the Similarity.
    • +
    +

    + + +

    Changing Scoring — Similarity

    - If you are interested in use cases for changing your similarity, see the Lucene users's mailing list at Overriding Similarity. - In summary, here are a few use cases: -

      -
    1. SweetSpotSimilaritySweetSpotSimilarity gives small increases - as the frequency increases a small amount - and then greater increases when you hit the "sweet spot", i.e. where you think the frequency of terms is - more significant.

    2. -
    3. Overriding tf — In some applications, it doesn't matter what the score of a document is as long as a - matching term occurs. In these - cases people have overridden Similarity to return 1 from the tf() method.

    4. -
    5. Changing Length Normalization — By overriding lengthNorm, - it is possible to discount how the length of a field contributes - to a score. In DefaultSimilarity, - lengthNorm = 1 / (numTerms in field)^0.5, but if one changes this to be - 1 / (numTerms in field), all fields will be treated - "fairly".

    6. -
    - In general, Chris Hostetter sums it up best in saying (from the Lucene users's mailing list): -
    [One would override the Similarity in] ... any situation where you know more about your data then just - that - it's "text" is a situation where it *might* make sense to to override your - Similarity method.
+Changing {@link org.apache.lucene.search.similarities.Similarity Similarity} is an easy way to +influence scoring; this is done at index-time with +{@link org.apache.lucene.index.IndexWriterConfig#setSimilarity(org.apache.lucene.search.similarities.Similarity) + IndexWriterConfig.setSimilarity(Similarity)} and at query-time with +{@link org.apache.lucene.search.IndexSearcher#setSimilarity(org.apache.lucene.search.similarities.Similarity) + IndexSearcher.setSimilarity(Similarity)}. Be sure to use the same +Similarity at query-time as at index-time (so that norms are +encoded/decoded correctly); Lucene makes no effort to verify this.
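An illustrative sketch (not from the Lucene sources), assuming an existing analyzer and reader; BM25Similarity stands in for any Similarity implementation:

    Similarity sim = new BM25Similarity();

    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_CURRENT, analyzer);
    iwc.setSimilarity(sim);                       // used at index time (norm encoding)

    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(sim);                  // used at query time (scoring)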

    - -

    Changing Scoring — Expert Level

    +

+You can influence scoring by configuring a different built-in Similarity implementation, by tweaking its +parameters, or by subclassing it to override behavior. Some implementations also offer a modular API which you can +extend by plugging in a different component (e.g. a term frequency normalizer). +

    +

    +Finally, you can extend the low level {@link org.apache.lucene.search.similarities.Similarity Similarity} directly +to implement a new retrieval model, or to use external scoring factors particular to your application. For example, +a custom Similarity can access per-document values via {@link org.apache.lucene.search.FieldCache FieldCache} or +{@link org.apache.lucene.index.NumericDocValues} and integrate them into the score. +

    +

    +See the {@link org.apache.lucene.search.similarities} package documentation for information +on the built-in available scoring models and extending or changing Similarity. +

    -

    Changing scoring is an expert level task, so tread carefully and be prepared to share your code if + + +

    Custom Queries — Expert Level

    + +

    Custom queries are an expert level task, so tread carefully and be prepared to share your code if you want help.

    With the warning out of the way, it is possible to change a lot more than just the Similarity - when it comes to scoring in Lucene. Lucene's scoring is a complex mechanism that is grounded by - three main classes: + when it comes to matching and scoring in Lucene. Lucene's search is a complex mechanism that is grounded by + three main classes:

    1. - Query — The abstract object representation of the + {@link org.apache.lucene.search.Query Query} — The abstract object representation of the user's information need.
    2. - Weight — The internal interface representation of - the user's Query, so that Query objects may be reused.
    3. + {@link org.apache.lucene.search.Weight Weight} — The internal interface representation of + the user's Query, so that Query objects may be reused. + This is global (across all segments of the index) and + generally will require global statistics (such as docFreq + for a given term across all segments).
    4. - Scorer — An abstract class containing common - functionality for scoring. Provides both scoring and explanation capabilities.
    5. + {@link org.apache.lucene.search.Scorer Scorer} — An abstract class containing common + functionality for scoring. Provides both scoring and + explanation capabilities. This is created per-segment. +
    6. + {@link org.apache.lucene.search.BulkScorer BulkScorer} — An abstract class that scores + a range of documents. A default implementation simply iterates through the hits from + {@link org.apache.lucene.search.Scorer Scorer}, but some queries such as + {@link org.apache.lucene.search.BooleanQuery BooleanQuery} have more efficient + implementations.
    Details on each of these classes, and their children, can be found in the subsections below.
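The following is a simplified conceptual sketch (not from the Lucene sources) of how these classes cooperate; real searches go through IndexSearcher and a Collector rather than a hand-rolled loop like this:

    Weight weight = searcher.createNormalizedWeight(query);               // one Weight per search, global statistics
    for (AtomicReaderContext leaf : searcher.getIndexReader().leaves()) { // one Scorer per segment
      Scorer scorer = weight.scorer(leaf, leaf.reader().getLiveDocs());
      if (scorer == null) {
        continue;                                                         // no matching documents in this segment
      }
      int doc;
      while ((doc = scorer.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS) {
        System.out.println((leaf.docBase + doc) + " scored " + scorer.score());
      }
    }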

    The Query Class

    In some sense, the - Query + {@link org.apache.lucene.search.Query Query} class is where it all begins. Without a Query, there would be nothing to score. Furthermore, the Query class is the catalyst for the other scoring classes as it is often responsible for creating them or coordinating the functionality between them. The - Query class has several methods that are important for + {@link org.apache.lucene.search.Query Query} class has several methods that are important for derived classes:

      -
    1. createWeight(Searcher searcher) — A - Weight is the internal representation of the +
    2. {@link org.apache.lucene.search.Query#createWeight(IndexSearcher) createWeight(IndexSearcher searcher)} — A + {@link org.apache.lucene.search.Weight Weight} is the internal representation of the Query, so each Query implementation must provide an implementation of Weight. See the subsection on The Weight Interface below for details on implementing the Weight + href="#weightClass">The Weight Interface below for details on implementing the Weight interface.
    3. -
    4. rewrite(IndexReader reader) — Rewrites queries into primitive queries. Primitive queries are: - TermQuery, - BooleanQuery, OTHERS????
    5. +
    6. {@link org.apache.lucene.search.Query#rewrite(IndexReader) rewrite(IndexReader reader)} — Rewrites queries into primitive queries. Primitive queries are: + {@link org.apache.lucene.search.TermQuery TermQuery}, + {@link org.apache.lucene.search.BooleanQuery BooleanQuery}, and other queries that implement {@link org.apache.lucene.search.Query#createWeight(IndexSearcher) createWeight(IndexSearcher searcher)}

    +

    The Weight Interface

    The - Weight + {@link org.apache.lucene.search.Weight Weight} interface provides an internal representation of the Query so that it can be reused. Any - Searcher + {@link org.apache.lucene.search.IndexSearcher IndexSearcher} dependent state should be stored in the Weight implementation, - not in the Query class. The interface defines six methods that must be implemented: + not in the Query class. The interface defines five methods that must be implemented:

    1. - Weight#getQuery() — Pointer to the + {@link org.apache.lucene.search.Weight#getQuery getQuery()} — Pointer to the Query that this Weight represents.
    2. - Weight#getValue() — The weight for - this Query. For example, the TermQuery.TermWeight value is - equal to the idf^2 * boost * queryNorm
    3. + {@link org.apache.lucene.search.Weight#getValueForNormalization() getValueForNormalization()} — + A weight can return a floating point value to indicate its magnitude for query normalization. Typically + a weight such as TermWeight that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} + will just defer to the Similarity's implementation: + {@link org.apache.lucene.search.similarities.Similarity.SimWeight#getValueForNormalization SimWeight#getValueForNormalization()}. + For example, with {@link org.apache.lucene.search.similarities.TFIDFSimilarity Lucene's classic vector-space formula}, this + is implemented as the sum of squared weights: (idf * boost)2
    4. - - Weight#sumOfSquaredWeights() — The sum of squared weights. For TermQuery, this is (idf * - boost)^2
    5. -
    6. - - Weight#normalize(float) — Determine the query normalization factor. The query normalization may + {@link org.apache.lucene.search.Weight#normalize(float,float) normalize(float norm, float topLevelBoost)} — + Performs query normalization: +
        +
      • topLevelBoost: A query-boost factor from any wrapping queries that should be multiplied into every + document's score. For example, a TermQuery that is wrapped within a BooleanQuery with a boost of 5 would + receive this value at this time. This allows the TermQuery (the leaf node in this case) to compute this up-front + a single time (e.g. by multiplying into the IDF), rather than for every document.
      • +
• norm: Passes in a normalization factor which may allow for comparing scores between queries.
      • +
      + Typically a weight such as TermWeight + that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will just defer to the Similarity's implementation: + {@link org.apache.lucene.search.similarities.Similarity.SimWeight#normalize SimWeight#normalize(float,float)}.
    7. - - Weight#scorer(IndexReader) — Construct a new - Scorer - for this Weight. See - The Scorer Class - below for help defining a Scorer. As the name implies, the - Scorer is responsible for doing the actual scoring of documents given the Query. + {@link org.apache.lucene.search.Weight#scorer(org.apache.lucene.index.AtomicReaderContext, org.apache.lucene.util.Bits) + scorer(AtomicReaderContext context, Bits acceptDocs)} — + Construct a new {@link org.apache.lucene.search.Scorer Scorer} for this Weight. See The Scorer Class + below for help defining a Scorer. As the name implies, the Scorer is responsible for doing the actual scoring of documents + given the Query.
    8. - - Weight#explain(IndexReader, int) — Provide a means for explaining why a given document was - scored - the way it was.
    9. + {@link org.apache.lucene.search.Weight#bulkScorer(org.apache.lucene.index.AtomicReaderContext, boolean, org.apache.lucene.util.Bits) + scorer(AtomicReaderContext context, boolean scoreDocsInOrder, Bits acceptDocs)} — + Construct a new {@link org.apache.lucene.search.BulkScorer BulkScorer} for this Weight. See The BulkScorer Class + below for help defining a BulkScorer. This is an optional method, and most queries do not implement it. + +
    10. + {@link org.apache.lucene.search.Weight#explain(org.apache.lucene.index.AtomicReaderContext, int) + explain(AtomicReaderContext context, int doc)} — Provide a means for explaining why a given document was + scored the way it was. + Typically a weight such as TermWeight + that scores via a {@link org.apache.lucene.search.similarities.Similarity Similarity} will make use of the Similarity's implementation: + {@link org.apache.lucene.search.similarities.Similarity.SimScorer#explain(int, Explanation) SimScorer#explain(int doc, Explanation freq)}. +
    11. +

    +

    The Scorer Class

    The - Scorer + {@link org.apache.lucene.search.Scorer Scorer} abstract class provides common scoring functionality for all Scorer implementations and - is the heart of the Lucene scoring process. The Scorer defines the following abstract methods which - must be implemented: + is the heart of the Lucene scoring process. The Scorer defines the following abstract (some of them are not + yet abstract, but will be in future versions and should be considered as such now) methods which + must be implemented (some of them inherited from {@link org.apache.lucene.search.DocIdSetIterator DocIdSetIterator}):

    1. - Scorer#next() — Advances to the next - document that matches this Query, returning true if and only - if there is another document that matches.
    2. + {@link org.apache.lucene.search.Scorer#nextDoc nextDoc()} — Advances to the next + document that matches this Query, returning true if and only if there is another document that matches.
    3. - Scorer#doc() — Returns the id of the - Document - that contains the match. It is not valid until next() has been called at least once. + {@link org.apache.lucene.search.Scorer#docID docID()} — Returns the id of the + {@link org.apache.lucene.document.Document Document} that contains the match.
    4. - Scorer#score() — Return the score of the - current document. This value can be determined in any - appropriate way for an application. For instance, the - TermScorer - returns the tf * Weight.getValue() * fieldNorm. + {@link org.apache.lucene.search.Scorer#score score()} — Return the score of the + current document. This value can be determined in any appropriate way for an application. For instance, the + {@link org.apache.lucene.search.TermScorer TermScorer} simply defers to the configured Similarity: + {@link org.apache.lucene.search.similarities.Similarity.SimScorer#score(int, float) SimScorer.score(int doc, float freq)}.
    5. - Scorer#skipTo(int) — Skip ahead in + {@link org.apache.lucene.search.Scorer#freq freq()} — Returns the number of matches + for the current document. This value can be determined in any appropriate way for an application. For instance, the + {@link org.apache.lucene.search.TermScorer TermScorer} simply defers to the term frequency from the inverted index: + {@link org.apache.lucene.index.DocsEnum#freq DocsEnum.freq()}. +
    6. +
    7. + {@link org.apache.lucene.search.Scorer#advance advance()} — Skip ahead in the document matches to the document whose id is greater than - or equal to the passed in value. In many instances, skipTo can be + or equal to the passed in value. In many instances, advance can be implemented more efficiently than simply looping through all the matching documents until - the target document is identified.
    8. + the target document is identified. +
    9. - Scorer#explain(int) — Provides - details on why the score came about.
    10. + {@link org.apache.lucene.search.Scorer#getChildren getChildren()} — Returns any child subscorers + underneath this scorer. This allows for users to navigate the scorer hierarchy and receive more fine-grained + details on the scoring process. +

    + +

    The BulkScorer Class

    +

    The + {@link org.apache.lucene.search.BulkScorer BulkScorer} scores a range of documents. There is only one + abstract method: +

      +
    1. + {@link org.apache.lucene.search.BulkScorer#score(org.apache.lucene.search.Collector,int) score(Collector,int)} — + Score all documents up to but not including the specified max document. +
    2. +
    +

    Why would I want to add my own Query?

    In a nutshell, you want to add your own custom Query implementation when you think that Lucene's aren't appropriate for the task that you want to do. You might be doing some cutting edge research or you need more information back out of Lucene (similar to Doug adding SpanQuery functionality).

    -

    Examples

    -

    FILL IN HERE

    + + + + +

    Appendix: Search Algorithm

    +

    This section is mostly notes on stepping through the Scoring process and serves as + fertilizer for the earlier sections.

    +

    In the typical search application, a {@link org.apache.lucene.search.Query Query} + is passed to the {@link org.apache.lucene.search.IndexSearcher IndexSearcher}, + beginning the scoring process.

    +

    Once inside the IndexSearcher, a {@link org.apache.lucene.search.Collector Collector} + is used for the scoring and sorting of the search results. + These important objects are involved in a search: +

      +
    1. The {@link org.apache.lucene.search.Weight Weight} object of the Query. The + Weight object is an internal representation of the Query that allows the Query + to be reused by the IndexSearcher.
    2. +
    3. The IndexSearcher that initiated the call.
    4. +
    5. A {@link org.apache.lucene.search.Filter Filter} for limiting the result set. + Note, the Filter may be null.
    6. +
    7. A {@link org.apache.lucene.search.Sort Sort} object for specifying how to sort + the results if the standard score-based sort method is not desired.
    8. +
    +

    +

    Assuming we are not sorting (since sorting doesn't affect the raw Lucene score), + we call one of the search methods of the IndexSearcher, passing in the + {@link org.apache.lucene.search.Weight Weight} object created by + {@link org.apache.lucene.search.IndexSearcher#createNormalizedWeight(org.apache.lucene.search.Query) + IndexSearcher.createNormalizedWeight(Query)}, + {@link org.apache.lucene.search.Filter Filter} and the number of results we want. + This method returns a {@link org.apache.lucene.search.TopDocs TopDocs} object, + which is an internal collection of search results. The IndexSearcher creates + a {@link org.apache.lucene.search.TopScoreDocCollector TopScoreDocCollector} and + passes it along with the Weight, Filter to another expert search method (for + more on the {@link org.apache.lucene.search.Collector Collector} mechanism, + see {@link org.apache.lucene.search.IndexSearcher IndexSearcher}). The TopScoreDocCollector + uses a {@link org.apache.lucene.util.PriorityQueue PriorityQueue} to collect the + top results for the search. +
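As an illustrative sketch (not from the Lucene sources) of the collector-based path, assuming an existing searcher and query:

    TopScoreDocCollector collector = TopScoreDocCollector.create(10, true);  // top 10 hits, docs delivered in order
    searcher.search(query, collector);
    TopDocs hits = collector.topDocs();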

    +

    If a Filter is being used, some initial setup is done to determine which docs to include. + Otherwise, we ask the Weight for a {@link org.apache.lucene.search.Scorer Scorer} for each + {@link org.apache.lucene.index.IndexReader IndexReader} segment and proceed by calling + {@link org.apache.lucene.search.BulkScorer#score(org.apache.lucene.search.Collector) BulkScorer.score(Collector)}. +

    +

At last, we are actually going to score some documents. The score method takes in the Collector + (most likely the TopScoreDocCollector or TopFieldCollector) and does its business. Of course, here + is where things get involved. The {@link org.apache.lucene.search.Scorer Scorer} that is returned + by the {@link org.apache.lucene.search.Weight Weight} object depends on what type of Query was + submitted. In most real-world applications with multiple query terms, the + {@link org.apache.lucene.search.Scorer Scorer} is going to be a BooleanScorer2 created + from {@link org.apache.lucene.search.BooleanQuery.BooleanWeight BooleanWeight} (see the section on + custom queries for info on changing this). +

    +

Assuming a BooleanScorer2, we first initialize the Coordinator, which is used to apply the coord() + factor. We then get an internal Scorer based on the required, optional and prohibited parts of the query. + Using this internal Scorer, the BooleanScorer2 then proceeds into a while loop based on the + {@link org.apache.lucene.search.Scorer#nextDoc Scorer.nextDoc()} method. The nextDoc() method advances + to the next document matching the query. This is an abstract method in the Scorer class and is thus + overridden by all derived implementations. If you have a simple OR query, your internal Scorer is most + likely a DisjunctionSumScorer, which essentially combines the scorers from the sub scorers of the OR'd terms.

    Index: 3rdParty_sources/lucene/org/apache/lucene/search/doc-files/nrq-formula-1.png =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/doc-files/Attic/nrq-formula-1.png,v diff -u -r1.1 -r1.1.2.1 Binary files differ Index: 3rdParty_sources/lucene/org/apache/lucene/search/doc-files/nrq-formula-2.png =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/doc-files/Attic/nrq-formula-2.png,v diff -u -r1.1 -r1.1.2.1 Binary files differ Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/ByteFieldSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/CustomScoreQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/DocValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/FieldCacheSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/FieldScoreQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/FloatFieldSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/IntFieldSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/OrdFieldSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/ReverseOrdFieldSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/ShortFieldSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/ValueSource.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/ValueSourceQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/function/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/payloads/AveragePayloadFunction.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/payloads/BoostingTermQuery.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/payloads/MaxPayloadFunction.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/payloads/MinPayloadFunction.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/payloads/PayloadFunction.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/payloads/PayloadNearQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/payloads/PayloadSpanUtil.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/payloads/PayloadSpanUtil.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/payloads/PayloadSpanUtil.java 17 Aug 2012 14:55:14 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/payloads/PayloadSpanUtil.java 16 Dec 2014 11:31:57 -0000 1.1.2.1 @@ -1,7 +1,6 @@ package org.apache.lucene.search.payloads; -import org.apache.lucene.search.BooleanClause; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -21,61 +20,66 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.HashMap; import java.util.Iterator; import java.util.List; +import java.util.Map; +import java.util.TreeSet; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.search.BooleanClause; import org.apache.lucene.search.BooleanQuery; import org.apache.lucene.search.DisjunctionMaxQuery; import org.apache.lucene.search.FilteredQuery; import org.apache.lucene.search.MultiPhraseQuery; import org.apache.lucene.search.PhraseQuery; import org.apache.lucene.search.Query; import org.apache.lucene.search.TermQuery; -import org.apache.lucene.search.spans.PayloadSpans; import org.apache.lucene.search.spans.SpanNearQuery; import org.apache.lucene.search.spans.SpanOrQuery; import org.apache.lucene.search.spans.SpanQuery; import org.apache.lucene.search.spans.SpanTermQuery; +import org.apache.lucene.search.spans.Spans; /** * Experimental class to get set of payloads for most standard Lucene queries. * Operates like Highlighter - IndexReader should only contain doc of interest, * best to use MemoryIndex. * - *

    - * - * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. + * @lucene.experimental * */ public class PayloadSpanUtil { - private IndexReader reader; + private IndexReaderContext context; /** - * @param reader + * @param context * that contains doc with payloads to extract + * + * @see IndexReader#getContext() */ - public PayloadSpanUtil(IndexReader reader) { - this.reader = reader; + public PayloadSpanUtil(IndexReaderContext context) { + this.context = context; } /** * Query should be rewritten for wild/fuzzy support. * - * @param query + * @param query rewritten query * @return payloads Collection - * @throws IOException + * @throws IOException if there is a low-level I/O error */ - public Collection getPayloadsForQuery(Query query) throws IOException { - Collection payloads = new ArrayList(); + public Collection getPayloadsForQuery(Query query) throws IOException { + Collection payloads = new ArrayList<>(); queryToSpanQuery(query, payloads); return payloads; } - private void queryToSpanQuery(Query query, Collection payloads) + private void queryToSpanQuery(Query query, Collection payloads) throws IOException { if (query instanceof BooleanQuery) { BooleanClause[] queryClauses = ((BooleanQuery) query).getClauses(); @@ -113,14 +117,14 @@ queryToSpanQuery(((FilteredQuery) query).getQuery(), payloads); } else if (query instanceof DisjunctionMaxQuery) { - for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator + for (Iterator iterator = ((DisjunctionMaxQuery) query).iterator(); iterator .hasNext();) { - queryToSpanQuery((Query) iterator.next(), payloads); + queryToSpanQuery(iterator.next(), payloads); } } else if (query instanceof MultiPhraseQuery) { final MultiPhraseQuery mpq = (MultiPhraseQuery) query; - final List termArrays = mpq.getTermArrays(); + final List termArrays = mpq.getTermArrays(); final int[] positions = mpq.getPositions(); if (positions.length > 0) { @@ -131,29 +135,30 @@ } } - final List[] disjunctLists = new List[maxPosition + 1]; + @SuppressWarnings({"rawtypes","unchecked"}) final List[] disjunctLists = + new List[maxPosition + 1]; int distinctPositions = 0; for (int i = 0; i < termArrays.size(); ++i) { - final Term[] termArray = (Term[]) termArrays.get(i); - List disjuncts = disjunctLists[positions[i]]; + final Term[] termArray = termArrays.get(i); + List disjuncts = disjunctLists[positions[i]]; if (disjuncts == null) { - disjuncts = (disjunctLists[positions[i]] = new ArrayList( + disjuncts = (disjunctLists[positions[i]] = new ArrayList<>( termArray.length)); ++distinctPositions; } - for (int j = 0; j < termArray.length; ++j) { - disjuncts.add(new SpanTermQuery(termArray[j])); + for (final Term term : termArray) { + disjuncts.add(new SpanTermQuery(term)); } } int positionGaps = 0; int position = 0; final SpanQuery[] clauses = new SpanQuery[distinctPositions]; for (int i = 0; i < disjunctLists.length; ++i) { - List disjuncts = disjunctLists[i]; + List disjuncts = disjunctLists[i]; if (disjuncts != null) { - clauses[position++] = new SpanOrQuery((SpanQuery[]) disjuncts + clauses[position++] = new SpanOrQuery(disjuncts .toArray(new SpanQuery[disjuncts.size()])); } else { ++positionGaps; @@ -171,19 +176,23 @@ } } - private void getPayloads(Collection payloads, SpanQuery query) + private void getPayloads(Collection payloads, SpanQuery query) throws IOException { - PayloadSpans spans = query.getPayloadSpans(reader); - - while 
(spans.next() == true) { - if (spans.isPayloadAvailable()) { - Collection payload = spans.getPayload(); - Iterator it = payload.iterator(); - while (it.hasNext()) { - byte[] bytes = (byte[]) it.next(); - payloads.add(bytes); + Map termContexts = new HashMap<>(); + TreeSet terms = new TreeSet<>(); + query.extractTerms(terms); + for (Term term : terms) { + termContexts.put(term, TermContext.build(context, term)); + } + for (AtomicReaderContext atomicReaderContext : context.leaves()) { + final Spans spans = query.getSpans(atomicReaderContext, atomicReaderContext.reader().getLiveDocs(), termContexts); + while (spans.next() == true) { + if (spans.isPayloadAvailable()) { + Collection payload = spans.getPayload(); + for (byte [] bytes : payload) { + payloads.add(bytes); + } } - } } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/payloads/PayloadTermQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/payloads/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/payloads/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/payloads/package.html 17 Aug 2012 14:55:14 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/payloads/package.html 16 Dec 2014 11:31:57 -0000 1.1.2.1 @@ -1,36 +1,32 @@ - + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +--> + org.apache.lucene.search.payloads -

    The payloads package provides Query mechanisms for finding and using payloads. - +The payloads package provides Query mechanisms for finding and using payloads. +

    The following Query implementations are provided: -

    -
      -
    1. BoostingTermQuery -- Boost a term's score based on the value of the payload located at that term
    2. +
    3. {@link org.apache.lucene.search.payloads.PayloadTermQuery PayloadTermQuery} -- Boost a term's score based on the value of the payload located at that term.
    4. +
    5. {@link org.apache.lucene.search.payloads.PayloadNearQuery PayloadNearQuery} -- A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} that factors in the value of the payloads located + at each of the positions where the spans occur.
    -
    -
     
    -
    -
    +

Fisheye: Tag 1.1 refers to a dead (removed) revision in each file under `3rdParty_sources/lucene/org/apache/lucene/search/similarities/': AfterEffect.java, AfterEffectB.java, AfterEffectL.java, BM25Similarity.java, BasicModel.java, BasicModelBE.java, BasicModelD.java, BasicModelG.java, BasicModelIF.java, BasicModelIn.java, BasicModelIne.java, BasicModelP.java, BasicStats.java, DFRSimilarity.java, DefaultSimilarity.java, Distribution.java, DistributionLL.java, DistributionSPL.java, IBSimilarity.java, LMDirichletSimilarity.java, LMJelinekMercerSimilarity.java, LMSimilarity.java, Lambda.java, LambdaDF.java, LambdaTTF.java, MultiSimilarity.java, Normalization.java, NormalizationH1.java, NormalizationH2.java, NormalizationH3.java, NormalizationZ.java, PerFieldSimilarityWrapper.java, Similarity.java, SimilarityBase.java, TFIDFSimilarity.java, package.html. Fisheye: No comparison available. 
Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/spans/FieldMaskingSpanQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansOrdered.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansOrdered.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansOrdered.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansOrdered.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,14 +17,22 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.Bits; +import org.apache.lucene.util.InPlaceMergeSorter; import java.io.IOException; -import java.util.Arrays; +import java.util.ArrayList; import java.util.Comparator; +import java.util.HashSet; import java.util.LinkedList; import java.util.List; import java.util.Collection; +import java.util.Map; +import java.util.Set; /** A Spans that is formed from the ordered subspans of a SpanNearQuery * where the subspans do not overlap and have a maximum slop between them. @@ -43,70 +51,107 @@ * matches twice: *
    t1 t2 .. t3      
    *
          t1 .. t2 t3
    + * + * + * Expert: + * Only public for subclassing. Most implementations should not need this class */ -class NearSpansOrdered implements PayloadSpans { +public class NearSpansOrdered extends Spans { private final int allowedSlop; private boolean firstTime = true; private boolean more = false; /** The spans in the same order as the SpanNearQuery */ - private final PayloadSpans[] subSpans; + private final Spans[] subSpans; /** Indicates that all subSpans have same doc() */ private boolean inSameDoc = false; private int matchDoc = -1; private int matchStart = -1; private int matchEnd = -1; - private List/**/ matchPayload; + private List matchPayload; - private final PayloadSpans[] subSpansByDoc; - private final Comparator spanDocComparator = new Comparator() { - public int compare(Object o1, Object o2) { - return ((Spans)o1).doc() - ((Spans)o2).doc(); + private final Spans[] subSpansByDoc; + // Even though the array is probably almost sorted, InPlaceMergeSorter will likely + // perform better since it has a lower overhead than TimSorter for small arrays + private final InPlaceMergeSorter sorter = new InPlaceMergeSorter() { + @Override + protected void swap(int i, int j) { + ArrayUtil.swap(subSpansByDoc, i, j); } + @Override + protected int compare(int i, int j) { + return subSpansByDoc[i].doc() - subSpansByDoc[j].doc(); + } }; - + private SpanNearQuery query; + private boolean collectPayloads = true; + + public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + this(spanNearQuery, context, acceptDocs, termContexts, true); + } - public NearSpansOrdered(SpanNearQuery spanNearQuery, IndexReader reader) + public NearSpansOrdered(SpanNearQuery spanNearQuery, AtomicReaderContext context, Bits acceptDocs, Map termContexts, boolean collectPayloads) throws IOException { if (spanNearQuery.getClauses().length < 2) { throw new IllegalArgumentException("Less than 2 clauses: " + spanNearQuery); } + this.collectPayloads = collectPayloads; allowedSlop = spanNearQuery.getSlop(); SpanQuery[] clauses = spanNearQuery.getClauses(); - subSpans = new PayloadSpans[clauses.length]; - matchPayload = new LinkedList(); - subSpansByDoc = new PayloadSpans[clauses.length]; + subSpans = new Spans[clauses.length]; + matchPayload = new LinkedList<>(); + subSpansByDoc = new Spans[clauses.length]; for (int i = 0; i < clauses.length; i++) { - subSpans[i] = clauses[i].getPayloadSpans(reader); + subSpans[i] = clauses[i].getSpans(context, acceptDocs, termContexts); subSpansByDoc[i] = subSpans[i]; // used in toSameDoc() } query = spanNearQuery; // kept for toString() only. 
} // inherit javadocs + @Override public int doc() { return matchDoc; } // inherit javadocs + @Override public int start() { return matchStart; } // inherit javadocs + @Override public int end() { return matchEnd; } + + public Spans[] getSubSpans() { + return subSpans; + } // TODO: Remove warning after API has been finalized - public Collection/**/ getPayload() throws IOException { + // TODO: Would be nice to be able to lazy load payloads + @Override + public Collection getPayload() throws IOException { return matchPayload; } // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { + @Override + public boolean isPayloadAvailable() { return matchPayload.isEmpty() == false; } + @Override + public long cost() { + long minCost = Long.MAX_VALUE; + for (int i = 0; i < subSpans.length; i++) { + minCost = Math.min(minCost, subSpans[i].cost()); + } + return minCost; + } + // inherit javadocs + @Override public boolean next() throws IOException { if (firstTime) { firstTime = false; @@ -118,11 +163,14 @@ } more = true; } - matchPayload.clear(); + if(collectPayloads) { + matchPayload.clear(); + } return advanceAfterOrdered(); } // inherit javadocs + @Override public boolean skipTo(int target) throws IOException { if (firstTime) { firstTime = false; @@ -141,7 +189,9 @@ return false; } } - matchPayload.clear(); + if(collectPayloads) { + matchPayload.clear(); + } return advanceAfterOrdered(); } @@ -161,7 +211,7 @@ /** Advance the subSpans to the same document */ private boolean toSameDoc() throws IOException { - Arrays.sort(subSpansByDoc, spanDocComparator); + sorter.sort(0, subSpansByDoc.length); int firstIndex = 0; int maxDoc = subSpansByDoc[subSpansByDoc.length - 1].doc(); while (subSpansByDoc[firstIndex].doc() != maxDoc) { @@ -186,8 +236,6 @@ } /** Check whether two Spans in the same document are ordered. - * @param spans1 - * @param spans2 * @return true iff spans1 starts before spans2 * or the spans start at the same position, * and spans1 ends before spans2. @@ -234,17 +282,22 @@ private boolean shrinkToAfterShortestMatch() throws IOException { matchStart = subSpans[subSpans.length - 1].start(); matchEnd = subSpans[subSpans.length - 1].end(); + Set possibleMatchPayloads = new HashSet<>(); if (subSpans[subSpans.length - 1].isPayloadAvailable()) { - matchPayload.addAll(subSpans[subSpans.length - 1].getPayload()); + possibleMatchPayloads.addAll(subSpans[subSpans.length - 1].getPayload()); } + + Collection possiblePayload = null; + int matchSlop = 0; int lastStart = matchStart; int lastEnd = matchEnd; for (int i = subSpans.length - 2; i >= 0; i--) { - PayloadSpans prevSpans = subSpans[i]; - - if (subSpans[i].isPayloadAvailable()) { - matchPayload.addAll(0, subSpans[i].getPayload()); + Spans prevSpans = subSpans[i]; + if (collectPayloads && prevSpans.isPayloadAvailable()) { + Collection payload = prevSpans.getPayload(); + possiblePayload = new ArrayList<>(payload.size()); + possiblePayload.addAll(payload); } int prevStart = prevSpans.start(); @@ -265,9 +318,19 @@ } else { // prevSpans still before (lastStart, lastEnd) prevStart = ppStart; prevEnd = ppEnd; + if (collectPayloads && prevSpans.isPayloadAvailable()) { + Collection payload = prevSpans.getPayload(); + possiblePayload = new ArrayList<>(payload.size()); + possiblePayload.addAll(payload); + } } } } + + if (collectPayloads && possiblePayload != null) { + possibleMatchPayloads.addAll(possiblePayload); + } + assert prevStart <= matchStart; if (matchStart > prevEnd) { // Only non overlapping spans add to slop. 
matchSlop += (matchStart - prevEnd); @@ -280,9 +343,17 @@ lastStart = prevStart; lastEnd = prevEnd; } - return matchSlop <= allowedSlop; // ordered and allowed slop + + boolean match = matchSlop <= allowedSlop; + + if(collectPayloads && match && possibleMatchPayloads.size() > 0) { + matchPayload.addAll(possibleMatchPayloads); + } + + return match; // ordered and allowed slop } + @Override public String toString() { return getClass().getName() + "("+query.toString()+")@"+ (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END")); Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansUnordered.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansUnordered.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansUnordered.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/NearSpansUnordered.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,20 +17,31 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.PriorityQueue; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; import java.util.List; +import java.util.Map; import java.util.Set; import java.util.HashSet; -class NearSpansUnordered implements PayloadSpans { +/** + * Similar to {@link NearSpansOrdered}, but for the unordered case. + * + * Expert: + * Only public for subclassing. Most implementations should not need this class + */ +public class NearSpansUnordered extends Spans { private SpanNearQuery query; - private List ordered = new ArrayList(); // spans in query order + private List ordered = new ArrayList<>(); // spans in query order + private Spans[] subSpans; private int slop; // from query private SpansCell first; // linked list of spans @@ -44,14 +55,13 @@ private boolean more = true; // true iff not done private boolean firstTime = true; // true before first next() - private class CellQueue extends PriorityQueue { + private class CellQueue extends PriorityQueue { public CellQueue(int size) { - initialize(size); + super(size); } - protected final boolean lessThan(Object o1, Object o2) { - SpansCell spans1 = (SpansCell)o1; - SpansCell spans2 = (SpansCell)o2; + @Override + protected final boolean lessThan(SpansCell spans1, SpansCell spans2) { if (spans1.doc() == spans2.doc()) { return NearSpansOrdered.docSpansOrdered(spans1, spans2); } else { @@ -62,21 +72,23 @@ /** Wraps a Spans, and can be used to form a linked list. 
*/ - private class SpansCell implements PayloadSpans { - private PayloadSpans spans; + private class SpansCell extends Spans { + private Spans spans; private SpansCell next; private int length = -1; private int index; - public SpansCell(PayloadSpans spans, int index) { + public SpansCell(Spans spans, int index) { this.spans = spans; this.index = index; } + @Override public boolean next() throws IOException { return adjust(spans.next()); } + @Override public boolean skipTo(int target) throws IOException { return adjust(spans.skipTo(target)); } @@ -98,45 +110,63 @@ return condition; } + @Override public int doc() { return spans.doc(); } + + @Override public int start() { return spans.start(); } + + @Override public int end() { return spans.end(); } // TODO: Remove warning after API has been finalized - public Collection/**/ getPayload() throws IOException { - return new ArrayList(spans.getPayload()); + @Override + public Collection getPayload() throws IOException { + return new ArrayList<>(spans.getPayload()); } // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { + @Override + public boolean isPayloadAvailable() throws IOException { return spans.isPayloadAvailable(); } + @Override + public long cost() { + return spans.cost(); + } + + @Override public String toString() { return spans.toString() + "#" + index; } } - public NearSpansUnordered(SpanNearQuery query, IndexReader reader) + public NearSpansUnordered(SpanNearQuery query, AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { this.query = query; this.slop = query.getSlop(); SpanQuery[] clauses = query.getClauses(); queue = new CellQueue(clauses.length); + subSpans = new Spans[clauses.length]; for (int i = 0; i < clauses.length; i++) { SpansCell cell = - new SpansCell(clauses[i].getPayloadSpans(reader), i); + new SpansCell(clauses[i].getSpans(context, acceptDocs, termContexts), i); ordered.add(cell); + subSpans[i] = cell.spans; } } - + public Spans[] getSubSpans() { + return subSpans; + } + @Override public boolean next() throws IOException { if (firstTime) { initList(true); listToQueue(); // initialize queue firstTime = false; } else if (more) { if (min().next()) { // trigger further scanning - queue.adjustTop(); // maintain queue + queue.updateTop(); // maintain queue } else { more = false; } @@ -174,12 +204,13 @@ more = min().next(); if (more) { - queue.adjustTop(); // maintain queue + queue.updateTop(); // maintain queue } } return false; // no more matches } + @Override public boolean skipTo(int target) throws IOException { if (firstTime) { // initialize initList(false); @@ -193,7 +224,7 @@ } else { // normal case while (more && min().doc() < target) { // skip as needed if (min().skipTo(target)) { - queue.adjustTop(); + queue.updateTop(); } else { more = false; } @@ -202,20 +233,24 @@ return more && (atMatch() || next()); } - private SpansCell min() { return (SpansCell)queue.top(); } + private SpansCell min() { return queue.top(); } + @Override public int doc() { return min().doc(); } + @Override public int start() { return min().start(); } + @Override public int end() { return max.end(); } // TODO: Remove warning after API has been finalized /** * WARNING: The List is not necessarily in order of the the positions - * @return - * @throws IOException + * @return Collection of byte[] payloads + * @throws IOException if there is a low-level I/O error */ - public Collection/**/ getPayload() throws IOException { - Set/* getPayload() throws IOException { + Set 
matchPayload = new HashSet<>(); for (SpansCell cell = first; cell != null; cell = cell.next) { if (cell.isPayloadAvailable()) { matchPayload.addAll(cell.getPayload()); @@ -225,7 +260,8 @@ } // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { + @Override + public boolean isPayloadAvailable() throws IOException { SpansCell pointer = min(); while (pointer != null) { if (pointer.isPayloadAvailable()) { @@ -236,15 +272,25 @@ return false; } + + @Override + public long cost() { + long minCost = Long.MAX_VALUE; + for (int i = 0; i < subSpans.length; i++) { + minCost = Math.min(minCost, subSpans[i].cost()); + } + return minCost; + } + @Override public String toString() { return getClass().getName() + "("+query.toString()+")@"+ (firstTime?"START":(more?(doc()+":"+start()+"-"+end()):"END")); } private void initList(boolean next) throws IOException { for (int i = 0; more && i < ordered.size(); i++) { - SpansCell cell = (SpansCell)ordered.get(i); + SpansCell cell = ordered.get(i); if (next) more = cell.next(); // move to first entry if (more) { @@ -253,8 +299,8 @@ } } - private void addToList(SpansCell cell) throws IOException { - if (last != null) { // add next to end of list + private void addToList(SpansCell cell) { + if (last != null) { // add next to end of list last.next = cell; } else first = cell; @@ -263,23 +309,23 @@ } private void firstToLast() { - last.next = first; // move first to end of list + last.next = first; // move first to end of list last = first; first = first.next; last.next = null; } - private void queueToList() throws IOException { + private void queueToList() { last = first = null; while (queue.top() != null) { - addToList((SpansCell)queue.pop()); + addToList(queue.pop()); } } private void listToQueue() { queue.clear(); // rebuild queue for (SpansCell cell = first; cell != null; cell = cell.next) { - queue.put(cell); // add to queue from list + queue.add(cell); // add to queue from list } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/spans/PayloadSpans.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanFirstQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanFirstQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanFirstQuery.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanFirstQuery.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,44 +17,40 @@ * limitations under the License. */ -import java.io.IOException; - -import java.util.Collection; -import java.util.Set; -import java.util.ArrayList; - -import org.apache.lucene.index.IndexReader; -import org.apache.lucene.search.Query; import org.apache.lucene.util.ToStringUtils; -/** Matches spans near the beginning of a field. */ -public class SpanFirstQuery extends SpanQuery { - private SpanQuery match; - private int end; +import java.io.IOException; +/** Matches spans near the beginning of a field. + *

    + * This class is a simple extension of {@link SpanPositionRangeQuery} in that it assumes the + * start to be zero and only checks the end boundary. + * + * + * */ +public class SpanFirstQuery extends SpanPositionRangeQuery { + /** Construct a SpanFirstQuery matching spans in match whose end * position is less than or equal to end. */ public SpanFirstQuery(SpanQuery match, int end) { - this.match = match; - this.end = end; + super(match, 0, end); } - /** Return the SpanQuery whose matches are filtered. */ - public SpanQuery getMatch() { return match; } + @Override + protected AcceptStatus acceptPosition(Spans spans) throws IOException { + assert spans.start() != spans.end() : "start equals end: " + spans.start(); + if (spans.start() >= end) + return AcceptStatus.NO_AND_ADVANCE; + else if (spans.end() <= end) + return AcceptStatus.YES; + else + return AcceptStatus.NO; + } - /** Return the maximum end position permitted in a match. */ - public int getEnd() { return end; } - public String getField() { return match.getField(); } - - /** Returns a collection of all terms matched by this query. - * @deprecated use extractTerms instead - * @see #extractTerms(Set) - */ - public Collection getTerms() { return match.getTerms(); } - + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("spanFirst("); buffer.append(match.toString(field)); buffer.append(", "); @@ -63,76 +59,15 @@ buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } - - public void extractTerms(Set terms) { - match.extractTerms(terms); - } - public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException { - return (PayloadSpans) getSpans(reader); + @Override + public SpanFirstQuery clone() { + SpanFirstQuery spanFirstQuery = new SpanFirstQuery((SpanQuery) match.clone(), end); + spanFirstQuery.setBoost(getBoost()); + return spanFirstQuery; } - public Spans getSpans(final IndexReader reader) throws IOException { - return new PayloadSpans() { - private PayloadSpans spans = match.getPayloadSpans(reader); - - public boolean next() throws IOException { - while (spans.next()) { // scan to next match - if (end() <= end) - return true; - } - return false; - } - - public boolean skipTo(int target) throws IOException { - if (!spans.skipTo(target)) - return false; - - return spans.end() <= end || next(); - - } - - public int doc() { return spans.doc(); } - public int start() { return spans.start(); } - public int end() { return spans.end(); } - - // TODO: Remove warning after API has been finalized - public Collection/**/ getPayload() throws IOException { - ArrayList result = null; - if (spans.isPayloadAvailable()) { - result = new ArrayList(spans.getPayload()); - } - return result;//TODO: any way to avoid the new construction? 
- } - - // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { - return spans.isPayloadAvailable(); - } - - public String toString() { - return "spans(" + SpanFirstQuery.this.toString() + ")"; - } - - }; - } - - public Query rewrite(IndexReader reader) throws IOException { - SpanFirstQuery clone = null; - - SpanQuery rewritten = (SpanQuery) match.rewrite(reader); - if (rewritten != match) { - clone = (SpanFirstQuery) this.clone(); - clone.match = rewritten; - } - - if (clone != null) { - return clone; // some clauses rewrote - } else { - return this; // no clauses rewrote - } - } - + @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof SpanFirstQuery)) return false; @@ -143,6 +78,7 @@ && this.getBoost() == other.getBoost(); } + @Override public int hashCode() { int h = match.hashCode(); h ^= (h << 8) | (h >>> 25); // reversible Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanMultiTermQueryWrapper.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNearPayloadCheckQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNearQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNearQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNearQuery.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNearQuery.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,52 +19,66 @@ import java.io.IOException; -import java.util.Collection; + import java.util.List; import java.util.ArrayList; import java.util.Iterator; +import java.util.Map; import java.util.Set; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; /** Matches spans which are near one another. One can specify slop, the * maximum number of intervening unmatched positions, as well as whether * matches are required to be in-order. */ -public class SpanNearQuery extends SpanQuery { - private List clauses; - private int slop; - private boolean inOrder; +public class SpanNearQuery extends SpanQuery implements Cloneable { + protected List clauses; + protected int slop; + protected boolean inOrder; - private String field; + protected String field; + private boolean collectPayloads; /** Construct a SpanNearQuery. Matches spans matching a span from each * clause, with up to slop total unmatched positions between * them. * When inOrder is true, the spans from each clause - * must be * ordered as in clauses. */ + * must be * ordered as in clauses. 
+ * @param clauses the clauses to find near each other + * @param slop The slop value + * @param inOrder true if order is important + * */ public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder) { + this(clauses, slop, inOrder, true); + } + + public SpanNearQuery(SpanQuery[] clauses, int slop, boolean inOrder, boolean collectPayloads) { // copy clauses array into an ArrayList - this.clauses = new ArrayList(clauses.length); + this.clauses = new ArrayList<>(clauses.length); for (int i = 0; i < clauses.length; i++) { SpanQuery clause = clauses[i]; - if (i == 0) { // check field + if (field == null) { // check field field = clause.getField(); - } else if (!clause.getField().equals(field)) { + } else if (clause.getField() != null && !clause.getField().equals(field)) { throw new IllegalArgumentException("Clauses must have same field."); } this.clauses.add(clause); } - + this.collectPayloads = collectPayloads; this.slop = slop; this.inOrder = inOrder; } /** Return the clauses whose spans are matched. */ public SpanQuery[] getClauses() { - return (SpanQuery[])clauses.toArray(new SpanQuery[clauses.size()]); + return clauses.toArray(new SpanQuery[clauses.size()]); } /** Return the maximum number of intervening unmatched positions permitted.*/ @@ -73,37 +87,24 @@ /** Return true if matches are required to be in-order.*/ public boolean isInOrder() { return inOrder; } + @Override public String getField() { return field; } - /** Returns a collection of all terms matched by this query. - * @deprecated use extractTerms instead - * @see #extractTerms(Set) - */ - public Collection getTerms() { - Collection terms = new ArrayList(); - Iterator i = clauses.iterator(); - while (i.hasNext()) { - SpanQuery clause = (SpanQuery)i.next(); - terms.addAll(clause.getTerms()); + @Override + public void extractTerms(Set terms) { + for (final SpanQuery clause : clauses) { + clause.extractTerms(terms); } - return terms; - } - - public void extractTerms(Set terms) { - Iterator i = clauses.iterator(); - while (i.hasNext()) { - SpanQuery clause = (SpanQuery)i.next(); - clause.extractTerms(terms); - } } + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("spanNear(["); - Iterator i = clauses.iterator(); + Iterator i = clauses.iterator(); while (i.hasNext()) { - SpanQuery clause = (SpanQuery)i.next(); + SpanQuery clause = i.next(); buffer.append(clause.toString(field)); if (i.hasNext()) { buffer.append(", "); @@ -118,30 +119,28 @@ return buffer.toString(); } - public Spans getSpans(final IndexReader reader) throws IOException { + @Override + public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { if (clauses.size() == 0) // optimize 0-clause case - return new SpanOrQuery(getClauses()).getPayloadSpans(reader); + return new SpanOrQuery(getClauses()).getSpans(context, acceptDocs, termContexts); if (clauses.size() == 1) // optimize 1-clause case - return ((SpanQuery)clauses.get(0)).getPayloadSpans(reader); + return clauses.get(0).getSpans(context, acceptDocs, termContexts); return inOrder - ? (PayloadSpans) new NearSpansOrdered(this, reader) - : (PayloadSpans) new NearSpansUnordered(this, reader); + ? 
(Spans) new NearSpansOrdered(this, context, acceptDocs, termContexts, collectPayloads) + : (Spans) new NearSpansUnordered(this, context, acceptDocs, termContexts); } - public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException { - return (PayloadSpans) getSpans(reader); - } - + @Override public Query rewrite(IndexReader reader) throws IOException { SpanNearQuery clone = null; for (int i = 0 ; i < clauses.size(); i++) { - SpanQuery c = (SpanQuery)clauses.get(i); + SpanQuery c = clauses.get(i); SpanQuery query = (SpanQuery) c.rewrite(reader); if (query != c) { // clause rewrote: must clone if (clone == null) - clone = (SpanNearQuery) this.clone(); + clone = this.clone(); clone.clauses.set(i,query); } } @@ -151,8 +150,22 @@ return this; // no clauses rewrote } } + + @Override + public SpanNearQuery clone() { + int sz = clauses.size(); + SpanQuery[] newClauses = new SpanQuery[sz]; + for (int i = 0; i < sz; i++) { + newClauses[i] = (SpanQuery) clauses.get(i).clone(); + } + SpanNearQuery spanNearQuery = new SpanNearQuery(newClauses, slop, inOrder); + spanNearQuery.setBoost(getBoost()); + return spanNearQuery; + } + /** Returns true iff o is equal to this. */ + @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof SpanNearQuery)) return false; @@ -166,6 +179,7 @@ return getBoost() == spanNearQuery.getBoost(); } + @Override public int hashCode() { int result; result = clauses.hashCode(); Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNotQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNotQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNotQuery.java 17 Aug 2012 14:55:07 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanNotQuery.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,27 +17,52 @@ * limitations under the License. */ +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.Query; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; import java.util.ArrayList; import java.util.Collection; +import java.util.Map; import java.util.Set; -/** Removes matches which overlap with another SpanQuery. */ -public class SpanNotQuery extends SpanQuery { +/** Removes matches which overlap with another SpanQuery or + * within a x tokens before or y tokens after another SpanQuery. */ +public class SpanNotQuery extends SpanQuery implements Cloneable { private SpanQuery include; private SpanQuery exclude; + private final int pre; + private final int post; /** Construct a SpanNotQuery matching spans from include which * have no overlap with spans from exclude.*/ public SpanNotQuery(SpanQuery include, SpanQuery exclude) { + this(include, exclude, 0, 0); + } + + + /** Construct a SpanNotQuery matching spans from include which + * have no overlap with spans from exclude within + * dist tokens of include. 
*/ + public SpanNotQuery(SpanQuery include, SpanQuery exclude, int dist) { + this(include, exclude, dist, dist); + } + + /** Construct a SpanNotQuery matching spans from include which + * have no overlap with spans from exclude within + * pre tokens before or post tokens of include. */ + public SpanNotQuery(SpanQuery include, SpanQuery exclude, int pre, int post) { this.include = include; this.exclude = exclude; + this.pre = (pre >=0) ? pre : 0; + this.post = (post >= 0) ? post : 0; - if (!include.getField().equals(exclude.getField())) + if (include.getField() != null && exclude.getField() != null && !include.getField().equals(exclude.getField())) throw new IllegalArgumentException("Clauses must have same field."); } @@ -47,36 +72,46 @@ /** Return the SpanQuery whose matches must not overlap those returned. */ public SpanQuery getExclude() { return exclude; } + @Override public String getField() { return include.getField(); } - /** Returns a collection of all terms matched by this query. - * @deprecated use extractTerms instead - * @see #extractTerms(Set) - */ - public Collection getTerms() { return include.getTerms(); } - - public void extractTerms(Set terms) { include.extractTerms(terms); } + @Override + public void extractTerms(Set terms) { include.extractTerms(terms); } + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("spanNot("); buffer.append(include.toString(field)); buffer.append(", "); buffer.append(exclude.toString(field)); + buffer.append(", "); + buffer.append(Integer.toString(pre)); + buffer.append(", "); + buffer.append(Integer.toString(post)); buffer.append(")"); buffer.append(ToStringUtils.boost(getBoost())); return buffer.toString(); } + @Override + public SpanNotQuery clone() { + SpanNotQuery spanNotQuery = new SpanNotQuery((SpanQuery)include.clone(), + (SpanQuery) exclude.clone(), pre, post); + spanNotQuery.setBoost(getBoost()); + return spanNotQuery; + } - public Spans getSpans(final IndexReader reader) throws IOException { - return new PayloadSpans() { - private PayloadSpans includeSpans = include.getPayloadSpans(reader); + @Override + public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { + return new Spans() { + private Spans includeSpans = include.getSpans(context, acceptDocs, termContexts); private boolean moreInclude = true; - private Spans excludeSpans = exclude.getSpans(reader); + private Spans excludeSpans = exclude.getSpans(context, acceptDocs, termContexts); private boolean moreExclude = excludeSpans.next(); + @Override public boolean next() throws IOException { if (moreInclude) // move to next include moreInclude = includeSpans.next(); @@ -88,20 +123,21 @@ while (moreExclude // while exclude is before && includeSpans.doc() == excludeSpans.doc() - && excludeSpans.end() <= includeSpans.start()) { + && excludeSpans.end() <= includeSpans.start() - pre) { moreExclude = excludeSpans.next(); // increment exclude } if (!moreExclude // if no intersection || includeSpans.doc() != excludeSpans.doc() - || includeSpans.end() <= excludeSpans.start()) + || includeSpans.end()+post <= excludeSpans.start()) break; // we found a match moreInclude = includeSpans.next(); // intersected: keep scanning } return moreInclude; } + @Override public boolean skipTo(int target) throws IOException { if (moreInclude) // skip include moreInclude = includeSpans.skipTo(target); @@ -115,58 +151,66 @@ while (moreExclude 
// while exclude is before && includeSpans.doc() == excludeSpans.doc() - && excludeSpans.end() <= includeSpans.start()) { + && excludeSpans.end() <= includeSpans.start()-pre) { moreExclude = excludeSpans.next(); // increment exclude } if (!moreExclude // if no intersection || includeSpans.doc() != excludeSpans.doc() - || includeSpans.end() <= excludeSpans.start()) + || includeSpans.end()+post <= excludeSpans.start()) return true; // we found a match return next(); // scan to next match } + @Override public int doc() { return includeSpans.doc(); } + @Override public int start() { return includeSpans.start(); } + @Override public int end() { return includeSpans.end(); } - // TODO: Remove warning after API has been finalizedb - public Collection/**/ getPayload() throws IOException { - ArrayList result = null; + // TODO: Remove warning after API has been finalized + @Override + public Collection getPayload() throws IOException { + ArrayList result = null; if (includeSpans.isPayloadAvailable()) { - result = new ArrayList(includeSpans.getPayload()); + result = new ArrayList<>(includeSpans.getPayload()); } return result; } // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { + @Override + public boolean isPayloadAvailable() throws IOException { return includeSpans.isPayloadAvailable(); } + @Override + public long cost() { + return includeSpans.cost(); + } + + @Override public String toString() { return "spans(" + SpanNotQuery.this.toString() + ")"; } }; } - public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException { - return (PayloadSpans) getSpans(reader); - } - + @Override public Query rewrite(IndexReader reader) throws IOException { SpanNotQuery clone = null; SpanQuery rewrittenInclude = (SpanQuery) include.rewrite(reader); if (rewrittenInclude != include) { - clone = (SpanNotQuery) this.clone(); + clone = this.clone(); clone.include = rewrittenInclude; } SpanQuery rewrittenExclude = (SpanQuery) exclude.rewrite(reader); if (rewrittenExclude != exclude) { - if (clone == null) clone = (SpanNotQuery) this.clone(); + if (clone == null) clone = this.clone(); clone.exclude = rewrittenExclude; } @@ -178,23 +222,30 @@ } /** Returns true iff o is equal to this. 
*/ + @Override public boolean equals(Object o) { - if (this == o) return true; - if (!(o instanceof SpanNotQuery)) return false; + if (!super.equals(o)) + return false; SpanNotQuery other = (SpanNotQuery)o; return this.include.equals(other.include) && this.exclude.equals(other.exclude) - && this.getBoost() == other.getBoost(); + && this.pre == other.pre + && this.post == other.post; } + @Override public int hashCode() { - int h = include.hashCode(); - h = (h<<1) | (h >>> 31); // rotate left + int h = super.hashCode(); + h = Integer.rotateLeft(h, 1); + h ^= include.hashCode(); + h = Integer.rotateLeft(h, 1); h ^= exclude.hashCode(); - h = (h<<1) | (h >>> 31); // rotate left - h ^= Float.floatToRawIntBits(getBoost()); + h = Integer.rotateLeft(h, 1); + h ^= pre; + h = Integer.rotateLeft(h, 1); + h ^= post; return h; } -} +} \ No newline at end of file Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanOrQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanOrQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanOrQuery.java 17 Aug 2012 14:55:07 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanOrQuery.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -23,71 +23,80 @@ import java.util.Collection; import java.util.ArrayList; import java.util.Iterator; +import java.util.Map; import java.util.Set; +import org.apache.lucene.index.AtomicReaderContext; import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.PriorityQueue; import org.apache.lucene.util.ToStringUtils; import org.apache.lucene.search.Query; /** Matches the union of its clauses.*/ -public class SpanOrQuery extends SpanQuery { - private List clauses; +public class SpanOrQuery extends SpanQuery implements Cloneable { + private List clauses; private String field; /** Construct a SpanOrQuery merging the provided clauses. */ - public SpanOrQuery(SpanQuery[] clauses) { + public SpanOrQuery(SpanQuery... clauses) { // copy clauses array into an ArrayList - this.clauses = new ArrayList(clauses.length); + this.clauses = new ArrayList<>(clauses.length); for (int i = 0; i < clauses.length; i++) { - SpanQuery clause = clauses[i]; - if (i == 0) { // check field - field = clause.getField(); - } else if (!clause.getField().equals(field)) { - throw new IllegalArgumentException("Clauses must have same field."); - } - this.clauses.add(clause); + addClause(clauses[i]); } } + /** Adds a clause to this query */ + public final void addClause(SpanQuery clause) { + if (field == null) { + field = clause.getField(); + } else if (clause.getField() != null && !clause.getField().equals(field)) { + throw new IllegalArgumentException("Clauses must have same field."); + } + this.clauses.add(clause); + } + /** Return the clauses whose spans are matched. 
*/ public SpanQuery[] getClauses() { - return (SpanQuery[])clauses.toArray(new SpanQuery[clauses.size()]); + return clauses.toArray(new SpanQuery[clauses.size()]); } + @Override public String getField() { return field; } - /** Returns a collection of all terms matched by this query. - * @deprecated use extractTerms instead - * @see #extractTerms(Set) - */ - public Collection getTerms() { - Collection terms = new ArrayList(); - Iterator i = clauses.iterator(); - while (i.hasNext()) { - SpanQuery clause = (SpanQuery)i.next(); - terms.addAll(clause.getTerms()); + @Override + public void extractTerms(Set terms) { + for(final SpanQuery clause: clauses) { + clause.extractTerms(terms); } - return terms; } - public void extractTerms(Set terms) { - Iterator i = clauses.iterator(); - while (i.hasNext()) { - SpanQuery clause = (SpanQuery)i.next(); - clause.extractTerms(terms); + @Override + public SpanOrQuery clone() { + int sz = clauses.size(); + SpanQuery[] newClauses = new SpanQuery[sz]; + + for (int i = 0; i < sz; i++) { + newClauses[i] = (SpanQuery) clauses.get(i).clone(); } + SpanOrQuery soq = new SpanOrQuery(newClauses); + soq.setBoost(getBoost()); + return soq; } + @Override public Query rewrite(IndexReader reader) throws IOException { SpanOrQuery clone = null; for (int i = 0 ; i < clauses.size(); i++) { - SpanQuery c = (SpanQuery)clauses.get(i); + SpanQuery c = clauses.get(i); SpanQuery query = (SpanQuery) c.rewrite(reader); if (query != c) { // clause rewrote: must clone if (clone == null) - clone = (SpanOrQuery) this.clone(); + clone = this.clone(); clone.clauses.set(i,query); } } @@ -98,12 +107,13 @@ } } + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); buffer.append("spanOr(["); - Iterator i = clauses.iterator(); + Iterator i = clauses.iterator(); while (i.hasNext()) { - SpanQuery clause = (SpanQuery)i.next(); + SpanQuery clause = i.next(); buffer.append(clause.toString(field)); if (i.hasNext()) { buffer.append(", "); @@ -114,18 +124,19 @@ return buffer.toString(); } + @Override public boolean equals(Object o) { if (this == o) return true; if (o == null || getClass() != o.getClass()) return false; final SpanOrQuery that = (SpanOrQuery) o; if (!clauses.equals(that.clauses)) return false; - if (!field.equals(that.field)) return false; return getBoost() == that.getBoost(); } + @Override public int hashCode() { int h = clauses.hashCode(); h ^= (h << 10) | (h >>> 23); @@ -134,14 +145,13 @@ } - private class SpanQueue extends PriorityQueue { + private class SpanQueue extends PriorityQueue { public SpanQueue(int size) { - initialize(size); + super(size); } - protected final boolean lessThan(Object o1, Object o2) { - Spans spans1 = (Spans)o1; - Spans spans2 = (Spans)o2; + @Override + protected final boolean lessThan(Spans spans1, Spans spans2) { if (spans1.doc() == spans2.doc()) { if (spans1.start() == spans2.start()) { return spans1.end() < spans2.end(); @@ -154,30 +164,30 @@ } } - public PayloadSpans getPayloadSpans(final IndexReader reader) throws IOException { - return (PayloadSpans)getSpans(reader); - } - - public Spans getSpans(final IndexReader reader) throws IOException { + @Override + public Spans getSpans(final AtomicReaderContext context, final Bits acceptDocs, final Map termContexts) throws IOException { if (clauses.size() == 1) // optimize 1-clause case - return ((SpanQuery)clauses.get(0)).getPayloadSpans(reader); + return (clauses.get(0)).getSpans(context, acceptDocs, termContexts); - return 
new PayloadSpans() { + return new Spans() { private SpanQueue queue = null; + private long cost; private boolean initSpanQueue(int target) throws IOException { queue = new SpanQueue(clauses.size()); - Iterator i = clauses.iterator(); + Iterator i = clauses.iterator(); while (i.hasNext()) { - PayloadSpans spans = ((SpanQuery)i.next()).getPayloadSpans(reader); + Spans spans = i.next().getSpans(context, acceptDocs, termContexts); + cost += spans.cost(); if ( ((target == -1) && spans.next()) || ((target != -1) && spans.skipTo(target))) { - queue.put(spans); + queue.add(spans); } } return queue.size() != 0; } + @Override public boolean next() throws IOException { if (queue == null) { return initSpanQueue(-1); @@ -188,58 +198,73 @@ } if (top().next()) { // move to next - queue.adjustTop(); + queue.updateTop(); return true; } queue.pop(); // exhausted a clause return queue.size() != 0; } - private PayloadSpans top() { return (PayloadSpans)queue.top(); } + private Spans top() { return queue.top(); } + @Override public boolean skipTo(int target) throws IOException { if (queue == null) { return initSpanQueue(target); } - + + boolean skipCalled = false; while (queue.size() != 0 && top().doc() < target) { if (top().skipTo(target)) { - queue.adjustTop(); + queue.updateTop(); } else { queue.pop(); } + skipCalled = true; } - - return queue.size() != 0; + + if (skipCalled) { + return queue.size() != 0; + } + return next(); } + @Override public int doc() { return top().doc(); } + @Override public int start() { return top().start(); } + @Override public int end() { return top().end(); } - // TODO: Remove warning after API has been finalized - public Collection/**/ getPayload() throws IOException { - ArrayList result = null; - PayloadSpans theTop = top(); + @Override + public Collection getPayload() throws IOException { + ArrayList result = null; + Spans theTop = top(); if (theTop != null && theTop.isPayloadAvailable()) { - result = new ArrayList(theTop.getPayload()); + result = new ArrayList<>(theTop.getPayload()); } return result; } - // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { - PayloadSpans top = top(); + @Override + public boolean isPayloadAvailable() throws IOException { + Spans top = top(); return top != null && top.isPayloadAvailable(); } + @Override public String toString() { return "spans("+SpanOrQuery.this+")@"+ ((queue == null)?"START" :(queue.size()>0?(doc()+":"+start()+"-"+end()):"END")); } + @Override + public long cost() { + return cost; + } + }; } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanPayloadCheckQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanPositionCheckQuery.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanPositionRangeQuery.java'. Fisheye: No comparison available. Pass `N' to diff? 
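
Note: the span-query diffs above (SpanNearQuery, SpanNotQuery, SpanOrQuery, SpanFirstQuery) drop the old PayloadSpans plumbing and introduce new constructor variants: a varargs SpanOrQuery with addClause(), a pre/post form of SpanNotQuery, a collectPayloads flag on SpanNearQuery, and SpanFirstQuery reimplemented on top of SpanPositionRangeQuery. The following is a minimal sketch of how those constructors compose, assuming a Lucene 4.x classpath; the field name and term values are hypothetical and are not part of this change set.

    import org.apache.lucene.index.Term;
    import org.apache.lucene.search.spans.SpanFirstQuery;
    import org.apache.lucene.search.spans.SpanNearQuery;
    import org.apache.lucene.search.spans.SpanNotQuery;
    import org.apache.lucene.search.spans.SpanOrQuery;
    import org.apache.lucene.search.spans.SpanQuery;
    import org.apache.lucene.search.spans.SpanTermQuery;

    public class SpanQueryExamples {
      public static void main(String[] args) {
        // Hypothetical field and terms, for illustration only.
        SpanQuery apache = new SpanTermQuery(new Term("body", "apache"));
        SpanQuery lucene = new SpanTermQuery(new Term("body", "lucene"));
        SpanQuery solr   = new SpanTermQuery(new Term("body", "solr"));

        // "apache" within 3 positions of "lucene", in order; the trailing boolean
        // is the collectPayloads flag added by this change set.
        SpanQuery near = new SpanNearQuery(new SpanQuery[] { apache, lucene }, 3, true, false);

        // Union of clauses; SpanOrQuery now takes varargs and supports addClause().
        SpanOrQuery or = new SpanOrQuery(lucene, solr);
        or.addClause(new SpanTermQuery(new Term("body", "search")));

        // Exclude matches of "solr" up to 2 tokens before or 4 tokens after the
        // included span, using the new pre/post form of SpanNotQuery.
        SpanQuery not = new SpanNotQuery(near, solr, 2, 4);

        // Matches whose end position is <= 10; SpanFirstQuery now only checks
        // the end boundary, delegating the rest to SpanPositionRangeQuery.
        SpanQuery first = new SpanFirstQuery(or, 10);

        System.out.println(not.toString("body"));
        System.out.println(first.toString("body"));
      }
    }
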
Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanQuery.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanQuery.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,49 +17,33 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import java.io.IOException; +import java.util.Map; + +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.Query; -import org.apache.lucene.search.Searcher; +import org.apache.lucene.search.IndexSearcher; import org.apache.lucene.search.Weight; +import org.apache.lucene.util.Bits; -import java.io.IOException; -import java.util.Collection; -import java.util.Set; - /** Base class for span-based queries. */ public abstract class SpanQuery extends Query { /** Expert: Returns the matches for this query in an index. Used internally * to search for spans. */ - public abstract Spans getSpans(IndexReader reader) throws IOException; + public abstract Spans getSpans(AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException; - /** - * Returns the matches for this query in an index, including access to any {@link org.apache.lucene.index.Payload}s at those - * positions. Implementing classes that want access to the payloads will need to implement this. - * @param reader The {@link org.apache.lucene.index.IndexReader} to use to get spans/payloads - * @return null - * @throws IOException if there is an error accessing the payload - * - * - * WARNING: The status of the Payloads feature is experimental. - * The APIs introduced here might change in the future and will not be - * supported anymore in such a case. + /** + * Returns the name of the field matched by this query. + *

    + * Note that this may return null if the query matches no terms. */ - public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException{ - return null; - }; - - /** Returns the name of the field matched by this query.*/ public abstract String getField(); - /** Returns a collection of all terms matched by this query. - * @deprecated use extractTerms instead - * @see Query#extractTerms(Set) - */ - public abstract Collection getTerms(); - - protected Weight createWeight(Searcher searcher) throws IOException { + @Override + public Weight createWeight(IndexSearcher searcher) throws IOException { return new SpanWeight(this, searcher); } } - Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanScorer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanScorer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanScorer.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanScorer.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,91 +17,95 @@ * limitations under the License. */ -import org.apache.lucene.search.Explanation; -import org.apache.lucene.search.Scorer; -import org.apache.lucene.search.Similarity; -import org.apache.lucene.search.Weight; - import java.io.IOException; +import org.apache.lucene.search.Weight; +import org.apache.lucene.search.Scorer; +import org.apache.lucene.search.similarities.Similarity; + /** * Public for extension only. */ public class SpanScorer extends Scorer { protected Spans spans; - protected Weight weight; - protected byte[] norms; - protected float value; - protected boolean firstTime = true; protected boolean more = true; protected int doc; protected float freq; - - protected SpanScorer(Spans spans, Weight weight, Similarity similarity, byte[] norms) - throws IOException { - super(similarity); + protected int numMatches; + protected final Similarity.SimScorer docScorer; + + protected SpanScorer(Spans spans, Weight weight, Similarity.SimScorer docScorer) + throws IOException { + super(weight); + this.docScorer = docScorer; this.spans = spans; - this.norms = norms; - this.weight = weight; - this.value = weight.getValue(); + doc = -1; + more = spans.next(); } - public boolean next() throws IOException { - if (firstTime) { - more = spans.next(); - firstTime = false; + @Override + public int nextDoc() throws IOException { + if (!setFreqCurrentDoc()) { + doc = NO_MORE_DOCS; } - return setFreqCurrentDoc(); + return doc; } - public boolean skipTo(int target) throws IOException { - if (firstTime) { - more = spans.skipTo(target); - firstTime = false; + @Override + public int advance(int target) throws IOException { + if (!more) { + return doc = NO_MORE_DOCS; } - if (! more) { - return false; - } if (spans.doc() < target) { // setFreqCurrentDoc() leaves spans.doc() ahead more = spans.skipTo(target); } - return setFreqCurrentDoc(); + if (!setFreqCurrentDoc()) { + doc = NO_MORE_DOCS; + } + return doc; } - + protected boolean setFreqCurrentDoc() throws IOException { - if (! 
more) { + if (!more) { return false; } doc = spans.doc(); freq = 0.0f; + numMatches = 0; do { int matchLength = spans.end() - spans.start(); - freq += getSimilarity().sloppyFreq(matchLength); + freq += docScorer.computeSlopFactor(matchLength); + numMatches++; more = spans.next(); } while (more && (doc == spans.doc())); return true; } - public int doc() { return doc; } + @Override + public int docID() { return doc; } + @Override public float score() throws IOException { - float raw = getSimilarity().tf(freq) * value; // raw score - return raw * Similarity.decodeNorm(norms[doc]); // normalize + return docScorer.score(doc, freq); } - - public Explanation explain(final int doc) throws IOException { - Explanation tfExplanation = new Explanation(); - - skipTo(doc); - - float phraseFreq = (doc() == doc) ? freq : 0.0f; - tfExplanation.setValue(getSimilarity().tf(phraseFreq)); - tfExplanation.setDescription("tf(phraseFreq=" + phraseFreq + ")"); - - return tfExplanation; + + @Override + public int freq() throws IOException { + return numMatches; } - + + /** Returns the intermediate "sloppy freq" adjusted for edit distance + * @lucene.internal */ + // only public so .payloads can see it. + public float sloppyFreq() throws IOException { + return freq; + } + + @Override + public long cost() { + return spans.cost(); + } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanTermQuery.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanTermQuery.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanTermQuery.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanTermQuery.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,13 +17,19 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.Fields; import org.apache.lucene.index.Term; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.index.TermContext; +import org.apache.lucene.index.TermState; +import org.apache.lucene.index.Terms; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.util.Bits; import org.apache.lucene.util.ToStringUtils; import java.io.IOException; -import java.util.ArrayList; -import java.util.Collection; +import java.util.Map; import java.util.Set; /** Matches spans containing a term. */ @@ -36,23 +42,17 @@ /** Return the term whose spans are matched. */ public Term getTerm() { return term; } + @Override public String getField() { return term.field(); } - /** Returns a collection of all terms matched by this query. 
- * @deprecated use extractTerms instead - * @see #extractTerms(Set) - */ - public Collection getTerms() { - Collection terms = new ArrayList(); + @Override + public void extractTerms(Set terms) { terms.add(term); - return terms; } - public void extractTerms(Set terms) { - terms.add(term); - } + @Override public String toString(String field) { - StringBuffer buffer = new StringBuffer(); + StringBuilder buffer = new StringBuilder(); if (term.field().equals(field)) buffer.append(term.text()); else @@ -61,27 +61,72 @@ return buffer.toString(); } - /** Returns true iff o is equal to this. */ - public boolean equals(Object o) { - if (!(o instanceof SpanTermQuery)) - return false; - SpanTermQuery other = (SpanTermQuery)o; - return (this.getBoost() == other.getBoost()) - && this.term.equals(other.term); - } - - /** Returns a hash code value for this object.*/ + @Override public int hashCode() { - return Float.floatToIntBits(getBoost()) ^ term.hashCode() ^ 0xD23FE494; + final int prime = 31; + int result = super.hashCode(); + result = prime * result + ((term == null) ? 0 : term.hashCode()); + return result; } - public Spans getSpans(final IndexReader reader) throws IOException { - return new TermSpans(reader.termPositions(term), term); + @Override + public boolean equals(Object obj) { + if (this == obj) + return true; + if (!super.equals(obj)) + return false; + if (getClass() != obj.getClass()) + return false; + SpanTermQuery other = (SpanTermQuery) obj; + if (term == null) { + if (other.term != null) + return false; + } else if (!term.equals(other.term)) + return false; + return true; } + @Override + public Spans getSpans(final AtomicReaderContext context, Bits acceptDocs, Map termContexts) throws IOException { + TermContext termContext = termContexts.get(term); + final TermState state; + if (termContext == null) { + // this happens with span-not query, as it doesn't include the NOT side in extractTerms() + // so we seek to the term now in this segment..., this sucks because its ugly mostly! 
+ final Fields fields = context.reader().fields(); + if (fields != null) { + final Terms terms = fields.terms(term.field()); + if (terms != null) { + final TermsEnum termsEnum = terms.iterator(null); + if (termsEnum.seekExact(term.bytes())) { + state = termsEnum.termState(); + } else { + state = null; + } + } else { + state = null; + } + } else { + state = null; + } + } else { + state = termContext.get(context.ord); + } + + if (state == null) { // term is not present in that reader + return TermSpans.EMPTY_TERM_SPANS; + } + + final TermsEnum termsEnum = context.reader().terms(term.field()).iterator(null); + termsEnum.seekExact(term.bytes(), state); + + final DocsAndPositionsEnum postings = termsEnum.docsAndPositions(acceptDocs, null, DocsAndPositionsEnum.FLAG_PAYLOADS); - public PayloadSpans getPayloadSpans(IndexReader reader) throws IOException { - return (PayloadSpans) getSpans(reader); + if (postings != null) { + return new TermSpans(postings, term); + } else { + // term does exist, but has no positions + throw new IllegalStateException("field \"" + term.field() + "\" was indexed without position data; cannot run SpanTermQuery (term=" + term.text() + ")"); + } } - } Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanWeight.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanWeight.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanWeight.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/SpanWeight.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,130 +17,96 @@ * limitations under the License. */ -import org.apache.lucene.index.IndexReader; +import org.apache.lucene.index.AtomicReaderContext; +import org.apache.lucene.index.IndexReaderContext; import org.apache.lucene.index.Term; +import org.apache.lucene.index.TermContext; import org.apache.lucene.search.*; +import org.apache.lucene.search.similarities.Similarity; +import org.apache.lucene.search.similarities.Similarity.SimScorer; +import org.apache.lucene.util.Bits; import java.io.IOException; -import java.util.HashSet; -import java.util.Iterator; -import java.util.Set; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeSet; /** * Expert-only. 
Public for use by other weight implementations */ -public class SpanWeight implements Weight { +public class SpanWeight extends Weight { protected Similarity similarity; - protected float value; - protected float idf; - protected float queryNorm; - protected float queryWeight; - - protected Set terms; + protected Map termContexts; protected SpanQuery query; + protected Similarity.SimWeight stats; - public SpanWeight(SpanQuery query, Searcher searcher) + public SpanWeight(SpanQuery query, IndexSearcher searcher) throws IOException { - this.similarity = query.getSimilarity(searcher); + this.similarity = searcher.getSimilarity(); this.query = query; - terms=new HashSet(); + + termContexts = new HashMap<>(); + TreeSet terms = new TreeSet<>(); query.extractTerms(terms); - - idf = this.query.getSimilarity(searcher).idf(terms, searcher); + final IndexReaderContext context = searcher.getTopReaderContext(); + final TermStatistics termStats[] = new TermStatistics[terms.size()]; + int i = 0; + for (Term term : terms) { + TermContext state = TermContext.build(context, term); + termStats[i] = searcher.termStatistics(term, state); + termContexts.put(term, state); + i++; + } + final String field = query.getField(); + if (field != null) { + stats = similarity.computeWeight(query.getBoost(), + searcher.collectionStatistics(query.getField()), + termStats); + } } + @Override public Query getQuery() { return query; } - public float getValue() { return value; } - public float sumOfSquaredWeights() throws IOException { - queryWeight = idf * query.getBoost(); // compute query weight - return queryWeight * queryWeight; // square it + @Override + public float getValueForNormalization() throws IOException { + return stats == null ? 1.0f : stats.getValueForNormalization(); } - public void normalize(float queryNorm) { - this.queryNorm = queryNorm; - queryWeight *= queryNorm; // normalize query weight - value = queryWeight * idf; // idf for document + @Override + public void normalize(float queryNorm, float topLevelBoost) { + if (stats != null) { + stats.normalize(queryNorm, topLevelBoost); + } } - public Scorer scorer(IndexReader reader) throws IOException { - return new SpanScorer(query.getSpans(reader), this, - similarity, - reader.norms(query.getField())); + @Override + public Scorer scorer(AtomicReaderContext context, Bits acceptDocs) throws IOException { + if (stats == null) { + return null; + } else { + return new SpanScorer(query.getSpans(context, acceptDocs, termContexts), this, similarity.simScorer(stats, context)); + } } - public Explanation explain(IndexReader reader, int doc) - throws IOException { - - ComplexExplanation result = new ComplexExplanation(); - result.setDescription("weight("+getQuery()+" in "+doc+"), product of:"); - String field = ((SpanQuery)getQuery()).getField(); - - StringBuffer docFreqs = new StringBuffer(); - Iterator i = terms.iterator(); - while (i.hasNext()) { - Term term = (Term)i.next(); - docFreqs.append(term.text()); - docFreqs.append("="); - docFreqs.append(reader.docFreq(term)); - - if (i.hasNext()) { - docFreqs.append(" "); + @Override + public Explanation explain(AtomicReaderContext context, int doc) throws IOException { + SpanScorer scorer = (SpanScorer) scorer(context, context.reader().getLiveDocs()); + if (scorer != null) { + int newDoc = scorer.advance(doc); + if (newDoc == doc) { + float freq = scorer.sloppyFreq(); + SimScorer docScorer = similarity.simScorer(stats, context); + ComplexExplanation result = new ComplexExplanation(); + 
result.setDescription("weight("+getQuery()+" in "+doc+") [" + similarity.getClass().getSimpleName() + "], result of:"); + Explanation scoreExplanation = docScorer.explain(doc, new Explanation(freq, "phraseFreq=" + freq)); + result.addDetail(scoreExplanation); + result.setValue(scoreExplanation.getValue()); + result.setMatch(true); + return result; } } - - Explanation idfExpl = - new Explanation(idf, "idf(" + field + ": " + docFreqs + ")"); - - // explain query weight - Explanation queryExpl = new Explanation(); - queryExpl.setDescription("queryWeight(" + getQuery() + "), product of:"); - - Explanation boostExpl = new Explanation(getQuery().getBoost(), "boost"); - if (getQuery().getBoost() != 1.0f) - queryExpl.addDetail(boostExpl); - queryExpl.addDetail(idfExpl); - - Explanation queryNormExpl = new Explanation(queryNorm,"queryNorm"); - queryExpl.addDetail(queryNormExpl); - - queryExpl.setValue(boostExpl.getValue() * - idfExpl.getValue() * - queryNormExpl.getValue()); - - result.addDetail(queryExpl); - - // explain field weight - ComplexExplanation fieldExpl = new ComplexExplanation(); - fieldExpl.setDescription("fieldWeight("+field+":"+query.toString(field)+ - " in "+doc+"), product of:"); - - Explanation tfExpl = scorer(reader).explain(doc); - fieldExpl.addDetail(tfExpl); - fieldExpl.addDetail(idfExpl); - - Explanation fieldNormExpl = new Explanation(); - byte[] fieldNorms = reader.norms(field); - float fieldNorm = - fieldNorms!=null ? Similarity.decodeNorm(fieldNorms[doc]) : 0.0f; - fieldNormExpl.setValue(fieldNorm); - fieldNormExpl.setDescription("fieldNorm(field="+field+", doc="+doc+")"); - fieldExpl.addDetail(fieldNormExpl); - - fieldExpl.setMatch(Boolean.valueOf(tfExpl.isMatch())); - fieldExpl.setValue(tfExpl.getValue() * - idfExpl.getValue() * - fieldNormExpl.getValue()); - - result.addDetail(fieldExpl); - result.setMatch(fieldExpl.getMatch()); - - // combine them - result.setValue(queryExpl.getValue() * fieldExpl.getValue()); - - if (queryExpl.getValue() == 1.0f) - return fieldExpl; - - return result; + + return new ComplexExplanation(false, 0.0f, "no matching term"); } } Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/Spans.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/Spans.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/Spans.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/Spans.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.search.spans; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,37 +18,82 @@ */ import java.io.IOException; +import java.util.Collection; /** Expert: an enumeration of span matches. Used to implement span searching. * Each span represents a range of term positions within a document. Matches * are enumerated in order, by increasing document number, within that by * increasing start position and finally by increasing end position. */ -public interface Spans { +public abstract class Spans { /** Move to the next match, returning true iff any such exists. 
*/ - boolean next() throws IOException; + public abstract boolean next() throws IOException; /** Skips to the first match beyond the current, whose document number is - * greater than or equal to target.

    Returns true iff there is such - * a match.

    Behaves as if written:

    +   * greater than or equal to target.
    +   * 

The behavior of this method is undefined when called with + * target ≤ current, or after the iterator has been exhausted. + * Both cases may result in unpredictable behavior. + *

    Returns true iff there is such + * a match.

    Behaves as if written:

        *   boolean skipTo(int target) {
        *     do {
        *       if (!next())
    -   * 	     return false;
    +   *         return false;
        *     } while (target > doc());
        *     return true;
        *   }
        * 
    * Most implementations are considerably more efficient than that. */ - boolean skipTo(int target) throws IOException; + public abstract boolean skipTo(int target) throws IOException; /** Returns the document number of the current match. Initially invalid. */ - int doc(); + public abstract int doc(); /** Returns the start position of the current match. Initially invalid. */ - int start(); + public abstract int start(); /** Returns the end position of the current match. Initially invalid. */ - int end(); + public abstract int end(); + + /** + * Returns the payload data for the current span. + * This is invalid until {@link #next()} is called for + * the first time. + * This method must not be called more than once after each call + * of {@link #next()}. However, most payloads are loaded lazily, + * so if the payload data for the current position is not needed, + * this method may not be called at all for performance reasons. An ordered + * SpanQuery does not lazy load, so if you have payloads in your index and + * you do not want ordered SpanNearQuerys to collect payloads, you can + * disable collection with a constructor option.
    + *
    + * Note that the return type is a collection, thus the ordering should not be relied upon. + *
    + * @lucene.experimental + * + * @return a List of byte arrays containing the data of this payload, otherwise null if isPayloadAvailable is false + * @throws IOException if there is a low-level I/O error + */ + // TODO: Remove warning after API has been finalized + public abstract Collection getPayload() throws IOException; + /** + * Checks if a payload can be loaded at this position. + *

    + * Payloads can only be loaded once per call to + * {@link #next()}. + * + * @return true if there is a payload available at this position that can be loaded + */ + public abstract boolean isPayloadAvailable() throws IOException; + + /** + * Returns the estimated cost of this spans. + *

    + * This is generally an upper bound of the number of documents this iterator + * might match, but may be a rough heuristic, hardcoded value, or otherwise + * completely inaccurate. + */ + public abstract long cost(); } Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/TermSpans.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/TermSpans.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/TermSpans.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/TermSpans.java 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -17,7 +17,9 @@ import org.apache.lucene.index.Term; -import org.apache.lucene.index.TermPositions; +import org.apache.lucene.index.DocsAndPositionsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.util.BytesRef; import java.io.IOException; import java.util.Collections; @@ -27,89 +29,155 @@ * Expert: * Public for extension only */ -public class TermSpans implements PayloadSpans { - protected TermPositions positions; - protected Term term; +public class TermSpans extends Spans { + protected final DocsAndPositionsEnum postings; + protected final Term term; protected int doc; protected int freq; protected int count; protected int position; + protected boolean readPayload; - - public TermSpans(TermPositions positions, Term term) throws IOException { - - this.positions = positions; + public TermSpans(DocsAndPositionsEnum postings, Term term) { + this.postings = postings; this.term = term; doc = -1; } + // only for EmptyTermSpans (below) + TermSpans() { + term = null; + postings = null; + } + + @Override public boolean next() throws IOException { if (count == freq) { - if (!positions.next()) { - doc = Integer.MAX_VALUE; + if (postings == null) { return false; } - doc = positions.doc(); - freq = positions.freq(); + doc = postings.nextDoc(); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { + return false; + } + freq = postings.freq(); count = 0; } - position = positions.nextPosition(); + position = postings.nextPosition(); count++; + readPayload = false; return true; } + @Override public boolean skipTo(int target) throws IOException { - // are we already at the correct position? 
- if (doc >= target) { - return true; - } - - if (!positions.skipTo(target)) { - doc = Integer.MAX_VALUE; + assert target > doc; + doc = postings.advance(target); + if (doc == DocIdSetIterator.NO_MORE_DOCS) { return false; } - doc = positions.doc(); - freq = positions.freq(); + freq = postings.freq(); count = 0; - - position = positions.nextPosition(); + position = postings.nextPosition(); count++; - + readPayload = false; return true; } + @Override public int doc() { return doc; } + @Override public int start() { return position; } + @Override public int end() { return position + 1; } + @Override + public long cost() { + return postings.cost(); + } + // TODO: Remove warning after API has been finalized - public Collection/**/ getPayload() throws IOException { - byte [] bytes = new byte[positions.getPayloadLength()]; - bytes = positions.getPayload(bytes, 0); + @Override + public Collection getPayload() throws IOException { + final BytesRef payload = postings.getPayload(); + readPayload = true; + final byte[] bytes; + if (payload != null) { + bytes = new byte[payload.length]; + System.arraycopy(payload.bytes, payload.offset, bytes, 0, payload.length); + } else { + bytes = null; + } return Collections.singletonList(bytes); } // TODO: Remove warning after API has been finalized - public boolean isPayloadAvailable() { - return positions.isPayloadAvailable(); + @Override + public boolean isPayloadAvailable() throws IOException { + return readPayload == false && postings.getPayload() != null; } + @Override public String toString() { return "spans(" + term.toString() + ")@" + (doc == -1 ? "START" : (doc == Integer.MAX_VALUE) ? "END" : doc + "-" + position); } + public DocsAndPositionsEnum getPostings() { + return postings; + } - public TermPositions getPositions() { - return positions; + private static final class EmptyTermSpans extends TermSpans { + + @Override + public boolean next() { + return false; + } + + @Override + public boolean skipTo(int target) { + return false; + } + + @Override + public int doc() { + return DocIdSetIterator.NO_MORE_DOCS; + } + + @Override + public int start() { + return -1; + } + + @Override + public int end() { + return -1; + } + + @Override + public Collection getPayload() { + return null; + } + + @Override + public boolean isPayloadAvailable() { + return false; + } + + @Override + public long cost() { + return 0; + } } + + public static final TermSpans EMPTY_TERM_SPANS = new EmptyTermSpans(); } Index: 3rdParty_sources/lucene/org/apache/lucene/search/spans/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/search/spans/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/search/spans/package.html 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/search/spans/package.html 16 Dec 2014 11:31:58 -0000 1.1.2.1 @@ -26,29 +26,30 @@

      -
    • A SpanTermQuery matches all spans -containing a particular Term.
    • +
    • A {@link org.apache.lucene.search.spans.SpanTermQuery SpanTermQuery} matches all spans +containing a particular {@link org.apache.lucene.index.Term Term}.
    • -
    • A SpanNearQuery matches spans +
    • A {@link org.apache.lucene.search.spans.SpanNearQuery SpanNearQuery} matches spans which occur near one another, and can be used to implement things like -phrase search (when constructed from SpanTermQueries) and inter-phrase -proximity (when constructed from other SpanNearQueries).
    • +phrase search (when constructed from {@link org.apache.lucene.search.spans.SpanTermQuery}s) +and inter-phrase proximity (when constructed from other {@link org.apache.lucene.search.spans.SpanNearQuery}s). -
    • A SpanOrQuery merges spans from a -number of other SpanQueries.
    • +
    • A {@link org.apache.lucene.search.spans.SpanOrQuery SpanOrQuery} merges spans from a +number of other {@link org.apache.lucene.search.spans.SpanQuery}s.
    • -
    • A SpanNotQuery removes spans -matching one SpanQuery which overlap -another. This can be used, e.g., to implement within-paragraph +
    • A {@link org.apache.lucene.search.spans.SpanNotQuery SpanNotQuery} removes spans +matching one {@link org.apache.lucene.search.spans.SpanQuery SpanQuery} which overlap (or comes +near) another. This can be used, e.g., to implement within-paragraph search.
    • -
    • A SpanFirstQuery matches spans +
    • A {@link org.apache.lucene.search.spans.SpanFirstQuery SpanFirstQuery} matches spans matching q whose end position is less than n. This can be used to constrain matches to the first part of the document.
    • +
    • A {@link org.apache.lucene.search.spans.SpanPositionRangeQuery SpanPositionRangeQuery} is +a more general form of SpanFirstQuery that can constrain matches to arbitrary portions of the document.
    • +
    In all cases, output spans are minimally inclusive. In other words, a @@ -59,7 +60,7 @@

    For example, a span query which matches "John Kerry" within ten words of "George Bush" within the first 100 words of the document could be constructed with: -

    +
     SpanQuery john   = new SpanTermQuery(new Term("content", "john"));
     SpanQuery kerry  = new SpanTermQuery(new Term("content", "kerry"));
     SpanQuery george = new SpanTermQuery(new Term("content", "george"));
    @@ -82,7 +83,7 @@
     So, for example, the above query can be restricted to documents which
     also use the word "iraq" with:
     
    -
    +
     Query query = new BooleanQuery();
     query.add(johnKerryNearGeorgeBushAtStart, true, false);
     query.add(new TermQuery("content", "iraq"), true, false);
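(Aside, not part of the original package documentation: against the BooleanClause.Occur / IndexSearcher API that the rest of this patch migrates to, a rough equivalent of the combination above could look like the sketch below. The field name "content", the slop/end values, and the variable names are illustrative only.)

 SpanQuery john  = new SpanTermQuery(new Term("content", "john"));
 SpanQuery kerry = new SpanTermQuery(new Term("content", "kerry"));
 // "john kerry" as an ordered, adjacent pair, constrained to the first 100 positions
 SpanQuery johnKerry = new SpanNearQuery(new SpanQuery[] {john, kerry}, 0, true);
 SpanQuery atStart   = new SpanFirstQuery(johnKerry, 100);

 BooleanQuery query = new BooleanQuery();
 query.add(atStart, BooleanClause.Occur.MUST);
 query.add(new TermQuery(new Term("content", "iraq")), BooleanClause.Occur.MUST);

 TopDocs hits = searcher.search(query, 10); // searcher is an assumed IndexSearcher over the index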
    Index: 3rdParty_sources/lucene/org/apache/lucene/store/AlreadyClosedException.java
    ===================================================================
    RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/AlreadyClosedException.java,v
    diff -u -r1.1 -r1.1.2.1
    --- 3rdParty_sources/lucene/org/apache/lucene/store/AlreadyClosedException.java	17 Aug 2012 14:55:06 -0000	1.1
    +++ 3rdParty_sources/lucene/org/apache/lucene/store/AlreadyClosedException.java	16 Dec 2014 11:31:34 -0000	1.1.2.1
    @@ -1,6 +1,6 @@
     package org.apache.lucene.store;
     
    -/**
    +/*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
    @@ -25,4 +25,8 @@
       public AlreadyClosedException(String message) {
         super(message);
       }
    +  
    +  public AlreadyClosedException(String message, Throwable cause) {
    +    super(message, cause);
    +  }
     }
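(Aside, not part of this patch: the new (message, cause) constructor above lets callers preserve the original failure when reporting a closed resource. A minimal hypothetical sketch, where "tragedy" is an assumed field recording an earlier fatal error:)

  if (tragedy != null) {
    // surface the root cause instead of losing it
    throw new AlreadyClosedException("this IndexWriter is closed", tragedy);
  }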
    Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/BaseDirectory.java'.
    Fisheye: No comparison available.  Pass `N' to diff?
    Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/BufferedChecksum.java'.
    Fisheye: No comparison available.  Pass `N' to diff?
    Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/BufferedChecksumIndexInput.java'.
    Fisheye: No comparison available.  Pass `N' to diff?
    Index: 3rdParty_sources/lucene/org/apache/lucene/store/BufferedIndexInput.java
    ===================================================================
    RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/BufferedIndexInput.java,v
    diff -u -r1.1 -r1.1.2.1
    --- 3rdParty_sources/lucene/org/apache/lucene/store/BufferedIndexInput.java	17 Aug 2012 14:55:05 -0000	1.1
    +++ 3rdParty_sources/lucene/org/apache/lucene/store/BufferedIndexInput.java	16 Dec 2014 11:31:33 -0000	1.1.2.1
    @@ -1,6 +1,6 @@
     package org.apache.lucene.store;
     
    -/**
    +/*
      * Licensed to the Apache Software Foundation (ASF) under one or more
      * contributor license agreements.  See the NOTICE file distributed with
      * this work for additional information regarding copyright ownership.
    @@ -17,38 +17,61 @@
      * limitations under the License.
      */
     
    +import java.io.EOFException;
     import java.io.IOException;
     
     /** Base implementation class for buffered {@link IndexInput}. */
    -public abstract class BufferedIndexInput extends IndexInput {
    +public abstract class BufferedIndexInput extends IndexInput implements RandomAccessInput {
     
    -  /** Default buffer size */
    +  /** Default buffer size set to {@value #BUFFER_SIZE}. */
       public static final int BUFFER_SIZE = 1024;
    +  
    +  /** Minimum buffer size allowed */
    +  public static final int MIN_BUFFER_SIZE = 8;
    +  
    +  // The normal read buffer size defaults to 1024, but
    +  // increasing this during merging seems to yield
    +  // performance gains.  However we don't want to increase
    +  // it too much because there are quite a few
    +  // BufferedIndexInputs created during merging.  See
    +  // LUCENE-888 for details.
    +  /**
    +   * A buffer size for merges set to {@value #MERGE_BUFFER_SIZE}.
    +   */
    +  public static final int MERGE_BUFFER_SIZE = 4096;
     
       private int bufferSize = BUFFER_SIZE;
    -
    +  
       protected byte[] buffer;
    +  
    +  private long bufferStart = 0;       // position in file of buffer
    +  private int bufferLength = 0;       // end of valid bytes
    +  private int bufferPosition = 0;     // next byte to read
     
    -  private long bufferStart = 0;			  // position in file of buffer
    -  private int bufferLength = 0;			  // end of valid bytes
    -  private int bufferPosition = 0;		  // next byte to read
    -
    -  public byte readByte() throws IOException {
    +  @Override
    +  public final byte readByte() throws IOException {
         if (bufferPosition >= bufferLength)
           refill();
         return buffer[bufferPosition++];
       }
     
    -  public BufferedIndexInput() {}
    +  public BufferedIndexInput(String resourceDesc) {
    +    this(resourceDesc, BUFFER_SIZE);
    +  }
     
    +  public BufferedIndexInput(String resourceDesc, IOContext context) {
    +    this(resourceDesc, bufferSize(context));
    +  }
    +
       /** Inits BufferedIndexInput with a specific bufferSize */
    -  public BufferedIndexInput(int bufferSize) {
    +  public BufferedIndexInput(String resourceDesc, int bufferSize) {
    +    super(resourceDesc);
         checkBufferSize(bufferSize);
         this.bufferSize = bufferSize;
       }
     
       /** Change the buffer size used by this IndexInput */
    -  public void setBufferSize(int newSize) {
    +  public final void setBufferSize(int newSize) {
         assert buffer == null || bufferSize == buffer.length: "buffer=" + buffer + " bufferSize=" + bufferSize + " buffer.length=" + (buffer != null ? buffer.length : 0);
         if (newSize != bufferSize) {
           checkBufferSize(newSize);
    @@ -79,29 +102,30 @@
       }
     
       /** Returns buffer size.  @see #setBufferSize */
    -  public int getBufferSize() {
    +  public final int getBufferSize() {
         return bufferSize;
       }
     
       private void checkBufferSize(int bufferSize) {
    -    if (bufferSize <= 0)
    -      throw new IllegalArgumentException("bufferSize must be greater than 0 (got " + bufferSize + ")");
    +    if (bufferSize < MIN_BUFFER_SIZE)
    +      throw new IllegalArgumentException("bufferSize must be at least MIN_BUFFER_SIZE (got " + bufferSize + ")");
       }
     
    -  public void readBytes(byte[] b, int offset, int len) throws IOException {
    +  @Override
    +  public final void readBytes(byte[] b, int offset, int len) throws IOException {
         readBytes(b, offset, len, true);
       }
     
    -  public void readBytes(byte[] b, int offset, int len, boolean useBuffer) throws IOException {
    -
    -    if(len <= (bufferLength-bufferPosition)){
    +  @Override
    +  public final void readBytes(byte[] b, int offset, int len, boolean useBuffer) throws IOException {
    +    int available = bufferLength - bufferPosition;
    +    if(len <= available){
           // the buffer contains enough data to satisfy this request
           if(len>0) // to allow b to be null if len is 0...
             System.arraycopy(buffer, bufferPosition, b, offset, len);
           bufferPosition+=len;
         } else {
           // the buffer does not have enough data. First serve all we've got.
    -      int available = bufferLength - bufferPosition;
           if(available > 0){
             System.arraycopy(buffer, bufferPosition, b, offset, available);
             offset += available;
    @@ -117,7 +141,7 @@
             if(bufferLength length())
    -          throw new IOException("read past EOF");
    +          throw new EOFException("read past EOF: " + this);
             readInternal(b, offset, len);
             bufferStart = after;
             bufferPosition = 0;
    @@ -141,14 +165,175 @@
         }
       }
     
    +  @Override
    +  public final short readShort() throws IOException {
    +    if (2 <= (bufferLength-bufferPosition)) {
    +      return (short) (((buffer[bufferPosition++] & 0xFF) <<  8) |  (buffer[bufferPosition++] & 0xFF));
    +    } else {
    +      return super.readShort();
    +    }
    +  }
    +  
    +  @Override
    +  public final int readInt() throws IOException {
    +    if (4 <= (bufferLength-bufferPosition)) {
    +      return ((buffer[bufferPosition++] & 0xFF) << 24) | ((buffer[bufferPosition++] & 0xFF) << 16)
    +        | ((buffer[bufferPosition++] & 0xFF) <<  8) |  (buffer[bufferPosition++] & 0xFF);
    +    } else {
    +      return super.readInt();
    +    }
    +  }
    +  
    +  @Override
    +  public final long readLong() throws IOException {
    +    if (8 <= (bufferLength-bufferPosition)) {
    +      final int i1 = ((buffer[bufferPosition++] & 0xff) << 24) | ((buffer[bufferPosition++] & 0xff) << 16) |
    +        ((buffer[bufferPosition++] & 0xff) << 8) | (buffer[bufferPosition++] & 0xff);
    +      final int i2 = ((buffer[bufferPosition++] & 0xff) << 24) | ((buffer[bufferPosition++] & 0xff) << 16) |
    +        ((buffer[bufferPosition++] & 0xff) << 8) | (buffer[bufferPosition++] & 0xff);
    +      return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL);
    +    } else {
    +      return super.readLong();
    +    }
    +  }
    +
    +  @Override
    +  public final int readVInt() throws IOException {
    +    if (5 <= (bufferLength-bufferPosition)) {
    +      byte b = buffer[bufferPosition++];
    +      if (b >= 0) return b;
    +      int i = b & 0x7F;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7F) << 7;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7F) << 14;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7F) << 21;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      // Warning: the next ands use 0x0F / 0xF0 - beware copy/paste errors:
    +      i |= (b & 0x0F) << 28;
    +      if ((b & 0xF0) == 0) return i;
    +      throw new IOException("Invalid vInt detected (too many bits)");
    +    } else {
    +      return super.readVInt();
    +    }
    +  }
    +  
    +  @Override
    +  public final long readVLong() throws IOException {
    +    if (9 <= bufferLength-bufferPosition) {
    +      byte b = buffer[bufferPosition++];
    +      if (b >= 0) return b;
    +      long i = b & 0x7FL;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 7;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 14;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 21;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 28;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 35;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 42;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 49;
    +      if (b >= 0) return i;
    +      b = buffer[bufferPosition++];
    +      i |= (b & 0x7FL) << 56;
    +      if (b >= 0) return i;
    +      throw new IOException("Invalid vLong detected (negative values disallowed)");
    +    } else {
    +      return super.readVLong();
    +    }
    +  }
    +  
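  // Aside (not part of this patch): readVInt()/readVLong() above are buffered fast paths for
  // Lucene's variable-length integer encoding: each byte carries 7 payload bits and the high
  // bit marks a continuation byte. For reference, a minimal encoder sketch assuming a
  // DataOutput-style sink with writeByte(byte):
  static void writeVInt(DataOutput out, int i) throws IOException {
    while ((i & ~0x7F) != 0) {                   // more than 7 significant bits remain
      out.writeByte((byte) ((i & 0x7F) | 0x80)); // emit low 7 bits with the continuation bit set
      i >>>= 7;
    }
    out.writeByte((byte) i);                     // final byte, continuation bit clear
  }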
    +  @Override
    +  public final byte readByte(long pos) throws IOException {
    +    long index = pos - bufferStart;
    +    if (index < 0 || index >= bufferLength) {
    +      bufferStart = pos;
    +      bufferPosition = 0;
    +      bufferLength = 0;  // trigger refill() on read()
    +      seekInternal(pos);
    +      refill();
    +      index = 0;
    +    }
    +    return buffer[(int)index];
    +  }
    +
    +  @Override
    +  public final short readShort(long pos) throws IOException {
    +    long index = pos - bufferStart;
    +    if (index < 0 || index >= bufferLength-1) {
    +      bufferStart = pos;
    +      bufferPosition = 0;
    +      bufferLength = 0;  // trigger refill() on read()
    +      seekInternal(pos);
    +      refill();
    +      index = 0;
    +    }
    +    return (short) (((buffer[(int)index]   & 0xFF) << 8) | 
    +                     (buffer[(int)index+1] & 0xFF));
    +  }
    +
    +  @Override
    +  public final int readInt(long pos) throws IOException {
    +    long index = pos - bufferStart;
    +    if (index < 0 || index >= bufferLength-3) {
    +      bufferStart = pos;
    +      bufferPosition = 0;
    +      bufferLength = 0;  // trigger refill() on read()
    +      seekInternal(pos);
    +      refill();
    +      index = 0;
    +    }
    +    return ((buffer[(int)index]   & 0xFF) << 24) | 
    +           ((buffer[(int)index+1] & 0xFF) << 16) |
    +           ((buffer[(int)index+2] & 0xFF) << 8)  |
    +            (buffer[(int)index+3] & 0xFF);
    +  }
    +
    +  @Override
    +  public final long readLong(long pos) throws IOException {
    +    long index = pos - bufferStart;
    +    if (index < 0 || index >= bufferLength-7) {
    +      bufferStart = pos;
    +      bufferPosition = 0;
    +      bufferLength = 0;  // trigger refill() on read()
    +      seekInternal(pos);
    +      refill();
    +      index = 0;
    +    }
    +    final int i1 = ((buffer[(int)index]   & 0xFF) << 24) | 
    +                   ((buffer[(int)index+1] & 0xFF) << 16) |
    +                   ((buffer[(int)index+2] & 0xFF) << 8)  | 
    +                    (buffer[(int)index+3] & 0xFF);
    +    final int i2 = ((buffer[(int)index+4] & 0xFF) << 24) | 
    +                   ((buffer[(int)index+5] & 0xFF) << 16) |
    +                   ((buffer[(int)index+6] & 0xFF) << 8)  | 
    +                    (buffer[(int)index+7] & 0xFF);
    +    return (((long)i1) << 32) | (i2 & 0xFFFFFFFFL);
    +  }
    +  
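  // Aside (not part of this patch): the positional readByte/readShort/readInt/readLong(long pos)
  // overloads above back the RandomAccessInput interface this class now implements, allowing
  // absolute-offset reads without an explicit seek(). A hypothetical caller might do:
  static long readHeaderAt(RandomAccessInput in) throws IOException {
    long magic = in.readLong(0L); // 8 bytes at absolute offset 0, independent of the file pointer
    return magic;
  }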
       private void refill() throws IOException {
         long start = bufferStart + bufferPosition;
         long end = start + bufferSize;
    -    if (end > length())				  // don't read past EOF
    +    if (end > length())  // don't read past EOF
           end = length();
         int newLength = (int)(end - start);
         if (newLength <= 0)
    -      throw new IOException("read past EOF");
    +      throw new EOFException("read past EOF: " + this);
     
         if (buffer == null) {
           newBuffer(new byte[bufferSize]);  // allocate buffer lazily
    @@ -169,15 +354,17 @@
       protected abstract void readInternal(byte[] b, int offset, int length)
               throws IOException;
     
    -  public long getFilePointer() { return bufferStart + bufferPosition; }
    +  @Override
    +  public final long getFilePointer() { return bufferStart + bufferPosition; }
     
    -  public void seek(long pos) throws IOException {
    +  @Override
    +  public final void seek(long pos) throws IOException {
         if (pos >= bufferStart && pos < (bufferStart + bufferLength))
           bufferPosition = (int)(pos - bufferStart);  // seek within buffer
         else {
           bufferStart = pos;
           bufferPosition = 0;
    -      bufferLength = 0;				  // trigger refill() on read()
    +      bufferLength = 0;  // trigger refill() on read()
           seekInternal(pos);
         }
       }
    @@ -188,7 +375,8 @@
        */
       protected abstract void seekInternal(long pos) throws IOException;
     
    -  public Object clone() {
    +  @Override
    +  public BufferedIndexInput clone() {
         BufferedIndexInput clone = (BufferedIndexInput)super.clone();
     
         clone.buffer = null;
    @@ -198,5 +386,101 @@
     
         return clone;
       }
    +  
    +  @Override
    +  public IndexInput slice(String sliceDescription, long offset, long length) throws IOException {
    +    return wrap(sliceDescription, this, offset, length);
    +  }
     
    +  /**
    +   * Flushes the in-memory buffer to the given output, copying at most
    +   * numBytes.
    +   * 

    + * NOTE: this method does not refill the buffer, however it does + * advance the buffer position. + * + * @return the number of bytes actually flushed from the in-memory buffer. + */ + protected final int flushBuffer(IndexOutput out, long numBytes) throws IOException { + int toCopy = bufferLength - bufferPosition; + if (toCopy > numBytes) { + toCopy = (int) numBytes; + } + if (toCopy > 0) { + out.writeBytes(buffer, bufferPosition, toCopy); + bufferPosition += toCopy; + } + return toCopy; + } + + /** + * Returns default buffer sizes for the given {@link IOContext} + */ + public static int bufferSize(IOContext context) { + switch (context.context) { + case MERGE: + return MERGE_BUFFER_SIZE; + default: + return BUFFER_SIZE; + } + } + + /** + * Wraps a portion of another IndexInput with buffering. + *

    Please note: This is in most cases ineffective, because it may double buffer! + */ + public static BufferedIndexInput wrap(String sliceDescription, IndexInput other, long offset, long length) { + return new SlicedIndexInput(sliceDescription, other, offset, length); + } + + /** + * Implementation of an IndexInput that reads from a portion of a file. + */ + private static final class SlicedIndexInput extends BufferedIndexInput { + IndexInput base; + long fileOffset; + long length; + + SlicedIndexInput(String sliceDescription, IndexInput base, long offset, long length) { + super((sliceDescription == null) ? base.toString() : (base.toString() + " [slice=" + sliceDescription + "]"), BufferedIndexInput.BUFFER_SIZE); + if (offset < 0 || length < 0 || offset + length > base.length()) { + throw new IllegalArgumentException("slice() " + sliceDescription + " out of bounds: " + base); + } + this.base = base.clone(); + this.fileOffset = offset; + this.length = length; + } + + @Override + public SlicedIndexInput clone() { + SlicedIndexInput clone = (SlicedIndexInput)super.clone(); + clone.base = base.clone(); + clone.fileOffset = fileOffset; + clone.length = length; + return clone; + } + + @Override + protected void readInternal(byte[] b, int offset, int len) throws IOException { + long start = getFilePointer(); + if (start + len > length) { + throw new EOFException("read past EOF: " + this); + } + base.seek(fileOffset + start); + base.readBytes(b, offset, len, false); + } + + @Override + protected void seekInternal(long pos) {} + + @Override + public void close() throws IOException { + base.close(); + } + + @Override + public long length() { + return length; + } + } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/BufferedIndexOutput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/ByteArrayDataInput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/ByteArrayDataOutput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/ByteBufferIndexInput.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/ChecksumIndexInput.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/ChecksumIndexInput.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/ChecksumIndexInput.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/ChecksumIndexInput.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,8 @@ package org.apache.lucene.store; -/** +import java.io.IOException; + +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,51 +19,35 @@ * limitations under the License. */ -import java.io.IOException; -import java.util.zip.CRC32; -import java.util.zip.Checksum; - -/** Writes bytes through to a primary IndexOutput, computing - * checksum as it goes. Note that you cannot use seek(). 
*/ -public class ChecksumIndexInput extends IndexInput { - IndexInput main; - Checksum digest; - - public ChecksumIndexInput(IndexInput main) { - this.main = main; - digest = new CRC32(); - } - - public byte readByte() throws IOException { - final byte b = main.readByte(); - digest.update(b); - return b; - } - - public void readBytes(byte[] b, int offset, int len) - throws IOException { - main.readBytes(b, offset, len); - digest.update(b, offset, len); - } - +/** + * Extension of IndexInput, computing checksum as it goes. + * Callers can retrieve the checksum via {@link #getChecksum()}. + */ +public abstract class ChecksumIndexInput extends IndexInput { - public long getChecksum() { - return digest.getValue(); + /** resourceDescription should be a non-null, opaque string + * describing this resource; it's returned from + * {@link #toString}. */ + protected ChecksumIndexInput(String resourceDescription) { + super(resourceDescription); } - public void close() throws IOException { - main.close(); - } + /** Returns the current checksum value */ + public abstract long getChecksum() throws IOException; - public long getFilePointer() { - return main.getFilePointer(); + /** + * {@inheritDoc} + * + * {@link ChecksumIndexInput} can only seek forward and seeks are expensive + * since they imply to read bytes in-between the current position and the + * target position in order to update the checksum. + */ + @Override + public void seek(long pos) throws IOException { + final long skip = pos - getFilePointer(); + if (skip < 0) { + throw new IllegalStateException(getClass() + " cannot seek backwards"); + } + skipBytes(skip); } - - public void seek(long pos) { - throw new RuntimeException("not allowed"); - } - - public long length() { - return main.length(); - } } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/ChecksumIndexOutput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/CompoundFileDirectory.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/CompoundFileWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/DataInput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/DataOutput.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/Directory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/Directory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/Directory.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/Directory.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,8 +17,14 @@ * limitations under the License. 
*/ +import java.io.FileNotFoundException; import java.io.IOException; +import java.io.Closeable; +import java.nio.file.NoSuchFileException; +import java.util.Collection; // for javadocs +import org.apache.lucene.util.IOUtils; + /** A Directory is a flat list of files. Files may be written once, when they * are created. Once a file is created it may only be opened for read, or * deleted. Random access is permitted both when reading and writing. @@ -35,95 +41,93 @@ * instance using {@link #setLockFactory}. * */ -public abstract class Directory { +public abstract class Directory implements Closeable { - volatile boolean isOpen = true; + /** + * Returns an array of strings, one for each file in the directory. + * + * @throws NoSuchDirectoryException if the directory is not prepared for any + * write operations (such as {@link #createOutput(String, IOContext)}). + * @throws IOException in case of other IO errors + */ + public abstract String[] listAll() throws IOException; - /** Holds the LockFactory instance (implements locking for - * this Directory instance). */ - protected LockFactory lockFactory; - - /** Returns an array of strings, one for each file in the - * directory. This method may return null (for example for - * {@link FSDirectory} if the underlying directory doesn't - * exist in the filesystem or there are permissions - * problems).*/ - public abstract String[] list() - throws IOException; - - /** Returns true iff a file with the given name exists. */ + /** Returns true iff a file with the given name exists. + * + * @deprecated This method will be removed in 5.0 */ + @Deprecated public abstract boolean fileExists(String name) throws IOException; - /** Returns the time the named file was last modified. */ - public abstract long fileModified(String name) - throws IOException; - - /** Set the modified time of an existing file to now. */ - public abstract void touchFile(String name) - throws IOException; - /** Removes an existing file in the directory. */ public abstract void deleteFile(String name) throws IOException; - /** Renames an existing file in the directory. - * If a file already exists with the new name, then it is replaced. - * This replacement is not guaranteed to be atomic. - * @deprecated + /** + * Returns the length of a file in the directory. This method follows the + * following contract: + *

      + *
    • Throws {@link FileNotFoundException} or {@link NoSuchFileException} + * if the file does not exist. + *
    • Returns a value ≥0 if the file exists, which specifies its length. + *
    + * + * @param name the name of the file for which to return the length. + * @throws IOException if there was an IO error while retrieving the file's + * length. */ - public abstract void renameFile(String from, String to) - throws IOException; + public abstract long fileLength(String name) throws IOException; - /** Returns the length of a file in the directory. */ - public abstract long fileLength(String name) - throws IOException; - /** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */ - public abstract IndexOutput createOutput(String name) throws IOException; + public abstract IndexOutput createOutput(String name, IOContext context) + throws IOException; - /** Ensure that any writes to this file are moved to - * stable storage. Lucene uses this to properly commit - * changes to the index, to prevent a machine/OS crash - * from corrupting the index. */ - public void sync(String name) throws IOException {} + /** + * Ensure that any writes to these files are moved to + * stable storage. Lucene uses this to properly commit + * changes to the index, to prevent a machine/OS crash + * from corrupting the index.
    + *
    + * NOTE: Clients may call this method for same files over + * and over again, so some impls might optimize for that. + * For other impls the operation can be a noop, for various + * reasons. + */ + public abstract void sync(Collection names) throws IOException; - /** Returns a stream reading an existing file. */ - public abstract IndexInput openInput(String name) - throws IOException; - /** Returns a stream reading an existing file, with the * specified read buffer size. The particular Directory * implementation may ignore the buffer size. Currently * the only Directory implementations that respect this * parameter are {@link FSDirectory} and {@link - * org.apache.lucene.index.CompoundFileReader}. - */ - public IndexInput openInput(String name, int bufferSize) throws IOException { - return openInput(name); + * CompoundFileDirectory}. + *

    Throws {@link FileNotFoundException} or {@link NoSuchFileException} + * if the file does not exist. + */ + public abstract IndexInput openInput(String name, IOContext context) throws IOException; + + /** Returns a stream reading an existing file, computing checksum as it reads */ + public ChecksumIndexInput openChecksumInput(String name, IOContext context) throws IOException { + return new BufferedChecksumIndexInput(openInput(name, context)); } - + /** Construct a {@link Lock}. * @param name the name of the lock file */ - public Lock makeLock(String name) { - return lockFactory.makeLock(name); - } + public abstract Lock makeLock(String name); + /** * Attempt to clear (forcefully unlock and remove) the * specified lock. Only call this at a time when you are * certain this lock is no longer in use. * @param name name of the lock to be cleared. */ - public void clearLock(String name) throws IOException { - if (lockFactory != null) { - lockFactory.clearLock(name); - } - } + public abstract void clearLock(String name) throws IOException; /** Closes the store. */ + @Override public abstract void close() throws IOException; @@ -136,20 +140,15 @@ * * @param lockFactory instance of {@link LockFactory}. */ - public void setLockFactory(LockFactory lockFactory) { - this.lockFactory = lockFactory; - lockFactory.setLockPrefix(this.getLockID()); - } + public abstract void setLockFactory(LockFactory lockFactory) throws IOException; /** * Get the LockFactory that this Directory instance is * using for its locking implementation. Note that this * may be null for Directory implementations that provide * their own locking implementation. */ - public LockFactory getLockFactory() { - return this.lockFactory; - } + public abstract LockFactory getLockFactory(); /** * Return a string identifier that uniquely differentiates @@ -160,63 +159,56 @@ * "scopes" to the right index. */ public String getLockID() { - return this.toString(); + return this.toString(); } + @Override + public String toString() { + return getClass().getSimpleName() + '@' + Integer.toHexString(hashCode()) + " lockFactory=" + getLockFactory(); + } + /** - * Copy contents of a directory src to a directory dest. - * If a file in src already exists in dest then the - * one in dest will be blindly overwritten. - * - * @param src source directory - * @param dest destination directory - * @param closeDirSrc if true, call {@link #close()} method on source directory - * @throws IOException + * Copies the file src to {@link Directory} to under the new + * file name dest. + *

    + * If you want to copy the entire source directory to the destination one, you + * can do so like this: + * + *

    +   * Directory to; // the directory to copy to
    +   * for (String file : dir.listAll()) {
    +   *   dir.copy(to, file, newFile, IOContext.DEFAULT); // newFile can be either file, or a new name
    +   * }
    +   * 
    + *

    + * NOTE: this method does not check whether dest exist and will + * overwrite it if it does. */ - public static void copy(Directory src, Directory dest, boolean closeDirSrc) throws IOException { - final String[] files = src.list(); - - if (files == null) - throw new IOException("cannot read directory " + src + ": list() returned null"); - - byte[] buf = new byte[BufferedIndexOutput.BUFFER_SIZE]; - for (int i = 0; i < files.length; i++) { - IndexOutput os = null; - IndexInput is = null; + public void copy(Directory to, String src, String dest, IOContext context) throws IOException { + IndexOutput os = null; + IndexInput is = null; + boolean success = false; + try { + os = to.createOutput(dest, context); + is = openInput(src, context); + os.copyBytes(is, is.length()); + success = true; + } finally { + if (success) { + IOUtils.close(os, is); + } else { + IOUtils.closeWhileHandlingException(os, is); try { - // create file in dest directory - os = dest.createOutput(files[i]); - // read current file - is = src.openInput(files[i]); - // and copy to dest directory - long len = is.length(); - long readCount = 0; - while (readCount < len) { - int toRead = readCount + BufferedIndexOutput.BUFFER_SIZE > len ? (int)(len - readCount) : BufferedIndexOutput.BUFFER_SIZE; - is.readBytes(buf, 0, toRead); - os.writeBytes(buf, toRead); - readCount += toRead; - } - } finally { - // graceful cleanup - try { - if (os != null) - os.close(); - } finally { - if (is != null) - is.close(); - } + to.deleteFile(dest); + } catch (Throwable t) { } } - if(closeDirSrc) - src.close(); + } } /** * @throws AlreadyClosedException if this Directory is closed */ - protected final void ensureOpen() throws AlreadyClosedException { - if (!isOpen) - throw new AlreadyClosedException("this Directory is closed"); - } + protected void ensureOpen() throws AlreadyClosedException {} + } Index: 3rdParty_sources/lucene/org/apache/lucene/store/FSDirectory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/FSDirectory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/FSDirectory.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/FSDirectory.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,484 +17,309 @@ * limitations under the License. 
*/ +import org.apache.lucene.util.Constants; +import org.apache.lucene.util.IOUtils; + import java.io.File; -import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.FileOutputStream; +import java.io.FilenameFilter; +import java.io.FilterOutputStream; import java.io.IOException; -import java.io.RandomAccessFile; -import java.security.MessageDigest; -import java.security.NoSuchAlgorithmException; -import java.util.HashMap; -import java.util.Map; +import java.util.Collection; +import java.util.HashSet; +import java.util.Set; +import java.util.concurrent.Future; -import org.apache.lucene.index.IndexFileNameFilter; +import static java.util.Collections.synchronizedSet; -// Used only for WRITE_LOCK_NAME in deprecated create=true case: -import org.apache.lucene.index.IndexWriter; - /** - * Straightforward implementation of {@link Directory} as a directory of files. - * Locking implementation is by default the {@link SimpleFSLockFactory}, but - * can be changed either by passing in a {@link LockFactory} instance to - * getDirectory, or specifying the LockFactory class by setting - * org.apache.lucene.store.FSDirectoryLockFactoryClass Java system - * property, or by calling {@link #setLockFactory} after creating - * the Directory. - - *

Directories are cached, so that, for a given canonical
- * path, the same FSDirectory instance will always be
- * returned by getDirectory. This permits
- * synchronization on directories.

    + * Base class for Directory implementations that store index + * files in the file system. + * + * There are currently three core + * subclasses: * + * + * + * Unfortunately, because of system peculiarities, there is + * no single overall best implementation. Therefore, we've + * added the {@link #open} method, to allow Lucene to choose + * the best FSDirectory implementation given your + * environment, and the known limitations of each + * implementation. For users who have no reason to prefer a + * specific implementation, it's best to simply use {@link + * #open}. For all others, you should instantiate the + * desired implementation directly. + * + *

    The locking implementation is by default {@link + * NativeFSLockFactory}, but can be changed by + * passing in a custom {@link LockFactory} instance. + * * @see Directory */ -public class FSDirectory extends Directory { - - /** This cache of directories ensures that there is a unique Directory - * instance per path, so that synchronization on the Directory can be used to - * synchronize access between readers and writers. We use - * refcounts to ensure when the last use of an FSDirectory - * instance for a given canonical path is closed, we remove the - * instance from the cache. See LUCENE-776 - * for some relevant discussion. - */ - private static final Map DIRECTORIES = new HashMap(); +public abstract class FSDirectory extends BaseDirectory { - private static boolean disableLocks = false; - - // TODO: should this move up to the Directory base class? Also: should we - // make a per-instance (in addition to the static "default") version? - /** - * Set whether Lucene's use of lock files is disabled. By default, - * lock files are enabled. They should only be disabled if the index - * is on a read-only medium like a CD-ROM. + * Default read chunk size: 8192 bytes (this is the size up to which the JDK + does not allocate additional arrays while reading/writing) + @deprecated This constant is no longer used since Lucene 4.5. */ - public static void setDisableLocks(boolean doDisableLocks) { - FSDirectory.disableLocks = doDisableLocks; - } + @Deprecated + public static final int DEFAULT_READ_CHUNK_SIZE = 8192; - /** - * Returns whether Lucene's use of lock files is disabled. - * @return true if locks are disabled, false if locks are enabled. - */ - public static boolean getDisableLocks() { - return FSDirectory.disableLocks; + protected final File directory; // The underlying filesystem directory + protected final Set staleFiles = synchronizedSet(new HashSet()); // Files written, but not yet sync'ed + private int chunkSize = DEFAULT_READ_CHUNK_SIZE; + + // returns the canonical version of the directory, creating it if it doesn't exist. + private static File getCanonicalPath(File file) throws IOException { + return new File(file.getCanonicalPath()); } - /** - * Directory specified by org.apache.lucene.lockDir - * or java.io.tmpdir system property. - - * @deprecated As of 2.1, LOCK_DIR is unused - * because the write.lock is now stored by default in the - * index directory. If you really want to store locks - * elsewhere you can create your own {@link - * SimpleFSLockFactory} (or {@link NativeFSLockFactory}, - * etc.) passing in your preferred lock directory. Then, - * pass this LockFactory instance to one of - * the getDirectory methods that take a - * lockFactory (for example, {@link #getDirectory(String, LockFactory)}). + /** Create a new FSDirectory for the named location (ctor for subclasses). + * @param path the path of the directory + * @param lockFactory the lock factory to use, or null for the default + * ({@link NativeFSLockFactory}); + * @throws IOException if there is a low-level I/O error */ - public static final String LOCK_DIR = System.getProperty("org.apache.lucene.lockDir", - System.getProperty("java.io.tmpdir")); - - /** The default class which implements filesystem-based directories. 
*/ - private static Class IMPL; - static { - try { - String name = - System.getProperty("org.apache.lucene.FSDirectory.class", - FSDirectory.class.getName()); - IMPL = Class.forName(name); - } catch (ClassNotFoundException e) { - throw new RuntimeException("cannot load FSDirectory class: " + e.toString(), e); - } catch (SecurityException se) { - try { - IMPL = Class.forName(FSDirectory.class.getName()); - } catch (ClassNotFoundException e) { - throw new RuntimeException("cannot load default FSDirectory class: " + e.toString(), e); - } + protected FSDirectory(File path, LockFactory lockFactory) throws IOException { + // new ctors use always NativeFSLockFactory as default: + if (lockFactory == null) { + lockFactory = new NativeFSLockFactory(); } - } + directory = getCanonicalPath(path); - private static MessageDigest DIGESTER; + if (directory.exists() && !directory.isDirectory()) + throw new NoSuchDirectoryException("file '" + directory + "' exists but is not a directory"); - static { - try { - DIGESTER = MessageDigest.getInstance("MD5"); - } catch (NoSuchAlgorithmException e) { - throw new RuntimeException(e.toString(), e); - } - } + setLockFactory(lockFactory); - /** A buffer optionally used in renameTo method */ - private byte[] buffer = null; - - /** Returns the directory instance for the named location. - * @param path the path to the directory. - * @return the FSDirectory for the named file. */ - public static FSDirectory getDirectory(String path) - throws IOException { - return getDirectory(new File(path), null); } - /** Returns the directory instance for the named location. - * @param path the path to the directory. - * @param lockFactory instance of {@link LockFactory} providing the - * locking implementation. - * @return the FSDirectory for the named file. */ - public static FSDirectory getDirectory(String path, LockFactory lockFactory) - throws IOException { - return getDirectory(new File(path), lockFactory); + /** Creates an FSDirectory instance, trying to pick the + * best implementation given the current environment. + * The directory returned uses the {@link NativeFSLockFactory}. + * + *

Currently this returns {@link MMapDirectory} for most Solaris
+ * and Windows 64-bit JREs, {@link NIOFSDirectory} for other
+ * non-Windows JREs, and {@link SimpleFSDirectory} for other
+ * JREs on Windows. It is highly recommended that you consult the
+ * implementation's documentation for your platform before
+ * using this method.
+ *
+ *

NOTE: this method may suddenly change which
+ * implementation is returned from release to release, in
+ * the event that higher performance defaults become
+ * possible; if the precise implementation is important to
+ * your application, please instantiate it directly,
+ * instead. For optimal performance you should consider using
+ * {@link MMapDirectory} on 64 bit JVMs.
+ *
+ *

    See above */ + public static FSDirectory open(File path) throws IOException { + return open(path, null); } - /** Returns the directory instance for the named location. - * @param file the path to the directory. - * @return the FSDirectory for the named file. */ - public static FSDirectory getDirectory(File file) - throws IOException { - return getDirectory(file, null); + /** Just like {@link #open(File)}, but allows you to + * also specify a custom {@link LockFactory}. */ + public static FSDirectory open(File path, LockFactory lockFactory) throws IOException { + if (Constants.JRE_IS_64BIT && MMapDirectory.UNMAP_SUPPORTED) { + return new MMapDirectory(path, lockFactory); + } else if (Constants.WINDOWS) { + return new SimpleFSDirectory(path, lockFactory); + } else { + return new NIOFSDirectory(path, lockFactory); + } } - /** Returns the directory instance for the named location. - * @param file the path to the directory. - * @param lockFactory instance of {@link LockFactory} providing the - * locking implementation. - * @return the FSDirectory for the named file. */ - public static FSDirectory getDirectory(File file, LockFactory lockFactory) - throws IOException - { - file = new File(file.getCanonicalPath()); + @Override + public void setLockFactory(LockFactory lockFactory) throws IOException { + super.setLockFactory(lockFactory); - if (file.exists() && !file.isDirectory()) - throw new IOException(file + " not a directory"); - - if (!file.exists()) - if (!file.mkdirs()) - throw new IOException("Cannot create directory: " + file); - - FSDirectory dir; - synchronized (DIRECTORIES) { - dir = (FSDirectory)DIRECTORIES.get(file); + // for filesystem based LockFactory, delete the lockPrefix, if the locks are placed + // in index dir. If no index dir is given, set ourselves + if (lockFactory instanceof FSLockFactory) { + final FSLockFactory lf = (FSLockFactory) lockFactory; + final File dir = lf.getLockDir(); + // if the lock factory has no lockDir set, use the this directory as lockDir if (dir == null) { - try { - dir = (FSDirectory)IMPL.newInstance(); - } catch (Exception e) { - throw new RuntimeException("cannot load FSDirectory class: " + e.toString(), e); - } - dir.init(file, lockFactory); - DIRECTORIES.put(file, dir); - } else { - // Catch the case where a Directory is pulled from the cache, but has a - // different LockFactory instance. - if (lockFactory != null && lockFactory != dir.getLockFactory()) { - throw new IOException("Directory was previously created with a different LockFactory instance; please pass null as the lockFactory instance and use setLockFactory to change it"); - } + lf.setLockDir(directory); + lf.setLockPrefix(null); + } else if (dir.getCanonicalPath().equals(directory.getCanonicalPath())) { + lf.setLockPrefix(null); } } - synchronized (dir) { - dir.refCount++; - } - return dir; - } - - /** Returns the directory instance for the named location. - * - * @deprecated Use IndexWriter's create flag, instead, to - * create a new index. - * - * @param path the path to the directory. - * @param create if true, create, or erase any existing contents. - * @return the FSDirectory for the named file. */ - public static FSDirectory getDirectory(String path, boolean create) - throws IOException { - return getDirectory(new File(path), create); } - - /** Returns the directory instance for the named location. + + /** Lists all files (not subdirectories) in the + * directory. This method never returns null (throws + * {@link IOException} instead). 
* - * @deprecated Use IndexWriter's create flag, instead, to - * create a new index. - * - * @param file the path to the directory. - * @param create if true, create, or erase any existing contents. - * @return the FSDirectory for the named file. */ - public static FSDirectory getDirectory(File file, boolean create) - throws IOException - { - FSDirectory dir = getDirectory(file, null); + * @throws NoSuchDirectoryException if the directory + * does not exist, or does exist but is not a + * directory. + * @throws IOException if list() returns null */ + public static String[] listAll(File dir) throws IOException { + if (!dir.exists()) + throw new NoSuchDirectoryException("directory '" + dir + "' does not exist"); + else if (!dir.isDirectory()) + throw new NoSuchDirectoryException("file '" + dir + "' exists but is not a directory"); - // This is now deprecated (creation should only be done - // by IndexWriter): - if (create) { - dir.create(); - } - - return dir; - } - - private void create() throws IOException { - if (directory.exists()) { - String[] files = directory.list(IndexFileNameFilter.getFilter()); // clear old files - if (files == null) - throw new IOException("cannot read directory " + directory.getAbsolutePath() + ": list() returned null"); - for (int i = 0; i < files.length; i++) { - File file = new File(directory, files[i]); - if (!file.delete()) - throw new IOException("Cannot delete " + file); - } - } - lockFactory.clearLock(IndexWriter.WRITE_LOCK_NAME); - } - - private File directory = null; - private int refCount; - - protected FSDirectory() {}; // permit subclassing - - private void init(File path, LockFactory lockFactory) throws IOException { - - // Set up lockFactory with cascaded defaults: if an instance was passed in, - // use that; else if locks are disabled, use NoLockFactory; else if the - // system property org.apache.lucene.store.FSDirectoryLockFactoryClass is set, - // instantiate that; else, use SimpleFSLockFactory: - - directory = path; - - boolean doClearLockID = false; - - if (lockFactory == null) { - - if (disableLocks) { - // Locks are disabled: - lockFactory = NoLockFactory.getNoLockFactory(); - } else { - String lockClassName = System.getProperty("org.apache.lucene.store.FSDirectoryLockFactoryClass"); - - if (lockClassName != null && !lockClassName.equals("")) { - Class c; - - try { - c = Class.forName(lockClassName); - } catch (ClassNotFoundException e) { - throw new IOException("unable to find LockClass " + lockClassName); - } - - try { - lockFactory = (LockFactory) c.newInstance(); - } catch (IllegalAccessException e) { - throw new IOException("IllegalAccessException when instantiating LockClass " + lockClassName); - } catch (InstantiationException e) { - throw new IOException("InstantiationException when instantiating LockClass " + lockClassName); - } catch (ClassCastException e) { - throw new IOException("unable to cast LockClass " + lockClassName + " instance to a LockFactory"); - } - - if (lockFactory instanceof NativeFSLockFactory) { - ((NativeFSLockFactory) lockFactory).setLockDir(path); - } else if (lockFactory instanceof SimpleFSLockFactory) { - ((SimpleFSLockFactory) lockFactory).setLockDir(path); - } - } else { - // Our default lock is SimpleFSLockFactory; - // default lockDir is our index directory: - lockFactory = new SimpleFSLockFactory(path); - doClearLockID = true; + // Exclude subdirs + String[] result = dir.list(new FilenameFilter() { + @Override + public boolean accept(File dir, String file) { + return !new File(dir, file).isDirectory(); 
} - } - } + }); - setLockFactory(lockFactory); + if (result == null) + throw new IOException("directory '" + dir + "' exists and is a directory, but cannot be listed: list() returned null"); - if (doClearLockID) { - // Clear the prefix because write.lock will be - // stored in our directory: - lockFactory.setLockPrefix(null); - } + return result; } - /** Returns an array of strings, one for each Lucene index file in the directory. */ - public String[] list() { + /** Lists all files (not subdirectories) in the + * directory. + * @see #listAll(File) */ + @Override + public String[] listAll() throws IOException { ensureOpen(); - return directory.list(IndexFileNameFilter.getFilter()); + return listAll(directory); } /** Returns true iff a file with the given name exists. */ + @Override public boolean fileExists(String name) { ensureOpen(); File file = new File(directory, name); return file.exists(); } - /** Returns the time the named file was last modified. */ - public long fileModified(String name) { - ensureOpen(); - File file = new File(directory, name); - return file.lastModified(); - } - - /** Returns the time the named file was last modified. */ - public static long fileModified(File directory, String name) { - File file = new File(directory, name); - return file.lastModified(); - } - - /** Set the modified time of an existing file to now. */ - public void touchFile(String name) { - ensureOpen(); - File file = new File(directory, name); - file.setLastModified(System.currentTimeMillis()); - } - /** Returns the length in bytes of a file in the directory. */ - public long fileLength(String name) { + @Override + public long fileLength(String name) throws IOException { ensureOpen(); File file = new File(directory, name); - return file.length(); + final long len = file.length(); + if (len == 0 && !file.exists()) { + throw new FileNotFoundException(name); + } else { + return len; + } } /** Removes an existing file in the directory. */ + @Override public void deleteFile(String name) throws IOException { ensureOpen(); File file = new File(directory, name); if (!file.delete()) throw new IOException("Cannot delete " + file); + staleFiles.remove(name); } - /** Renames an existing file in the directory. - * Warning: This is not atomic. - * @deprecated - */ - public synchronized void renameFile(String from, String to) - throws IOException { + /** Creates an IndexOutput for the file with the given name. */ + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { ensureOpen(); - File old = new File(directory, from); - File nu = new File(directory, to); - /* This is not atomic. If the program crashes between the call to - delete() and the call to renameTo() then we're screwed, but I've - been unable to figure out how else to do this... */ - - if (nu.exists()) - if (!nu.delete()) - throw new IOException("Cannot delete " + nu); - - // Rename the old file to the new one. Unfortunately, the renameTo() - // method does not work reliably under some JVMs. Therefore, if the - // rename fails, we manually rename by copying the old file to the new one - if (!old.renameTo(nu)) { - java.io.InputStream in = null; - java.io.OutputStream out = null; - try { - in = new FileInputStream(old); - out = new FileOutputStream(nu); - // see if the buffer needs to be initialized. Initialization is - // only done on-demand since many VM's will never run into the renameTo - // bug and hence shouldn't waste 1K of mem for no reason. 
- if (buffer == null) { - buffer = new byte[1024]; - } - int len; - while ((len = in.read(buffer)) >= 0) { - out.write(buffer, 0, len); - } - - // delete the old file. - old.delete(); - } - catch (IOException ioe) { - IOException newExc = new IOException("Cannot rename " + old + " to " + nu); - newExc.initCause(ioe); - throw newExc; - } - finally { - try { - if (in != null) { - try { - in.close(); - } catch (IOException e) { - throw new RuntimeException("Cannot close input stream: " + e.toString(), e); - } - } - } finally { - if (out != null) { - try { - out.close(); - } catch (IOException e) { - throw new RuntimeException("Cannot close output stream: " + e.toString(), e); - } - } - } - } - } + ensureCanWrite(name); + return new FSIndexOutput(name); } - /** Creates a new, empty file in the directory with the given name. - Returns a stream writing this file. */ - public IndexOutput createOutput(String name) throws IOException { - ensureOpen(); + protected void ensureCanWrite(String name) throws IOException { + if (!directory.exists()) + if (!directory.mkdirs()) + throw new IOException("Cannot create directory: " + directory); + File file = new File(directory, name); if (file.exists() && !file.delete()) // delete existing, if any throw new IOException("Cannot overwrite: " + file); - - return new FSIndexOutput(file); } - public void sync(String name) throws IOException { - ensureOpen(); - File fullFile = new File(directory, name); - boolean success = false; - int retryCount = 0; - IOException exc = null; - while(!success && retryCount < 5) { - retryCount++; - RandomAccessFile file = null; - try { - try { - file = new RandomAccessFile(fullFile, "rw"); - file.getFD().sync(); - success = true; - } finally { - if (file != null) - file.close(); - } - } catch (IOException ioe) { - if (exc == null) - exc = ioe; - try { - // Pause 5 msec - Thread.sleep(5); - } catch (InterruptedException ie) { - Thread.currentThread().interrupt(); - } - } - } - if (!success) - // Throw original exception - throw exc; + /** + * Sub classes should call this method on closing an open {@link IndexOutput}, reporting the name of the file + * that was closed. {@code FSDirectory} needs this information to take care of syncing stale files. + */ + protected void onIndexOutputClosed(String name) { + staleFiles.add(name); } - // Inherit javadoc - public IndexInput openInput(String name) throws IOException { + @Override + public void sync(Collection names) throws IOException { ensureOpen(); - return openInput(name, BufferedIndexInput.BUFFER_SIZE); - } + Set toSync = new HashSet<>(names); + toSync.retainAll(staleFiles); - // Inherit javadoc - public IndexInput openInput(String name, int bufferSize) throws IOException { - ensureOpen(); - return new FSIndexInput(new File(directory, name), bufferSize); + for (String name : toSync) { + fsync(name); + } + + // fsync the directory itsself, but only if there was any file fsynced before + // (otherwise it can happen that the directory does not yet exist)! 
+ if (!toSync.isEmpty()) { + IOUtils.fsync(directory, true); + } + + staleFiles.removeAll(toSync); } - /** - * So we can do some byte-to-hexchar conversion below - */ - private static final char[] HEX_DIGITS = - {'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}; - - + @Override public String getLockID() { ensureOpen(); String dirName; // name to be hashed @@ -504,177 +329,89 @@ throw new RuntimeException(e.toString(), e); } - byte digest[]; - synchronized (DIGESTER) { - digest = DIGESTER.digest(dirName.getBytes()); + int digest = 0; + for(int charIDX=0;charIDX> 4) & 0xf]); - buf.append(HEX_DIGITS[b & 0xf]); - } - - return buf.toString(); + return "lucene-" + Integer.toHexString(digest); } /** Closes the store to future operations. */ + @Override public synchronized void close() { - if (isOpen && --refCount <= 0) { - isOpen = false; - synchronized (DIRECTORIES) { - DIRECTORIES.remove(directory); - } - } + isOpen = false; } - public File getFile() { + /** @return the underlying filesystem directory */ + public File getDirectory() { ensureOpen(); return directory; } /** For debug output. */ + @Override public String toString() { - return this.getClass().getName() + "@" + directory; + return this.getClass().getSimpleName() + "@" + directory + " lockFactory=" + getLockFactory(); } - protected static class FSIndexInput extends BufferedIndexInput { - - protected static class Descriptor extends RandomAccessFile { - // remember if the file is open, so that we don't try to close it - // more than once - protected volatile boolean isOpen; - long position; - final long length; - - public Descriptor(File file, String mode) throws IOException { - super(file, mode); - isOpen=true; - length=length(); - } - - public void close() throws IOException { - if (isOpen) { - isOpen=false; - super.close(); - } - } - - protected void finalize() throws Throwable { - try { - close(); - } finally { - super.finalize(); - } - } + /** + * This setting has no effect anymore. + * @deprecated This is no longer used since Lucene 4.5. + */ + @Deprecated + public final void setReadChunkSize(int chunkSize) { + if (chunkSize <= 0) { + throw new IllegalArgumentException("chunkSize must be positive"); } - - protected final Descriptor file; - boolean isClone; - - public FSIndexInput(File path) throws IOException { - this(path, BufferedIndexInput.BUFFER_SIZE); - } - - public FSIndexInput(File path, int bufferSize) throws IOException { - super(bufferSize); - file = new Descriptor(path, "r"); - } - - /** IndexInput methods */ - protected void readInternal(byte[] b, int offset, int len) - throws IOException { - synchronized (file) { - long position = getFilePointer(); - if (position != file.position) { - file.seek(position); - file.position = position; - } - int total = 0; - do { - int i = file.read(b, offset+total, len-total); - if (i == -1) - throw new IOException("read past EOF"); - file.position += i; - total += i; - } while (total < len); - } - } - - public void close() throws IOException { - // only close the file if this is not a clone - if (!isClone) file.close(); - } - - protected void seekInternal(long position) { - } - - public long length() { - return file.length; - } - - public Object clone() { - FSIndexInput clone = (FSIndexInput)super.clone(); - clone.isClone = true; - return clone; - } - - /** Method used for testing. Returns true if the underlying - * file descriptor is valid. 
- */ - boolean isFDValid() throws IOException { - return file.getFD().valid(); - } + this.chunkSize = chunkSize; } - protected static class FSIndexOutput extends BufferedIndexOutput { - RandomAccessFile file = null; - - // remember if the file is open, so that we don't try to close it - // more than once - private volatile boolean isOpen; + /** + * This setting has no effect anymore. + * @deprecated This is no longer used since Lucene 4.5. + */ + @Deprecated + public final int getReadChunkSize() { + return chunkSize; + } - public FSIndexOutput(File path) throws IOException { - file = new RandomAccessFile(path, "rw"); - isOpen = true; + final class FSIndexOutput extends OutputStreamIndexOutput { + /** + * The maximum chunk size is 8192 bytes, because {@link FileOutputStream} mallocs + * a native buffer outside of stack if the write buffer size is larger. + */ + static final int CHUNK_SIZE = 8192; + + private final String name; + + public FSIndexOutput(String name) throws IOException { + super(new FilterOutputStream(new FileOutputStream(new File(directory, name))) { + // This implementation ensures, that we never write more than CHUNK_SIZE bytes: + @Override + public void write(byte[] b, int offset, int length) throws IOException { + while (length > 0) { + final int chunk = Math.min(length, CHUNK_SIZE); + out.write(b, offset, chunk); + length -= chunk; + offset += chunk; + } + } + }, CHUNK_SIZE); + this.name = name; } - - /** output methods: */ - public void flushBuffer(byte[] b, int offset, int size) throws IOException { - file.write(b, offset, size); - } + + @Override public void close() throws IOException { - // only close the file if it has not been closed yet - if (isOpen) { - boolean success = false; - try { - super.close(); - success = true; - } finally { - isOpen = false; - if (!success) { - try { - file.close(); - } catch (Throwable t) { - // Suppress so we don't mask original exception - } - } else - file.close(); - } + try { + onIndexOutputClosed(name); + } finally { + super.close(); } } - - /** Random-access methods */ - public void seek(long pos) throws IOException { - super.seek(pos); - file.seek(pos); - } - public long length() throws IOException { - return file.length(); - } - public void setLength(long length) throws IOException { - file.setLength(length); - } } + + protected void fsync(String name) throws IOException { + IOUtils.fsync(new File(directory, name), false); + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/FSLockFactory.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/FileSwitchDirectory.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/FilterDirectory.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/FlushInfo.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/IOContext.java'. Fisheye: No comparison available. Pass `N' to diff? 
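The FSDirectory hunks above replace the old getDirectory() cache with the open() factory and route all reads and writes through IOContext. The following is a minimal usage sketch of that API, assuming the Lucene 4.x classes shown in this diff (FSDirectory.open, IOContext.DEFAULT, and the DataInput/DataOutput string methods inherited by IndexInput/IndexOutput); the index path and the file name "hello.bin" are made-up illustrations, not part of the patch:

import java.io.File;
import java.util.Collections;

import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;

public class StoreApiSketch {
  public static void main(String[] args) throws Exception {
    // open() picks MMapDirectory, NIOFSDirectory or SimpleFSDirectory
    // depending on the platform, as described in the javadoc above.
    Directory dir = FSDirectory.open(new File("/tmp/example-index"));
    try {
      // Write one file through the Directory abstraction.
      IndexOutput out = dir.createOutput("hello.bin", IOContext.DEFAULT);
      try {
        out.writeString("hello");
      } finally {
        out.close();
      }

      // sync() fsyncs the named files (and the directory itself) if they are still stale.
      dir.sync(Collections.singleton("hello.bin"));

      // Read the file back.
      IndexInput in = dir.openInput("hello.bin", IOContext.DEFAULT);
      try {
        System.out.println(in.readString());
      } finally {
        in.close();
      }
    } finally {
      dir.close();
    }
  }
}

Since IndexInput and IndexOutput are Closeable in this version, try-with-resources would work equally well in place of the explicit finally blocks.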
Index: 3rdParty_sources/lucene/org/apache/lucene/store/IndexInput.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/IndexInput.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/IndexInput.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/IndexInput.java 16 Dec 2014 11:31:35 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,178 +17,40 @@ * limitations under the License. */ +import java.io.Closeable; import java.io.IOException; /** Abstract base class for input from a file in a {@link Directory}. A * random-access input stream. Used for all Lucene index input operations. + * + *

    {@code IndexInput} may only be used from one thread, because it is not + * thread safe (it keeps internal state like file position). To allow + * multithreaded use, every {@code IndexInput} instance must be cloned before + * used in another thread. Subclasses must therefore implement {@link #clone()}, + * returning a new {@code IndexInput} which operates on the same underlying + * resource, but positioned independently. Lucene never closes cloned + * {@code IndexInput}s, it will only do this on the original one. + * The original instance must take care that cloned instances throw + * {@link AlreadyClosedException} when the original one is closed. + * @see Directory */ -public abstract class IndexInput implements Cloneable { - private byte[] bytes; // used by readString() - private char[] chars; // used by readModifiedUTF8String() - private boolean preUTF8Strings; // true if we are reading old (modified UTF8) string format +public abstract class IndexInput extends DataInput implements Cloneable,Closeable { - /** Reads and returns a single byte. - * @see IndexOutput#writeByte(byte) - */ - public abstract byte readByte() throws IOException; + private final String resourceDescription; - /** Reads a specified number of bytes into an array at the specified offset. - * @param b the array to read bytes into - * @param offset the offset in the array to start storing bytes - * @param len the number of bytes to read - * @see IndexOutput#writeBytes(byte[],int) - */ - public abstract void readBytes(byte[] b, int offset, int len) - throws IOException; - - /** Reads a specified number of bytes into an array at the - * specified offset with control over whether the read - * should be buffered (callers who have their own buffer - * should pass in "false" for useBuffer). Currently only - * {@link BufferedIndexInput} respects this parameter. - * @param b the array to read bytes into - * @param offset the offset in the array to start storing bytes - * @param len the number of bytes to read - * @param useBuffer set to false if the caller will handle - * buffering. - * @see IndexOutput#writeBytes(byte[],int) - */ - public void readBytes(byte[] b, int offset, int len, boolean useBuffer) - throws IOException - { - // Default to ignoring useBuffer entirely - readBytes(b, offset, len); - } - - /** Reads four bytes and returns an int. - * @see IndexOutput#writeInt(int) - */ - public int readInt() throws IOException { - return ((readByte() & 0xFF) << 24) | ((readByte() & 0xFF) << 16) - | ((readByte() & 0xFF) << 8) | (readByte() & 0xFF); - } - - /** Reads an int stored in variable-length format. Reads between one and - * five bytes. Smaller values take fewer bytes. Negative numbers are not - * supported. - * @see IndexOutput#writeVInt(int) - */ - public int readVInt() throws IOException { - byte b = readByte(); - int i = b & 0x7F; - for (int shift = 7; (b & 0x80) != 0; shift += 7) { - b = readByte(); - i |= (b & 0x7F) << shift; + /** resourceDescription should be a non-null, opaque string + * describing this resource; it's returned from + * {@link #toString}. */ + protected IndexInput(String resourceDescription) { + if (resourceDescription == null) { + throw new IllegalArgumentException("resourceDescription must not be null"); } - return i; + this.resourceDescription = resourceDescription; } - /** Reads eight bytes and returns a long. 
- * @see IndexOutput#writeLong(long) - */ - public long readLong() throws IOException { - return (((long)readInt()) << 32) | (readInt() & 0xFFFFFFFFL); - } - - /** Reads a long stored in variable-length format. Reads between one and - * nine bytes. Smaller values take fewer bytes. Negative numbers are not - * supported. */ - public long readVLong() throws IOException { - byte b = readByte(); - long i = b & 0x7F; - for (int shift = 7; (b & 0x80) != 0; shift += 7) { - b = readByte(); - i |= (b & 0x7FL) << shift; - } - return i; - } - - /** Call this if readString should read characters stored - * in the old modified UTF8 format (length in java chars - * and java's modified UTF8 encoding). This is used for - * indices written pre-2.4 See LUCENE-510 for details. */ - public void setModifiedUTF8StringsMode() { - preUTF8Strings = true; - } - - /** Reads a string. - * @see IndexOutput#writeString(String) - */ - public String readString() throws IOException { - if (preUTF8Strings) - return readModifiedUTF8String(); - int length = readVInt(); - if (bytes == null || length > bytes.length) - bytes = new byte[(int) (length*1.25)]; - readBytes(bytes, 0, length); - return new String(bytes, 0, length, "UTF-8"); - } - - private String readModifiedUTF8String() throws IOException { - int length = readVInt(); - if (chars == null || length > chars.length) - chars = new char[length]; - readChars(chars, 0, length); - return new String(chars, 0, length); - } - - /** Reads Lucene's old "modified UTF-8" encoded - * characters into an array. - * @param buffer the array to read characters into - * @param start the offset in the array to start storing characters - * @param length the number of characters to read - * @see IndexOutput#writeChars(String,int,int) - * @deprecated -- please use readString or readBytes - * instead, and construct the string - * from those utf8 bytes - */ - public void readChars(char[] buffer, int start, int length) - throws IOException { - final int end = start + length; - for (int i = start; i < end; i++) { - byte b = readByte(); - if ((b & 0x80) == 0) - buffer[i] = (char)(b & 0x7F); - else if ((b & 0xE0) != 0xE0) { - buffer[i] = (char)(((b & 0x1F) << 6) - | (readByte() & 0x3F)); - } else - buffer[i] = (char)(((b & 0x0F) << 12) - | ((readByte() & 0x3F) << 6) - | (readByte() & 0x3F)); - } - } - - /** - * Expert - * - * Similar to {@link #readChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in. It still - * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it does not need a buffer to store anything - * and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine - * how many more bytes to read - * @param length The number of chars to read - * @deprecated this method operates on old "modified utf8" encoded - * strings - */ - public void skipChars(int length) throws IOException{ - for (int i = 0; i < length; i++) { - byte b = readByte(); - if ((b & 0x80) == 0){ - //do nothing, we only need one byte - } - else if ((b & 0xE0) != 0xE0) { - readByte();//read an additional byte - } else{ - //read two additional bytes. - readByte(); - readByte(); - } - } - } - - - /** Closes the stream to futher operations. */ + /** Closes the stream to further operations. */ + @Override public abstract void close() throws IOException; /** Returns the current position in this file, where the next read will @@ -205,25 +67,66 @@ /** The number of bytes in the file. 
*/ public abstract long length(); - /** Returns a clone of this stream. - * - *

Clones of a stream access the same data, and are positioned at the same
- * point as the stream they were cloned from.
- *
- *

Expert: Subclasses must ensure that clones may be positioned at
- * different points in the input from each other and from the stream they
- * were cloned from.
+ @Override
+ public String toString() {
+ return resourceDescription;
+ }
+
+ /** {@inheritDoc}
+ *

    Warning: Lucene never closes cloned + * {@code IndexInput}s, it will only do this on the original one. + * The original instance must take care that cloned instances throw + * {@link AlreadyClosedException} when the original one is closed. */ - public Object clone() { - IndexInput clone = null; - try { - clone = (IndexInput)super.clone(); - } catch (CloneNotSupportedException e) {} - - clone.bytes = null; - clone.chars = null; - - return clone; + @Override + public IndexInput clone() { + return (IndexInput) super.clone(); } - + + /** + * Creates a slice of this index input, with the given description, offset, and length. + * The slice is seeked to the beginning. + */ + public abstract IndexInput slice(String sliceDescription, long offset, long length) throws IOException; + + /** + * Creates a random-access slice of this index input, with the given offset and length. + *

    + * The default implementation calls {@link #slice}, and it doesn't support random access, + * it implements absolute reads as seek+read. + */ + public RandomAccessInput randomAccessSlice(long offset, long length) throws IOException { + final IndexInput slice = slice("randomaccess", offset, length); + if (slice instanceof RandomAccessInput) { + // slice() already supports random access + return (RandomAccessInput) slice; + } else { + // return default impl + return new RandomAccessInput() { + @Override + public byte readByte(long pos) throws IOException { + slice.seek(pos); + return slice.readByte(); + } + + @Override + public short readShort(long pos) throws IOException { + slice.seek(pos); + return slice.readShort(); + } + + @Override + public int readInt(long pos) throws IOException { + slice.seek(pos); + return slice.readInt(); + } + + @Override + public long readLong(long pos) throws IOException { + slice.seek(pos); + return slice.readLong(); + } + }; + } + } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/IndexOutput.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/IndexOutput.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/IndexOutput.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/IndexOutput.java 16 Dec 2014 11:31:35 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,193 +17,46 @@ * limitations under the License. */ +import java.io.Closeable; import java.io.IOException; -import org.apache.lucene.util.UnicodeUtil; /** Abstract base class for output to a file in a Directory. A random-access * output stream. Used for all Lucene index output operations. + + *

    {@code IndexOutput} may only be used from one thread, because it is not + * thread safe (it keeps internal state like file position). + * @see Directory * @see IndexInput */ -public abstract class IndexOutput { +public abstract class IndexOutput extends DataOutput implements Closeable { - private UnicodeUtil.UTF8Result utf8Result = new UnicodeUtil.UTF8Result(); - - /** Writes a single byte. - * @see IndexInput#readByte() + /** Forces any buffered output to be written. + * @deprecated Lucene never calls this method. */ - public abstract void writeByte(byte b) throws IOException; - - /** Writes an array of bytes. - * @param b the bytes to write - * @param length the number of bytes to write - * @see IndexInput#readBytes(byte[],int,int) - */ - public void writeBytes(byte[] b, int length) throws IOException { - writeBytes(b, 0, length); - } - - /** Writes an array of bytes. - * @param b the bytes to write - * @param offset the offset in the byte array - * @param length the number of bytes to write - * @see IndexInput#readBytes(byte[],int,int) - */ - public abstract void writeBytes(byte[] b, int offset, int length) throws IOException; - - /** Writes an int as four bytes. - * @see IndexInput#readInt() - */ - public void writeInt(int i) throws IOException { - writeByte((byte)(i >> 24)); - writeByte((byte)(i >> 16)); - writeByte((byte)(i >> 8)); - writeByte((byte) i); - } - - /** Writes an int in a variable-length format. Writes between one and - * five bytes. Smaller values take fewer bytes. Negative numbers are not - * supported. - * @see IndexInput#readVInt() - */ - public void writeVInt(int i) throws IOException { - while ((i & ~0x7F) != 0) { - writeByte((byte)((i & 0x7f) | 0x80)); - i >>>= 7; - } - writeByte((byte)i); - } - - /** Writes a long as eight bytes. - * @see IndexInput#readLong() - */ - public void writeLong(long i) throws IOException { - writeInt((int) (i >> 32)); - writeInt((int) i); - } - - /** Writes an long in a variable-length format. Writes between one and five - * bytes. Smaller values take fewer bytes. Negative numbers are not - * supported. - * @see IndexInput#readVLong() - */ - public void writeVLong(long i) throws IOException { - while ((i & ~0x7F) != 0) { - writeByte((byte)((i & 0x7f) | 0x80)); - i >>>= 7; - } - writeByte((byte)i); - } - - /** Writes a string. - * @see IndexInput#readString() - */ - public void writeString(String s) throws IOException { - UnicodeUtil.UTF16toUTF8(s, 0, s.length(), utf8Result); - writeVInt(utf8Result.length); - writeBytes(utf8Result.result, 0, utf8Result.length); - } - - /** Writes a sub sequence of characters from s as the old - * format (modified UTF-8 encoded bytes). 
- * @param s the source of the characters - * @param start the first character in the sequence - * @param length the number of characters in the sequence - * @deprecated -- please pre-convert to utf8 bytes - * instead or use {@link #writeString} - */ - public void writeChars(String s, int start, int length) - throws IOException { - final int end = start + length; - for (int i = start; i < end; i++) { - final int code = (int)s.charAt(i); - if (code >= 0x01 && code <= 0x7F) - writeByte((byte)code); - else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) { - writeByte((byte)(0xC0 | (code >> 6))); - writeByte((byte)(0x80 | (code & 0x3F))); - } else { - writeByte((byte)(0xE0 | (code >>> 12))); - writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); - writeByte((byte)(0x80 | (code & 0x3F))); - } - } - } - - /** Writes a sub sequence of characters from char[] as - * the old format (modified UTF-8 encoded bytes). - * @param s the source of the characters - * @param start the first character in the sequence - * @param length the number of characters in the sequence - * @deprecated -- please pre-convert to utf8 bytes instead or use {@link #writeString} - */ - public void writeChars(char[] s, int start, int length) - throws IOException { - final int end = start + length; - for (int i = start; i < end; i++) { - final int code = (int)s[i]; - if (code >= 0x01 && code <= 0x7F) - writeByte((byte)code); - else if (((code >= 0x80) && (code <= 0x7FF)) || code == 0) { - writeByte((byte)(0xC0 | (code >> 6))); - writeByte((byte)(0x80 | (code & 0x3F))); - } else { - writeByte((byte)(0xE0 | (code >>> 12))); - writeByte((byte)(0x80 | ((code >> 6) & 0x3F))); - writeByte((byte)(0x80 | (code & 0x3F))); - } - } - } - - private static int COPY_BUFFER_SIZE = 16384; - private byte[] copyBuffer; - - /** Copy numBytes bytes from input to ourself. */ - public void copyBytes(IndexInput input, long numBytes) throws IOException { - long left = numBytes; - if (copyBuffer == null) - copyBuffer = new byte[COPY_BUFFER_SIZE]; - while(left > 0) { - final int toCopy; - if (left > COPY_BUFFER_SIZE) - toCopy = COPY_BUFFER_SIZE; - else - toCopy = (int) left; - input.readBytes(copyBuffer, 0, toCopy); - writeBytes(copyBuffer, 0, toCopy); - left -= toCopy; - } - } - - /** Forces any buffered output to be written. */ + @Deprecated public abstract void flush() throws IOException; /** Closes this stream to further operations. */ + @Override public abstract void close() throws IOException; /** Returns the current position in this file, where the next write will * occur. - * @see #seek(long) */ public abstract long getFilePointer(); - /** Sets current position in this file, where the next write will occur. - * @see #getFilePointer() - */ - public abstract void seek(long pos) throws IOException; + /** Returns the current checksum of bytes written so far */ + public abstract long getChecksum() throws IOException; - /** The number of bytes in the file. */ - public abstract long length() throws IOException; - - /** Set the file length. By default, this method does - * nothing (it's optional for a Directory to implement - * it). But, certain Directory implementations (for - * example @see FSDirectory) can use this to inform the - * underlying IO system to pre-allocate the file to the - * specified size. If the length is longer than the - * current file length, the bytes added to the file are - * undefined. Otherwise the file is truncated. - * @param length file length + /** The number of bytes in the file. 
+ * + * @deprecated Use {@link #getFilePointer} instead; this + * method will be removed in Lucene5.0. */ - public void setLength(long length) throws IOException {}; + @Deprecated + public long length() throws IOException { + return getFilePointer(); + } + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/InputStreamDataInput.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/Lock.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/Lock.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/Lock.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/Lock.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,22 +17,25 @@ * limitations under the License. */ +import java.io.Closeable; import java.io.IOException; +import org.apache.lucene.util.ThreadInterruptedException; + /** An interprocess mutex lock. - *

    Typical use might look like:

    + * 

    Typical use might look like:

      * new Lock.With(directory.makeLock("my.lock")) {
      *     public Object doBody() {
      *       ... code to execute while locked ...
      *     }
      *   }.run();
      * 
    * - * - * @version $Id$ * @see Directory#makeLock(String) + * + * @lucene.internal */ -public abstract class Lock { +public abstract class Lock implements Closeable { /** How long {@link #obtain(long)} waits, in milliseconds, * in between attempts to acquire the lock. */ @@ -43,7 +46,8 @@ public static final long LOCK_OBTAIN_WAIT_FOREVER = -1; /** Attempts to obtain exclusive access and immediately return - * upon success or failure. + * upon success or failure. Use {@link #close} to + * release the lock. * @return true iff exclusive access is obtained */ public abstract boolean obtain() throws IOException; @@ -68,7 +72,7 @@ * out of bounds * @throws IOException if obtain() throws IOException */ - public boolean obtain(long lockWaitTimeout) throws LockObtainFailedException, IOException { + public final boolean obtain(long lockWaitTimeout) throws IOException { failureReason = null; boolean locked = obtain(); if (lockWaitTimeout < 0 && lockWaitTimeout != LOCK_OBTAIN_WAIT_FOREVER) @@ -82,28 +86,24 @@ if (failureReason != null) { reason += ": " + failureReason; } - LockObtainFailedException e = new LockObtainFailedException(reason); - if (failureReason != null) { - e.initCause(failureReason); - } - throw e; + throw new LockObtainFailedException(reason, failureReason); } try { Thread.sleep(LOCK_POLL_INTERVAL); - } catch (InterruptedException e) { - throw new IOException(e.toString()); + } catch (InterruptedException ie) { + throw new ThreadInterruptedException(ie); } locked = obtain(); } return locked; } /** Releases exclusive access. */ - public abstract void release() throws IOException; + public abstract void close() throws IOException; /** Returns true if the resource is currently locked. Note that one must * still call {@link #obtain()} before using the resource. */ - public abstract boolean isLocked(); + public abstract boolean isLocked() throws IOException; /** Utility class for executing code with exclusive access. */ @@ -129,14 +129,15 @@ * be obtained * @throws IOException if {@link Lock#obtain} throws IOException */ - public Object run() throws LockObtainFailedException, IOException { + public Object run() throws IOException { boolean locked = false; try { locked = lock.obtain(lockWaitTimeout); return doBody(); } finally { - if (locked) - lock.release(); + if (locked) { + lock.close(); + } } } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/LockFactory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/LockFactory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/LockFactory.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/LockFactory.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -23,19 +23,30 @@ *

Base class for Locking implementation. {@link Directory} uses
 * instances of this class to implement locking.

    * - *

Note that there are some useful tools to verify that
- * your LockFactory is working correctly: {@link
- * VerifyingLockFactory}, {@link LockStressTest}, {@link
- * LockVerifyServer}.

    + *

Lucene uses {@link NativeFSLockFactory} by default for
+ * {@link FSDirectory}-based index directories.

    * + *

Special care needs to be taken if you change the locking
+ * implementation: First be certain that no writer is in fact
+ * writing to the index otherwise you can easily corrupt
+ * your index. Be sure to do the LockFactory change on all Lucene
+ * instances and clean up all leftover lock files before starting
+ * the new configuration for the first time. Different implementations
+ * can not work together!

    + * + *

If you suspect that some LockFactory implementation is
+ * not working properly in your environment, you can easily
+ * test it by using {@link VerifyingLockFactory}, {@link
+ * LockVerifyServer} and {@link LockStressTest}.

    + * * @see LockVerifyServer * @see LockStressTest * @see VerifyingLockFactory */ public abstract class LockFactory { - protected String lockPrefix = ""; + protected String lockPrefix = null; /** * Set the prefix in use for all locks created in this Index: 3rdParty_sources/lucene/org/apache/lucene/store/LockObtainFailedException.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/LockObtainFailedException.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/LockObtainFailedException.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/LockObtainFailedException.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -24,10 +24,14 @@ * could not be acquired. This * happens when a writer tries to open an index * that another writer already has open. - * @see Lock#obtain(long). + * @see Lock#obtain(long) */ public class LockObtainFailedException extends IOException { public LockObtainFailedException(String message) { super(message); } + + public LockObtainFailedException(String message, Throwable cause) { + super(message, cause); + } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/LockReleaseFailedException.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/LockReleaseFailedException.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/LockReleaseFailedException.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/LockReleaseFailedException.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -22,7 +22,7 @@ /** * This exception is thrown when the write.lock * could not be released. - * @see Lock#release(). + * @see Lock#close() */ public class LockReleaseFailedException extends IOException { public LockReleaseFailedException(String message) { Index: 3rdParty_sources/lucene/org/apache/lucene/store/LockStressTest.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/LockStressTest.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/LockStressTest.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/LockStressTest.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -19,6 +19,11 @@ import java.io.IOException; import java.io.File; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetSocketAddress; +import java.net.Socket; +import java.util.Random; /** * Simple standalone tool that forever acquires & releases a @@ -33,85 +38,107 @@ public static void main(String[] args) throws Exception { - if (args.length != 6) { - System.out.println("\nUsage: java org.apache.lucene.store.LockStressTest myID verifierHostOrIP verifierPort lockFactoryClassName lockDirName sleepTime\n" + + if (args.length != 7) { + System.out.println("Usage: java org.apache.lucene.store.LockStressTest myID verifierHost verifierPort lockFactoryClassName lockDirName sleepTimeMS count\n" + "\n" + " myID = int from 0 .. 255 (should be unique for test process)\n" + - " verifierHostOrIP = host name or IP address where LockVerifyServer is running\n" + + " verifierHost = hostname that LockVerifyServer is listening on\n" + " verifierPort = port that LockVerifyServer is listening on\n" + " lockFactoryClassName = primary LockFactory class that we will use\n" + " lockDirName = path to the lock directory (only set for Simple/NativeFSLockFactory\n" + " sleepTimeMS = milliseconds to pause betweeen each lock obtain/release\n" + + " count = number of locking tries\n" + "\n" + "You should run multiple instances of this process, each with its own\n" + "unique ID, and each pointing to the same lock directory, to verify\n" + "that locking is working correctly.\n" + "\n" + - "Make sure you are first running LockVerifyServer.\n" + - "\n"); + "Make sure you are first running LockVerifyServer."); System.exit(1); } - final int myID = Integer.parseInt(args[0]); + int arg = 0; + final int myID = Integer.parseInt(args[arg++]); if (myID < 0 || myID > 255) { System.out.println("myID must be a unique int 0..255"); System.exit(1); } - final String verifierHost = args[1]; - final int verifierPort = Integer.parseInt(args[2]); - final String lockFactoryClassName = args[3]; - final String lockDirName = args[4]; - final int sleepTimeMS = Integer.parseInt(args[5]); + final String verifierHost = args[arg++]; + final int verifierPort = Integer.parseInt(args[arg++]); + final String lockFactoryClassName = args[arg++]; + final String lockDirName = args[arg++]; + final int sleepTimeMS = Integer.parseInt(args[arg++]); + final int count = Integer.parseInt(args[arg++]); - Class c; - try { - c = Class.forName(lockFactoryClassName); - } catch (ClassNotFoundException e) { - throw new IOException("unable to find LockClass " + lockFactoryClassName); + final LockFactory lockFactory = getNewLockFactory(lockFactoryClassName, lockDirName); + final InetSocketAddress addr = new InetSocketAddress(verifierHost, verifierPort); + System.out.println("Connecting to server " + addr + + " and registering as client " + myID + "..."); + try (Socket socket = new Socket()) { + socket.setReuseAddress(true); + socket.connect(addr, 500); + final OutputStream out = socket.getOutputStream(); + final InputStream in = socket.getInputStream(); + + out.write(myID); + out.flush(); + LockFactory verifyLF = new VerifyingLockFactory(lockFactory, in, out); + Lock l = verifyLF.makeLock("test.lock"); + final Random rnd = new Random(); + + // wait for starting gun + if (in.read() != 43) { + throw new IOException("Protocol violation"); + } + + for (int i = 0; i < count; i++) { + boolean obtained = false; + try { + obtained = l.obtain(rnd.nextInt(100) + 10); + } catch (LockObtainFailedException e) {} + + if (obtained) { + if (rnd.nextInt(10) == 0) 
{ + if (rnd.nextBoolean()) { + verifyLF = new VerifyingLockFactory(getNewLockFactory(lockFactoryClassName, lockDirName), in, out); + } + final Lock secondLock = verifyLF.makeLock("test.lock"); + if (secondLock.obtain()) { + throw new IOException("Double Obtain"); + } + } + Thread.sleep(sleepTimeMS); + l.close(); + } + + if (i % 500 == 0) { + System.out.println((i * 100. / count) + "% done."); + } + + Thread.sleep(sleepTimeMS); + } } + + System.out.println("Finished " + count + " tries."); + } + + private static LockFactory getNewLockFactory(String lockFactoryClassName, String lockDirName) throws IOException { LockFactory lockFactory; try { - lockFactory = (LockFactory) c.newInstance(); - } catch (IllegalAccessException e) { - throw new IOException("IllegalAccessException when instantiating LockClass " + lockFactoryClassName); - } catch (InstantiationException e) { - throw new IOException("InstantiationException when instantiating LockClass " + lockFactoryClassName); - } catch (ClassCastException e) { - throw new IOException("unable to cast LockClass " + lockFactoryClassName + " instance to a LockFactory"); + lockFactory = Class.forName(lockFactoryClassName).asSubclass(LockFactory.class).newInstance(); + } catch (IllegalAccessException | InstantiationException | ClassCastException | ClassNotFoundException e) { + throw new IOException("Cannot instantiate lock factory " + lockFactoryClassName); } File lockDir = new File(lockDirName); - if (lockFactory instanceof NativeFSLockFactory) { - ((NativeFSLockFactory) lockFactory).setLockDir(lockDir); - } else if (lockFactory instanceof SimpleFSLockFactory) { - ((SimpleFSLockFactory) lockFactory).setLockDir(lockDir); + if (lockFactory instanceof FSLockFactory) { + ((FSLockFactory) lockFactory).setLockDir(lockDir); } - lockFactory.setLockPrefix("test"); - - LockFactory verifyLF = new VerifyingLockFactory((byte) myID, lockFactory, verifierHost, verifierPort); - - Lock l = verifyLF.makeLock("test.lock"); - - while(true) { - - boolean obtained = false; - - try { - obtained = l.obtain(10); - } catch (LockObtainFailedException e) { - System.out.print("x"); - } - - if (obtained) { - System.out.print("l"); - l.release(); - } - Thread.sleep(sleepTimeMS); - } + return lockFactory; } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/LockVerifyServer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/LockVerifyServer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/LockVerifyServer.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/LockVerifyServer.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,12 +17,16 @@ * limitations under the License. */ +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.net.InetSocketAddress; import java.net.ServerSocket; import java.net.Socket; -import java.io.OutputStream; -import java.io.InputStream; -import java.io.IOException; +import java.util.concurrent.CountDownLatch; +import org.apache.lucene.util.IOUtils; + /** * Simple standalone server that must be running when you * use {@link VerifyingLockFactory}. 
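A note on the protocol change visible in the LockStressTest hunk above: the rewritten test no longer passes host and port into VerifyingLockFactory but opens one socket itself, registers with a single ID byte, and then waits for a one-byte "starting gun" (43) before its obtain/release loop. The following is only an illustrative sketch of that handshake, not code from the patch; the host, port and client id are placeholders (the real port is whatever LockVerifyServer prints when it binds), and the java.net/java.io types are those imported in the patch:

    int myID = 0;                                     // placeholder client id (0..255)
    try (Socket socket = new Socket()) {
      socket.setReuseAddress(true);
      socket.connect(new InetSocketAddress("127.0.0.1", 7777), 500);
      OutputStream out = socket.getOutputStream();
      InputStream in = socket.getInputStream();
      out.write(myID);                                // register this client with the verifier
      out.flush();
      if (in.read() != 43) {                          // wait for the server's starting-gun byte
        throw new IOException("Protocol violation");
      }
      // from here on VerifyingLockFactory mirrors every obtain/release over the
      // same two streams, so the server can flag double obtains or bad releases.
    }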
This server simply @@ -35,62 +39,108 @@ public class LockVerifyServer { - private static String getTime(long startTime) { - return "[" + ((System.currentTimeMillis()-startTime)/1000) + "s] "; - } + public static void main(String[] args) throws Exception { - public static void main(String[] args) throws IOException { - - if (args.length != 1) { - System.out.println("\nUsage: java org.apache.lucene.store.LockVerifyServer port\n"); + if (args.length != 2) { + System.out.println("Usage: java org.apache.lucene.store.LockVerifyServer bindToIp clients\n"); System.exit(1); } - final int port = Integer.parseInt(args[0]); + int arg = 0; + final String hostname = args[arg++]; + final int maxClients = Integer.parseInt(args[arg++]); - ServerSocket s = new ServerSocket(port); - s.setReuseAddress(true); - System.out.println("\nReady on port " + port + "..."); + try (final ServerSocket s = new ServerSocket()) { + s.setReuseAddress(true); + s.setSoTimeout(30000); // initially 30 secs to give clients enough time to startup + s.bind(new InetSocketAddress(hostname, 0)); + final InetSocketAddress localAddr = (InetSocketAddress) s.getLocalSocketAddress(); + System.out.println("Listening on " + localAddr + "..."); + + // we set the port as a sysprop, so the ANT task can read it. For that to work, this server must run in-process: + System.setProperty("lockverifyserver.port", Integer.toString(localAddr.getPort())); + + final Object localLock = new Object(); + final int[] lockedID = new int[1]; + lockedID[0] = -1; + final CountDownLatch startingGun = new CountDownLatch(1); + final Thread[] threads = new Thread[maxClients]; + + for (int count = 0; count < maxClients; count++) { + final Socket cs = s.accept(); + threads[count] = new Thread() { + @Override + public void run() { + try (InputStream in = cs.getInputStream(); OutputStream os = cs.getOutputStream()) { + final int id = in.read(); + if (id < 0) { + throw new IOException("Client closed connection before communication started."); + } + + startingGun.await(); + os.write(43); + os.flush(); + + while(true) { + final int command = in.read(); + if (command < 0) { + return; // closed + } + + synchronized(localLock) { + final int currentLock = lockedID[0]; + if (currentLock == -2) { + return; // another thread got error, so we exit, too! 
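The switch statement shown in the next hunk lines is the whole verification logic: command byte 1 means a client claims it obtained the lock, 0 means it released it, and lockedID[0] tracks the current owner (-1 for none, -2 once an error has been seen so the other handler threads exit). Condensed into a sketch that reuses the patch's variable names:

    synchronized (localLock) {
      if (command == 1) {                                    // "I obtained the lock"
        if (lockedID[0] != -1) throw new IllegalStateException("double obtain");
        lockedID[0] = id;
      } else if (command == 0) {                             // "I released the lock"
        if (lockedID[0] != id) throw new IllegalStateException("released by non-owner");
        lockedID[0] = -1;
      }
    }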
+ } + switch (command) { + case 1: + // Locked + if (currentLock != -1) { + lockedID[0] = -2; + throw new IllegalStateException("id " + id + " got lock, but " + currentLock + " already holds the lock"); + } + lockedID[0] = id; + break; + case 0: + // Unlocked + if (currentLock != id) { + lockedID[0] = -2; + throw new IllegalStateException("id " + id + " released the lock, but " + currentLock + " is the one holding the lock"); + } + lockedID[0] = -1; + break; + default: + throw new RuntimeException("Unrecognized command: " + command); + } + os.write(command); + os.flush(); + } + } + } catch (RuntimeException | Error e) { + throw e; + } catch (Exception ioe) { + throw new RuntimeException(ioe); + } finally { + IOUtils.closeWhileHandlingException(cs); + } + } + }; + threads[count].start(); + } + + // start + System.out.println("All clients started, fire gun..."); + startingGun.countDown(); + + // wait for all threads to finish + for (Thread t : threads) { + t.join(); + } + + // cleanup sysprop + System.clearProperty("lockverifyserver.port"); - int lockedID = 0; - long startTime = System.currentTimeMillis(); - - while(true) { - Socket cs = s.accept(); - OutputStream out = cs.getOutputStream(); - InputStream in = cs.getInputStream(); - - int id = in.read(); - int command = in.read(); - - boolean err = false; - - if (command == 1) { - // Locked - if (lockedID != 0) { - err = true; - System.out.println(getTime(startTime) + " ERROR: id " + id + " got lock, but " + lockedID + " already holds the lock"); - } - lockedID = id; - } else if (command == 0) { - if (lockedID != id) { - err = true; - System.out.println(getTime(startTime) + " ERROR: id " + id + " released the lock, but " + lockedID + " is the one holding the lock"); - } - lockedID = 0; - } else - throw new RuntimeException("unrecognized command " + command); - - System.out.print("."); - - if (err) - out.write(1); - else - out.write(0); - - out.close(); - in.close(); - cs.close(); + System.out.println("Server terminated."); } } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/MMapDirectory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/MMapDirectory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/MMapDirectory.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/MMapDirectory.java 16 Dec 2014 11:31:35 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,187 +19,270 @@ import java.io.IOException; import java.io.File; -import java.io.RandomAccessFile; import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; // javadoc @link import java.nio.channels.FileChannel; import java.nio.channels.FileChannel.MapMode; +import java.nio.file.StandardOpenOption; -/** File-based {@link Directory} implementation that uses mmap for input. 
+import java.security.AccessController; +import java.security.PrivilegedExceptionAction; +import java.security.PrivilegedActionException; +import java.util.Locale; +import java.lang.reflect.Method; + +import org.apache.lucene.store.ByteBufferIndexInput.BufferCleaner; +import org.apache.lucene.util.Constants; + +/** File-based {@link Directory} implementation that uses + * mmap for reading, and {@link + * FSDirectory.FSIndexOutput} for writing. * - *

    To use this, invoke Java with the System property - * org.apache.lucene.FSDirectory.class set to - * org.apache.lucene.store.MMapDirectory. This will cause {@link - * FSDirectory#getDirectory(File,boolean)} to return instances of this class. + *

    NOTE: memory mapping uses up a portion of the + * virtual memory address space in your process equal to the + * size of the file being mapped. Before using this class, + * be sure your have plenty of virtual address space, e.g. by + * using a 64 bit JRE, or a 32 bit JRE with indexes that are + * guaranteed to fit within the address space. + * On 32 bit platforms also consult {@link #MMapDirectory(File, LockFactory, int)} + * if you have problems with mmap failing because of fragmented + * address space. If you get an OutOfMemoryException, it is recommended + * to reduce the chunk size, until it works. + * + *

    Due to + * this bug in Sun's JRE, MMapDirectory's {@link IndexInput#close} + * is unable to close the underlying OS file handle. Only when GC + * finally collects the underlying objects, which could be quite + * some time later, will the file handle be closed. + * + *

    This will consume additional transient disk usage: on Windows, + * attempts to delete or overwrite the files will result in an + * exception; on other platforms, which typically have a "delete on + * last close" semantics, while such operations will succeed, the bytes + * are still consuming space on disk. For many applications this + * limitation is not a problem (e.g. if you have plenty of disk space, + * and you don't rely on overwriting files on Windows) but it's still + * an important limitation to be aware of. + * + *

    This class supplies the workaround mentioned in the bug report + * (see {@link #setUseUnmap}), which may fail on + * non-Sun JVMs. It forcefully unmaps the buffer on close by using + * an undocumented internal cleanup functionality. + * {@link #UNMAP_SUPPORTED} is true, if the workaround + * can be enabled (with no guarantees). + *

    + * NOTE: Accessing this class either directly or + * indirectly from a thread while it's interrupted can close the + * underlying channel immediately if at the same time the thread is + * blocked on IO. The channel will remain closed and subsequent access + * to {@link MMapDirectory} will throw a {@link ClosedChannelException}. + *
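Putting the pieces of this revision together before the class body starts: the new three-argument constructor, the chunk-size parameter and the unmap switch might be used roughly as in the sketch below. The index path, file name and the 256 MiB chunk size are example values only, and passing null for the lock factory selects the default NativeFSLockFactory as documented below:

    MMapDirectory dir = new MMapDirectory(new File("/path/to/index"), null, 1 << 28);
    if (MMapDirectory.UNMAP_SUPPORTED) {
      dir.setUseUnmap(true);              // opt into the (unsupported) forced-unmap hack
    }
    IndexInput in = dir.openInput("_0.cfs", IOContext.READONCE);  // file name is illustrative
    // ... read ...
    in.close();
    dir.close();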

    + * @see Blog post about MMapDirectory */ public class MMapDirectory extends FSDirectory { + private boolean useUnmapHack = UNMAP_SUPPORTED; + /** + * Default max chunk size. + * @see #MMapDirectory(File, LockFactory, int) + */ + public static final int DEFAULT_MAX_BUFF = Constants.JRE_IS_64BIT ? (1 << 30) : (1 << 28); + final int chunkSizePower; - private static class MMapIndexInput extends IndexInput { + /** Create a new MMapDirectory for the named location. + * + * @param path the path of the directory + * @param lockFactory the lock factory to use, or null for the default + * ({@link NativeFSLockFactory}); + * @throws IOException if there is a low-level I/O error + */ + public MMapDirectory(File path, LockFactory lockFactory) throws IOException { + this(path, lockFactory, DEFAULT_MAX_BUFF); + } - private ByteBuffer buffer; - private final long length; - - private MMapIndexInput(RandomAccessFile raf) throws IOException { - this.length = raf.length(); - this.buffer = raf.getChannel().map(MapMode.READ_ONLY, 0, length); + /** Create a new MMapDirectory for the named location and {@link NativeFSLockFactory}. + * + * @param path the path of the directory + * @throws IOException if there is a low-level I/O error + */ + public MMapDirectory(File path) throws IOException { + this(path, null); + } + + /** + * Create a new MMapDirectory for the named location, specifying the + * maximum chunk size used for memory mapping. + * + * @param path the path of the directory + * @param lockFactory the lock factory to use, or null for the default + * ({@link NativeFSLockFactory}); + * @param maxChunkSize maximum chunk size (default is 1 GiBytes for + * 64 bit JVMs and 256 MiBytes for 32 bit JVMs) used for memory mapping. + *

    + * Especially on 32 bit platform, the address space can be very fragmented, + * so large index files cannot be mapped. Using a lower chunk size makes + * the directory implementation a little bit slower (as the correct chunk + * may be resolved on lots of seeks) but the chance is higher that mmap + * does not fail. On 64 bit Java platforms, this parameter should always + * be {@code 1 << 30}, as the address space is big enough. + *

    + * Please note: The chunk size is always rounded down to a power of 2. + * @throws IOException if there is a low-level I/O error + */ + public MMapDirectory(File path, LockFactory lockFactory, int maxChunkSize) throws IOException { + super(path, lockFactory); + if (maxChunkSize <= 0) { + throw new IllegalArgumentException("Maximum chunk size for mmap must be >0"); } + this.chunkSizePower = 31 - Integer.numberOfLeadingZeros(maxChunkSize); + assert this.chunkSizePower >= 0 && this.chunkSizePower <= 30; + } - public byte readByte() throws IOException { - return buffer.get(); + /** + * true, if this platform supports unmapping mmapped files. + */ + public static final boolean UNMAP_SUPPORTED; + static { + boolean v; + try { + Class.forName("sun.misc.Cleaner"); + Class.forName("java.nio.DirectByteBuffer") + .getMethod("cleaner"); + v = true; + } catch (Exception e) { + v = false; } - - public void readBytes(byte[] b, int offset, int len) - throws IOException { - buffer.get(b, offset, len); - } - - public long getFilePointer() { - return buffer.position(); - } - - public void seek(long pos) throws IOException { - buffer.position((int)pos); - } - - public long length() { - return length; - } - - public Object clone() { - MMapIndexInput clone = (MMapIndexInput)super.clone(); - clone.buffer = buffer.duplicate(); - return clone; - } - - public void close() throws IOException {} + UNMAP_SUPPORTED = v; } - - private static class MultiMMapIndexInput extends IndexInput { - private ByteBuffer[] buffers; - private int[] bufSizes; // keep here, ByteBuffer.size() method is optional + /** + * This method enables the workaround for unmapping the buffers + * from address space after closing {@link IndexInput}, that is + * mentioned in the bug report. This hack may fail on non-Sun JVMs. + * It forcefully unmaps the buffer on close by using + * an undocumented internal cleanup functionality. + *

    NOTE: Enabling this is completely unsupported + * by Java and may lead to JVM crashes if IndexInput + * is closed while another thread is still accessing it (SIGSEGV). + * @throws IllegalArgumentException if {@link #UNMAP_SUPPORTED} + * is false and the workaround cannot be enabled. + */ + public void setUseUnmap(final boolean useUnmapHack) { + if (useUnmapHack && !UNMAP_SUPPORTED) + throw new IllegalArgumentException("Unmap hack not supported on this platform!"); + this.useUnmapHack=useUnmapHack; + } - private final long length; + /** + * Returns true, if the unmap workaround is enabled. + * @see #setUseUnmap + */ + public boolean getUseUnmap() { + return useUnmapHack; + } - private int curBufIndex; - private final int maxBufSize; - - private ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex] - private int curAvail; // redundant for speed: (bufSizes[curBufIndex] - curBuf.position()) - + /** + * Returns the current mmap chunk size. + * @see #MMapDirectory(File, LockFactory, int) + */ + public final int getMaxChunkSize() { + return 1 << chunkSizePower; + } + + /** Creates an IndexInput for the file with the given name. */ + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + ensureOpen(); + File file = new File(getDirectory(), name); + try (FileChannel c = FileChannel.open(file.toPath(), StandardOpenOption.READ)) { + final String resourceDescription = "MMapIndexInput(path=\"" + file.toString() + "\")"; + final boolean useUnmap = getUseUnmap(); + return ByteBufferIndexInput.newInstance(resourceDescription, + map(resourceDescription, c, 0, c.size()), + c.size(), chunkSizePower, useUnmap ? CLEANER : null, useUnmap); + } + } + + /** Maps a file into a set of buffers */ + final ByteBuffer[] map(String resourceDescription, FileChannel fc, long offset, long length) throws IOException { + if ((length >>> chunkSizePower) >= Integer.MAX_VALUE) + throw new IllegalArgumentException("RandomAccessFile too big for chunk size: " + resourceDescription); - public MultiMMapIndexInput(RandomAccessFile raf, int maxBufSize) - throws IOException { - this.length = raf.length(); - this.maxBufSize = maxBufSize; - - if (maxBufSize <= 0) - throw new IllegalArgumentException("Non positive maxBufSize: " - + maxBufSize); - - if ((length / maxBufSize) > Integer.MAX_VALUE) - throw new IllegalArgumentException - ("RandomAccessFile too big for maximum buffer size: " - + raf.toString()); - - int nrBuffers = (int) (length / maxBufSize); - if ((nrBuffers * maxBufSize) < length) nrBuffers++; - - this.buffers = new ByteBuffer[nrBuffers]; - this.bufSizes = new int[nrBuffers]; - - long bufferStart = 0; - FileChannel rafc = raf.getChannel(); - for (int bufNr = 0; bufNr < nrBuffers; bufNr++) { - int bufSize = (length > (bufferStart + maxBufSize)) - ? maxBufSize - : (int) (length - bufferStart); - this.buffers[bufNr] = rafc.map(MapMode.READ_ONLY,bufferStart,bufSize); - this.bufSizes[bufNr] = bufSize; - bufferStart += bufSize; + final long chunkSize = 1L << chunkSizePower; + + // we always allocate one more buffer, the last one may be a 0 byte one + final int nrBuffers = (int) (length >>> chunkSizePower) + 1; + + ByteBuffer buffers[] = new ByteBuffer[nrBuffers]; + + long bufferStart = 0L; + for (int bufNr = 0; bufNr < nrBuffers; bufNr++) { + int bufSize = (int) ( (length > (bufferStart + chunkSize)) + ? 
chunkSize + : (length - bufferStart) + ); + try { + buffers[bufNr] = fc.map(MapMode.READ_ONLY, offset + bufferStart, bufSize); + } catch (IOException ioe) { + throw convertMapFailedIOException(ioe, resourceDescription, bufSize); } - seek(0L); + bufferStart += bufSize; } + + return buffers; + } - public byte readByte() throws IOException { - // Performance might be improved by reading ahead into an array of - // eg. 128 bytes and readByte() from there. - if (curAvail == 0) { - curBufIndex++; - curBuf = buffers[curBufIndex]; // index out of bounds when too many bytes requested - curBuf.position(0); - curAvail = bufSizes[curBufIndex]; - } - curAvail--; - return curBuf.get(); + private IOException convertMapFailedIOException(IOException ioe, String resourceDescription, int bufSize) { + final String originalMessage; + final Throwable originalCause; + if (ioe.getCause() instanceof OutOfMemoryError) { + // nested OOM confuses users, because its "incorrect", just print a plain message: + originalMessage = "Map failed"; + originalCause = null; + } else { + originalMessage = ioe.getMessage(); + originalCause = ioe.getCause(); } - - public void readBytes(byte[] b, int offset, int len) throws IOException { - while (len > curAvail) { - curBuf.get(b, offset, curAvail); - len -= curAvail; - offset += curAvail; - curBufIndex++; - curBuf = buffers[curBufIndex]; // index out of bounds when too many bytes requested - curBuf.position(0); - curAvail = bufSizes[curBufIndex]; - } - curBuf.get(b, offset, len); - curAvail -= len; + final String moreInfo; + if (!Constants.JRE_IS_64BIT) { + moreInfo = "MMapDirectory should only be used on 64bit platforms, because the address space on 32bit operating systems is too small. "; + } else if (Constants.WINDOWS) { + moreInfo = "Windows is unfortunately very limited on virtual address space. If your index size is several hundred Gigabytes, consider changing to Linux. "; + } else if (Constants.LINUX) { + moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'), and 'sysctl vm.max_map_count'. "; + } else { + moreInfo = "Please review 'ulimit -v', 'ulimit -m' (both should return 'unlimited'). "; } + final IOException newIoe = new IOException(String.format(Locale.ENGLISH, + "%s: %s [this may be caused by lack of enough unfragmented virtual address space "+ + "or too restrictive virtual memory limits enforced by the operating system, "+ + "preventing us to map a chunk of %d bytes. %sMore information: "+ + "http://blog.thetaphi.de/2012/07/use-lucenes-mmapdirectory-on-64bit.html]", + originalMessage, resourceDescription, bufSize, moreInfo), originalCause); + newIoe.setStackTrace(ioe.getStackTrace()); + return newIoe; + } - public long getFilePointer() { - return (curBufIndex * (long) maxBufSize) + curBuf.position(); - } - - public void seek(long pos) throws IOException { - curBufIndex = (int) (pos / maxBufSize); - curBuf = buffers[curBufIndex]; - int bufOffset = (int) (pos - (curBufIndex * maxBufSize)); - curBuf.position(bufOffset); - curAvail = bufSizes[curBufIndex] - bufOffset; - } - - public long length() { - return length; - } - - public Object clone() { - MultiMMapIndexInput clone = (MultiMMapIndexInput)super.clone(); - clone.buffers = new ByteBuffer[buffers.length]; - // No need to clone bufSizes. - // Since most clones will use only one buffer, duplicate() could also be - // done lazy in clones, eg. when adapting curBuf. 
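To make the arithmetic in the new map() method above concrete: chunkSizePower is derived from the constructor's maxChunkSize, and the buffer count always includes one extra, possibly zero-length, tail buffer. A worked example using the 32-bit default of 1 << 28 (256 MiB) and an arbitrary example file length:

    int chunkSizePower = 31 - Integer.numberOfLeadingZeros(1 << 28);  // = 28
    long length = (3L << 28) + 12345;                                  // a ~768 MB file
    int nrBuffers = (int) (length >>> chunkSizePower) + 1;            // = 4
    // three full 256 MiB mappings plus one 12345-byte tail buffer; if length were
    // an exact multiple of the chunk size, the tail buffer would be 0 bytes long.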
- for (int bufNr = 0; bufNr < buffers.length; bufNr++) { - clone.buffers[bufNr] = buffers[bufNr].duplicate(); - } + private static final BufferCleaner CLEANER = new BufferCleaner() { + @Override + public void freeBuffer(final ByteBufferIndexInput parent, final ByteBuffer buffer) throws IOException { try { - clone.seek(getFilePointer()); - } catch(IOException ioe) { - RuntimeException newException = new RuntimeException(ioe); - newException.initCause(ioe); - throw newException; - }; - return clone; + AccessController.doPrivileged(new PrivilegedExceptionAction() { + @Override + public Void run() throws Exception { + final Method getCleanerMethod = buffer.getClass() + .getMethod("cleaner"); + getCleanerMethod.setAccessible(true); + final Object cleaner = getCleanerMethod.invoke(buffer); + if (cleaner != null) { + cleaner.getClass().getMethod("clean") + .invoke(cleaner); + } + return null; + } + }); + } catch (PrivilegedActionException e) { + throw new IOException("Unable to unmap the mapped buffer: " + parent.toString(), e.getCause()); + } } - - public void close() throws IOException {} - } - - private final int MAX_BBUF = Integer.MAX_VALUE; - - public IndexInput openInput(String name) throws IOException { - File f = new File(getFile(), name); - RandomAccessFile raf = new RandomAccessFile(f, "r"); - try { - return (raf.length() <= MAX_BBUF) - ? (IndexInput) new MMapIndexInput(raf) - : (IndexInput) new MultiMMapIndexInput(raf, MAX_BBUF); - } finally { - raf.close(); - } - } - - public IndexInput openInput(String name, int bufferSize) throws IOException { - return openInput(name); - } + }; } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/MergeInfo.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/NIOFSDirectory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/NIOFSDirectory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/NIOFSDirectory.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/NIOFSDirectory.java 16 Dec 2014 11:31:33 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with this * work for additional information regarding copyright ownership. The ASF @@ -18,103 +18,179 @@ */ import java.io.File; +import java.io.EOFException; import java.io.IOException; import java.nio.ByteBuffer; +import java.nio.channels.ClosedChannelException; // javadoc @link import java.nio.channels.FileChannel; +import java.nio.file.StandardOpenOption; +import java.util.concurrent.Future; // javadoc /** - * NIO version of FSDirectory. Uses FileChannel.read(ByteBuffer dst, long position) method - * which allows multiple threads to read from the file without synchronizing. FSDirectory - * synchronizes in the FSIndexInput.readInternal method which can cause pileups when there - * are many threads accessing the Directory concurrently. - * - * This class only uses FileChannel when reading; writing - * with an IndexOutput is inherited from FSDirectory. - * - * Note: NIOFSDirectory is not recommended on Windows because of a bug - * in how FileChannel.read is implemented in Sun's JRE. - * Inside of the implementation the position is apparently - * synchronized. 
See here for details: - - * http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6265734 - * - * @see FSDirectory + * An {@link FSDirectory} implementation that uses java.nio's FileChannel's + * positional read, which allows multiple threads to read from the same file + * without synchronizing. + *

    + * This class only uses FileChannel when reading; writing is achieved with + * {@link FSDirectory.FSIndexOutput}. + *

    + * NOTE: NIOFSDirectory is not recommended on Windows because of a bug in + * how FileChannel.read is implemented in Sun's JRE. Inside of the + * implementation the position is apparently synchronized. See here + * for details. + *

    + *

    + * NOTE: Accessing this class either directly or + * indirectly from a thread while it's interrupted can close the + * underlying file descriptor immediately if at the same time the thread is + * blocked on IO. The file descriptor will remain closed and subsequent access + * to {@link NIOFSDirectory} will throw a {@link ClosedChannelException}. If + * your application uses either {@link Thread#interrupt()} or + * {@link Future#cancel(boolean)} you should use {@link SimpleFSDirectory} in + * favor of {@link NIOFSDirectory}. + *
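For orientation before the class body: the readInternal implementation further down issues positional reads in slices of at most 16384 bytes, so concurrent readers never contend on a shared file pointer. Reduced to a sketch (the method name here is invented; the types are those imported in the patch and 16384 is the patch's CHUNK_SIZE):

    static void readFully(FileChannel channel, ByteBuffer bb, long pos, int len) throws IOException {
      int readLength = len;
      while (readLength > 0) {
        final int toRead = Math.min(16384, readLength);
        bb.limit(bb.position() + toRead);
        final int i = channel.read(bb, pos);       // positional read: no seek, no synchronization
        if (i < 0) {
          throw new EOFException("read past EOF");
        }
        pos += i;
        readLength -= i;
      }
    }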

    */ - public class NIOFSDirectory extends FSDirectory { - // Inherit javadoc - public IndexInput openInput(String name, int bufferSize) throws IOException { - ensureOpen(); - return new NIOFSIndexInput(new File(getFile(), name), bufferSize); + /** Create a new NIOFSDirectory for the named location. + * + * @param path the path of the directory + * @param lockFactory the lock factory to use, or null for the default + * ({@link NativeFSLockFactory}); + * @throws IOException if there is a low-level I/O error + */ + public NIOFSDirectory(File path, LockFactory lockFactory) throws IOException { + super(path, lockFactory); } - private static class NIOFSIndexInput extends FSDirectory.FSIndexInput { + /** Create a new NIOFSDirectory for the named location and {@link NativeFSLockFactory}. + * + * @param path the path of the directory + * @throws IOException if there is a low-level I/O error + */ + public NIOFSDirectory(File path) throws IOException { + super(path, null); + } + /** Creates an IndexInput for the file with the given name. */ + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { + ensureOpen(); + File path = new File(getDirectory(), name); + FileChannel fc = FileChannel.open(path.toPath(), StandardOpenOption.READ); + return new NIOFSIndexInput("NIOFSIndexInput(path=\"" + path + "\")", fc, context); + } + + /** + * Reads bytes with {@link FileChannel#read(ByteBuffer, long)} + */ + static final class NIOFSIndexInput extends BufferedIndexInput { + /** + * The maximum chunk size for reads of 16384 bytes. + */ + private static final int CHUNK_SIZE = 16384; + + /** the file channel we will read from */ + protected final FileChannel channel; + /** is this instance a clone and hence does not own the file to close it */ + boolean isClone = false; + /** start offset: non-zero in the slice case */ + protected final long off; + /** end offset (start+length) */ + protected final long end; + private ByteBuffer byteBuf; // wraps the buffer for NIO - private byte[] otherBuffer; - private ByteBuffer otherByteBuf; + public NIOFSIndexInput(String resourceDesc, FileChannel fc, IOContext context) throws IOException { + super(resourceDesc, context); + this.channel = fc; + this.off = 0L; + this.end = fc.size(); + } + + public NIOFSIndexInput(String resourceDesc, FileChannel fc, long off, long length, int bufferSize) { + super(resourceDesc, bufferSize); + this.channel = fc; + this.off = off; + this.end = off + length; + this.isClone = true; + } + + @Override + public void close() throws IOException { + if (!isClone) { + channel.close(); + } + } + + @Override + public NIOFSIndexInput clone() { + NIOFSIndexInput clone = (NIOFSIndexInput)super.clone(); + clone.isClone = true; + return clone; + } + + @Override + public IndexInput slice(String sliceDescription, long offset, long length) throws IOException { + if (offset < 0 || length < 0 || offset + length > this.length()) { + throw new IllegalArgumentException("slice() " + sliceDescription + " out of bounds: " + this); + } + return new NIOFSIndexInput(sliceDescription, channel, off + offset, length, getBufferSize()); + } - final FileChannel channel; - - public NIOFSIndexInput(File path, int bufferSize) throws IOException { - super(path, bufferSize); - channel = file.getChannel(); + @Override + public final long length() { + return end - off; } + @Override protected void newBuffer(byte[] newBuffer) { super.newBuffer(newBuffer); byteBuf = ByteBuffer.wrap(newBuffer); } - public void close() throws IOException { - if 
(!isClone && file.isOpen) { - // Close the channel & file - try { - channel.close(); - } finally { - file.close(); - } - } - } - + @Override protected void readInternal(byte[] b, int offset, int len) throws IOException { - final ByteBuffer bb; // Determine the ByteBuffer we should use - if (b == buffer && 0 == offset) { + if (b == buffer) { // Use our own pre-wrapped byteBuf: assert byteBuf != null; - byteBuf.clear(); - byteBuf.limit(len); bb = byteBuf; + byteBuf.clear().position(offset); } else { - if (offset == 0) { - if (otherBuffer != b) { - // Now wrap this other buffer; with compound - // file, we are repeatedly called with its - // buffer, so we wrap it once and then re-use it - // on subsequent calls - otherBuffer = b; - otherByteBuf = ByteBuffer.wrap(b); - } else - otherByteBuf.clear(); - otherByteBuf.limit(len); - bb = otherByteBuf; - } else - // Always wrap when offset != 0 - bb = ByteBuffer.wrap(b, offset, len); + bb = ByteBuffer.wrap(b, offset, len); } - long pos = getFilePointer(); - while (bb.hasRemaining()) { - int i = channel.read(bb, pos); - if (i == -1) - throw new IOException("read past EOF"); - pos += i; + long pos = getFilePointer() + off; + + if (pos + len > end) { + throw new EOFException("read past EOF: " + this); } + + try { + int readLength = len; + while (readLength > 0) { + final int toRead = Math.min(CHUNK_SIZE, readLength); + bb.limit(bb.position() + toRead); + assert bb.remaining() == toRead; + final int i = channel.read(bb, pos); + if (i < 0) { // be defensive here, even though we checked before hand, something could have changed + throw new EOFException("read past EOF: " + this + " off: " + offset + " len: " + len + " pos: " + pos + " chunkLen: " + toRead + " end: " + end); + } + assert i > 0 : "FileChannel.read with non zero-length bb.remaining() must always read at least one byte (FileChannel is in blocking mode, see spec of ReadableByteChannel)"; + pos += i; + readLength -= i; + } + assert readLength == 0; + } catch (IOException ioe) { + throw new IOException(ioe.getMessage() + ": " + this, ioe); + } } + + @Override + protected void seekInternal(long pos) throws IOException {} } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/NRTCachingDirectory.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/NativeFSLockFactory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/NativeFSLockFactory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/NativeFSLockFactory.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/NativeFSLockFactory.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,12 +19,16 @@ import java.nio.channels.FileChannel; import java.nio.channels.FileLock; +import java.nio.channels.OverlappingFileLockException; +import java.nio.file.StandardOpenOption; import java.io.File; -import java.io.RandomAccessFile; import java.io.IOException; +import java.util.Collections; import java.util.HashSet; -import java.util.Random; +import java.util.Set; +import org.apache.lucene.util.IOUtils; + /** *

    Implements {@link LockFactory} using native OS file * locks. Note that because this LockFactory relies on @@ -39,14 +43,23 @@ * could be left when the JVM exits abnormally.

    * *

    The primary benefit of {@link NativeFSLockFactory} is - * that lock files will be properly removed (by the OS) if - * the JVM has an abnormal exit.

    + * that locks (not the lock file itself) will be properly + * removed (by the OS) if the JVM has an abnormal exit.

    * *

    Note that, unlike {@link SimpleFSLockFactory}, the existence of - * leftover lock files in the filesystem on exiting the JVM - * is fine because the OS will free the locks held against - * these files even though the files still remain.

    + * leftover lock files in the filesystem is fine because the OS + * will free the locks held against these files even though the + * files still remain. Lucene will never actively remove the lock + * files, so although you see them, the index may not be locked.

    * + *

    Special care needs to be taken if you change the locking + * implementation: First be certain that no writer is in fact + * writing to the index, otherwise you can easily corrupt + * your index. Be sure to do the LockFactory change on all Lucene + * instances and clean up all leftover lock files before starting + * the new configuration for the first time. Different implementations + * cannot work together!
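The actual locking work lives in the rewritten NativeFSLock.obtain() later in this hunk; stripped of its comments it amounts to roughly the sketch below, where LOCK_HELD, path, channel, lock and failureReason are the fields shown in the patch:

    boolean obtained = false;
    if (LOCK_HELD.add(path.getCanonicalPath())) {      // in-process guard, one entry per canonical path
      try {
        channel = FileChannel.open(path.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE);
        lock = channel.tryLock();                      // null means another process holds the lock
        obtained = lock != null;
      } catch (IOException | OverlappingFileLockException e) {
        failureReason = e;                             // surfaced as root cause by the timeout obtain
      } finally {
        if (!obtained) {                               // failed: drop the in-process guard and the channel
          LOCK_HELD.remove(path.getCanonicalPath());
          IOUtils.closeWhileHandlingException(channel);
          channel = null;
        }
      }
    }
    return obtained;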

    + * *

    If you suspect that this or any other LockFactory is * not working properly in your environment, you can easily * test it by using {@link VerifyingLockFactory}, {@link @@ -55,45 +68,16 @@ * @see LockFactory */ -public class NativeFSLockFactory extends LockFactory { +public class NativeFSLockFactory extends FSLockFactory { /** - * Directory specified by org.apache.lucene.lockDir - * system property. If that is not set, then java.io.tmpdir - * system property is used. - */ - - private File lockDir; - - // Simple test to verify locking system is "working". On - // NFS, if it's misconfigured, you can hit long (35 - // second) timeouts which cause Lock.obtain to take far - // too long (it assumes the obtain() call takes zero - // time). Since it's a configuration problem, we test up - // front once on creating the LockFactory: - private void acquireTestLock() throws IOException { - String randomLockName = "lucene-" + Long.toString(new Random().nextInt(), Character.MAX_RADIX) + "-test.lock"; - - Lock l = makeLock(randomLockName); - try { - l.obtain(); - } catch (IOException e) { - IOException e2 = new IOException("Failed to acquire random test lock; please verify filesystem for lock directory '" + lockDir + "' supports locking"); - e2.initCause(e); - throw e2; - } - - l.release(); - } - - /** * Create a NativeFSLockFactory instance, with null (unset) - * lock directory. This is package-private and is only - * used by FSDirectory when creating this LockFactory via - * the System property - * org.apache.lucene.store.FSDirectoryLockFactoryClass. + * lock directory. When you pass this factory to a {@link FSDirectory} + * subclass, the lock directory is automatically set to the + * directory itself. Be sure to create one instance for each directory + * your create! */ - NativeFSLockFactory() throws IOException { + public NativeFSLockFactory() { this((File) null); } @@ -103,7 +87,7 @@ * * @param lockDirName where lock files are created. */ - public NativeFSLockFactory(String lockDirName) throws IOException { + public NativeFSLockFactory(String lockDirName) { this(new File(lockDirName)); } @@ -113,84 +97,42 @@ * * @param lockDir where lock files are created. */ - public NativeFSLockFactory(File lockDir) throws IOException { + public NativeFSLockFactory(File lockDir) { setLockDir(lockDir); } - /** - * Set the lock directory. This is package-private and is - * only used externally by FSDirectory when creating this - * LockFactory via the System property - * org.apache.lucene.store.FSDirectoryLockFactoryClass. - */ - void setLockDir(File lockDir) throws IOException { - this.lockDir = lockDir; - if (lockDir != null) { - // Ensure that lockDir exists and is a directory. 
- if (!lockDir.exists()) { - if (!lockDir.mkdirs()) - throw new IOException("Cannot create directory: " + - lockDir.getAbsolutePath()); - } else if (!lockDir.isDirectory()) { - throw new IOException("Found regular file where directory expected: " + - lockDir.getAbsolutePath()); - } - - acquireTestLock(); - } - } - + @Override public synchronized Lock makeLock(String lockName) { if (lockPrefix != null) - lockName = lockPrefix + "-n-" + lockName; + lockName = lockPrefix + "-" + lockName; return new NativeFSLock(lockDir, lockName); } + @Override public void clearLock(String lockName) throws IOException { - // Note that this isn't strictly required anymore - // because the existence of these files does not mean - // they are locked, but, still do this in case people - // really want to see the files go away: - if (lockDir.exists()) { - if (lockPrefix != null) { - lockName = lockPrefix + "-n-" + lockName; - } - File lockFile = new File(lockDir, lockName); - if (lockFile.exists() && !lockFile.delete()) { - throw new IOException("Cannot delete " + lockFile); - } - } + makeLock(lockName).close(); } -}; +} class NativeFSLock extends Lock { - private RandomAccessFile f; private FileChannel channel; private FileLock lock; private File path; private File lockDir; + private static final Set LOCK_HELD = Collections.synchronizedSet(new HashSet()); - /* - * The javadocs for FileChannel state that you should have - * a single instance of a FileChannel (per JVM) for all - * locking against a given file. To ensure this, we have - * a single (static) HashSet that contains the file paths - * of all currently locked locks. This protects against - * possible cases where different Directory instances in - * one JVM (each with their own NativeFSLockFactory - * instance) have set the same lock dir and lock prefix. - */ - private static HashSet LOCK_HELD = new HashSet(); public NativeFSLock(File lockDir, String lockFileName) { this.lockDir = lockDir; path = new File(lockDir, lockFileName); } + + @Override public synchronized boolean obtain() throws IOException { - if (isLocked()) { + if (lock != null) { // Our instance is already locked: return false; } @@ -199,134 +141,101 @@ if (!lockDir.exists()) { if (!lockDir.mkdirs()) throw new IOException("Cannot create directory: " + - lockDir.getAbsolutePath()); + lockDir.getAbsolutePath()); } else if (!lockDir.isDirectory()) { + // TODO: NoSuchDirectoryException instead? 
throw new IOException("Found regular file where directory expected: " + - lockDir.getAbsolutePath()); + lockDir.getAbsolutePath()); } - - String canonicalPath = path.getCanonicalPath(); - - boolean markedHeld = false; - - try { - - // Make sure nobody else in-process has this lock held - // already, and, mark it held if not: - - synchronized(LOCK_HELD) { - if (LOCK_HELD.contains(canonicalPath)) { - // Someone else in this JVM already has the lock: - return false; - } else { - // This "reserves" the fact that we are the one - // thread trying to obtain this lock, so we own - // the only instance of a channel against this - // file: - LOCK_HELD.add(canonicalPath); - markedHeld = true; - } - } - + final String canonicalPath = path.getCanonicalPath(); + // Make sure nobody else in-process has this lock held + // already, and, mark it held if not: + // This is a pretty crazy workaround for some documented + // but yet awkward JVM behavior: + // + // On some systems, closing a channel releases all locks held by the Java virtual machine on the underlying file + // regardless of whether the locks were acquired via that channel or via another channel open on the same file. + // It is strongly recommended that, within a program, a unique channel be used to acquire all locks on any given + // file. + // + // This essentially means if we close "A" channel for a given file all locks might be released... the odd part + // is that we can't re-obtain the lock in the same JVM but from a different process if that happens. Nevertheless + // this is super trappy. See LUCENE-5738 + boolean obtained = false; + if (LOCK_HELD.add(canonicalPath)) { try { - f = new RandomAccessFile(path, "rw"); - } catch (IOException e) { - // On Windows, we can get intermittant "Access - // Denied" here. So, we treat this as failure to - // acquire the lock, but, store the reason in case - // there is in fact a real error case. - failureReason = e; - f = null; - } - - if (f != null) { + channel = FileChannel.open(path.toPath(), StandardOpenOption.CREATE, StandardOpenOption.WRITE); try { - channel = f.getChannel(); - try { - lock = channel.tryLock(); - } catch (IOException e) { - // At least on OS X, we will sometimes get an - // intermittant "Permission Denied" IOException, - // which seems to simply mean "you failed to get - // the lock". But other IOExceptions could be - // "permanent" (eg, locking is not supported via - // the filesystem). So, we record the failure - // reason here; the timeout obtain (usually the - // one calling us) will use this as "root cause" - // if it fails to get the lock. - failureReason = e; - } finally { - if (lock == null) { - try { - channel.close(); - } finally { - channel = null; - } - } - } - } finally { - if (channel == null) { - try { - f.close(); - } finally { - f = null; - } - } + lock = channel.tryLock(); + obtained = lock != null; + } catch (IOException | OverlappingFileLockException e) { + // At least on OS X, we will sometimes get an + // intermittent "Permission Denied" IOException, + // which seems to simply mean "you failed to get + // the lock". But other IOExceptions could be + // "permanent" (eg, locking is not supported via + // the filesystem). So, we record the failure + // reason here; the timeout obtain (usually the + // one calling us) will use this as "root cause" + // if it fails to get the lock. 
+ failureReason = e; } - } - - } finally { - if (markedHeld && !isLocked()) { - synchronized(LOCK_HELD) { - if (LOCK_HELD.contains(canonicalPath)) { - LOCK_HELD.remove(canonicalPath); - } + } finally { + if (obtained == false) { // not successful - clear up and move out + clearLockHeld(path); + final FileChannel toClose = channel; + channel = null; + IOUtils.closeWhileHandlingException(toClose); } } } - return isLocked(); + return obtained; } - public synchronized void release() throws IOException { - if (isLocked()) { - try { - lock.release(); - } finally { - lock = null; + @Override + public synchronized void close() throws IOException { + try { + if (lock != null) { try { - channel.close(); + lock.release(); + lock = null; } finally { - channel = null; - try { - f.close(); - } finally { - f = null; - synchronized(LOCK_HELD) { - LOCK_HELD.remove(path.getCanonicalPath()); - } - } + clearLockHeld(path); } } - if (!path.delete()) - throw new LockReleaseFailedException("failed to delete " + path); + } finally { + IOUtils.close(channel); + channel = null; } } + private static final void clearLockHeld(File path) throws IOException { + boolean remove = LOCK_HELD.remove(path.getCanonicalPath()); + assert remove : "Lock was cleared but never marked as held"; + } + + @Override public synchronized boolean isLocked() { - return lock != null; + // The test for is isLocked is not directly possible with native file locks: + + // First a shortcut, if a lock reference in this instance is available + if (lock != null) return true; + + // Look if lock file is present; if not, there can definitely be no lock! + if (!path.exists()) return false; + + // Try to obtain and release (if was locked) the lock + try { + boolean obtained = obtain(); + if (obtained) close(); + return !obtained; + } catch (IOException ioe) { + return false; + } } + @Override public String toString() { return "NativeFSLock@" + path; } - - public void finalize() throws Throwable { - try { - if (isLocked()) { - release(); - } - } finally { - super.finalize(); - } - } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/NoLockFactory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/NoLockFactory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/NoLockFactory.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/NoLockFactory.java 16 Dec 2014 11:31:35 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -21,7 +21,6 @@ /** * Use this {@link LockFactory} to disable locking entirely. - * This LockFactory is used when you call {@link FSDirectory#setDisableLocks}. * Only one instance of this lock is created. You should call {@link * #getNoLockFactory()} to get the instance. * @@ -33,30 +32,38 @@ // Single instance returned whenever makeLock is called. 
private static NoLock singletonLock = new NoLock(); private static NoLockFactory singleton = new NoLockFactory(); + + private NoLockFactory() {} public static NoLockFactory getNoLockFactory() { return singleton; } + @Override public Lock makeLock(String lockName) { return singletonLock; } - public void clearLock(String lockName) {}; -}; + @Override + public void clearLock(String lockName) {} +} class NoLock extends Lock { + @Override public boolean obtain() throws IOException { return true; } - public void release() { + @Override + public void close() { } + @Override public boolean isLocked() { return false; } + @Override public String toString() { return "NoLock"; } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/NoSuchDirectoryException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/OutputStreamDataOutput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/OutputStreamIndexOutput.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/RAMDirectory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/RAMDirectory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/RAMDirectory.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/RAMDirectory.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,43 +19,69 @@ import java.io.IOException; import java.io.FileNotFoundException; -import java.io.File; -import java.io.Serializable; -import java.util.HashMap; -import java.util.Iterator; +import java.util.ArrayList; +import java.util.Collection; +import java.util.List; +import java.util.Map; import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicLong; +import org.apache.lucene.util.Accountable; + + /** * A memory-resident {@link Directory} implementation. Locking * implementation is by default the {@link SingleInstanceLockFactory} * but can be changed with {@link #setLockFactory}. - * - * @version $Id$ + * + *

    Warning: This class is not intended to work with huge + * indexes. Everything beyond several hundred megabytes will waste + * resources (GC cycles), because it uses an internal buffer size + * of 1024 bytes, producing millions of {@code byte[1024]} arrays. + * This class is optimized for small memory-resident indexes. + * It also has bad concurrency on multithreaded environments. + * + *

    It is recommended to materialize large indexes on disk and use + * {@link MMapDirectory}, which is a high-performance directory + * implementation working directly on the file system cache of the + * operating system, so copying data to Java heap space is not useful. */ -public class RAMDirectory extends Directory implements Serializable { - - private static final long serialVersionUID = 1l; - - HashMap fileMap = new HashMap(); - long sizeInBytes = 0; +public class RAMDirectory extends BaseDirectory implements Accountable { + protected final Map fileMap = new ConcurrentHashMap<>(); + protected final AtomicLong sizeInBytes = new AtomicLong(); // ***** // Lock acquisition sequence: RAMDirectory, then RAMFile - // ***** + // ***** /** Constructs an empty {@link Directory}. */ public RAMDirectory() { - setLockFactory(new SingleInstanceLockFactory()); + try { + setLockFactory(new SingleInstanceLockFactory()); + } catch (IOException e) { + // Cannot happen + } } /** * Creates a new RAMDirectory instance from a different * Directory implementation. This can be used to load * a disk-based index into memory. - *

    - * This should be used only with indices that can fit into memory. - *

    - * Note that the resulting RAMDirectory instance is fully + * + *

    Warning: This class is not intended to work with huge + * indexes. Everything beyond several hundred megabytes will waste + * resources (GC cycles), because it uses an internal buffer size + * of 1024 bytes, producing millions of {@code byte[1024]} arrays. + * This class is optimized for small memory-resident indexes. + * It also has bad concurrency on multithreaded environments. + * + *

    For disk-based indexes it is recommended to use + * {@link MMapDirectory}, which is a high-performance directory + * implementation working directly on the file system cache of the + * operating system, so copying data to Java heap space is not useful. + * + *
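The copy constructor this javadoc belongs to now takes an IOContext (its new signature appears a few lines below). Loading a small on-disk index into RAM would look roughly like the following sketch; the path is a placeholder and IOContext.DEFAULT is used as an example context:

    Directory onDisk = FSDirectory.open(new File("/path/to/small-index"));
    RAMDirectory inMemory = new RAMDirectory(onDisk, IOContext.DEFAULT);
    onDisk.close();
    // inMemory is a fully independent copy; later changes to onDisk are not visible here.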

    Note that the resulting RAMDirectory instance is fully * independent from the original Directory (it is a * complete copy). Any subsequent changes to the * original Directory will not be visible in the @@ -64,180 +90,124 @@ * @param dir a Directory value * @exception IOException if an error occurs */ - public RAMDirectory(Directory dir) throws IOException { - this(dir, false); + public RAMDirectory(Directory dir, IOContext context) throws IOException { + this(dir, false, context); } - private RAMDirectory(Directory dir, boolean closeDir) throws IOException { + private RAMDirectory(Directory dir, boolean closeDir, IOContext context) throws IOException { this(); - Directory.copy(dir, this, closeDir); + for (String file : dir.listAll()) { + dir.copy(this, file, file, context); + } + if (closeDir) { + dir.close(); + } } - /** - * Creates a new RAMDirectory instance from the {@link FSDirectory}. - * - * @param dir a File specifying the index directory - * - * @see #RAMDirectory(Directory) - */ - public RAMDirectory(File dir) throws IOException { - this(FSDirectory.getDirectory(dir), true); + @Override + public String getLockID() { + return "lucene-" + Integer.toHexString(hashCode()); } - - /** - * Creates a new RAMDirectory instance from the {@link FSDirectory}. - * - * @param dir a String specifying the full index directory path - * - * @see #RAMDirectory(Directory) - */ - public RAMDirectory(String dir) throws IOException { - this(FSDirectory.getDirectory(dir), true); - } - - /** Returns an array of strings, one for each file in the directory. */ - public synchronized final String[] list() { + + @Override + public final String[] listAll() { ensureOpen(); - Set fileNames = fileMap.keySet(); - String[] result = new String[fileNames.size()]; - int i = 0; - Iterator it = fileNames.iterator(); - while (it.hasNext()) - result[i++] = (String)it.next(); - return result; + // NOTE: fileMap.keySet().toArray(new String[0]) is broken in non Sun JDKs, + // and the code below is resilient to map changes during the array population. + Set fileNames = fileMap.keySet(); + List names = new ArrayList<>(fileNames.size()); + for (String name : fileNames) names.add(name); + return names.toArray(new String[names.size()]); } /** Returns true iff the named file exists in this directory. */ + @Override public final boolean fileExists(String name) { ensureOpen(); - RAMFile file; - synchronized (this) { - file = (RAMFile)fileMap.get(name); - } - return file != null; + return fileMap.containsKey(name); } - /** Returns the time the named file was last modified. - * @throws IOException if the file does not exist - */ - public final long fileModified(String name) throws IOException { - ensureOpen(); - RAMFile file; - synchronized (this) { - file = (RAMFile)fileMap.get(name); - } - if (file==null) - throw new FileNotFoundException(name); - return file.getLastModified(); - } - - /** Set the modified time of an existing file to now. - * @throws IOException if the file does not exist - */ - public void touchFile(String name) throws IOException { - ensureOpen(); - RAMFile file; - synchronized (this) { - file = (RAMFile)fileMap.get(name); - } - if (file==null) - throw new FileNotFoundException(name); - - long ts2, ts1 = System.currentTimeMillis(); - do { - try { - Thread.sleep(0, 1); - } catch (InterruptedException e) {} - ts2 = System.currentTimeMillis(); - } while(ts1 == ts2); - - file.setLastModified(ts2); - } - /** Returns the length in bytes of a file in the directory. 
* @throws IOException if the file does not exist */ + @Override public final long fileLength(String name) throws IOException { ensureOpen(); - RAMFile file; - synchronized (this) { - file = (RAMFile)fileMap.get(name); - } - if (file==null) + RAMFile file = fileMap.get(name); + if (file == null) { throw new FileNotFoundException(name); + } return file.getLength(); } - /** Return total size in bytes of all files in this - * directory. This is currently quantized to - * RAMOutputStream.BUFFER_SIZE. */ - public synchronized final long sizeInBytes() { + /** + * Return total size in bytes of all files in this directory. This is + * currently quantized to RAMOutputStream.BUFFER_SIZE. + */ + @Override + public final long ramBytesUsed() { ensureOpen(); - return sizeInBytes; + return sizeInBytes.get(); } /** Removes an existing file in the directory. * @throws IOException if the file does not exist */ - public synchronized void deleteFile(String name) throws IOException { + @Override + public void deleteFile(String name) throws IOException { ensureOpen(); - RAMFile file = (RAMFile)fileMap.get(name); - if (file!=null) { - fileMap.remove(name); - file.directory = null; - sizeInBytes -= file.sizeInBytes; // updates to RAMFile.sizeInBytes synchronized on directory - } else + RAMFile file = fileMap.remove(name); + if (file != null) { + file.directory = null; + sizeInBytes.addAndGet(-file.sizeInBytes); + } else { throw new FileNotFoundException(name); - } - - /** Renames an existing file in the directory. - * @throws FileNotFoundException if from does not exist - * @deprecated - */ - public synchronized final void renameFile(String from, String to) throws IOException { - ensureOpen(); - RAMFile fromFile = (RAMFile)fileMap.get(from); - if (fromFile==null) - throw new FileNotFoundException(from); - RAMFile toFile = (RAMFile)fileMap.get(to); - if (toFile!=null) { - sizeInBytes -= toFile.sizeInBytes; // updates to RAMFile.sizeInBytes synchronized on directory - toFile.directory = null; } - fileMap.remove(from); - fileMap.put(to, fromFile); } /** Creates a new, empty file in the directory with the given name. Returns a stream writing this file. */ - public IndexOutput createOutput(String name) throws IOException { + @Override + public IndexOutput createOutput(String name, IOContext context) throws IOException { ensureOpen(); - RAMFile file = new RAMFile(this); - synchronized (this) { - RAMFile existing = (RAMFile)fileMap.get(name); - if (existing!=null) { - sizeInBytes -= existing.sizeInBytes; - existing.directory = null; - } - fileMap.put(name, file); + RAMFile file = newRAMFile(); + RAMFile existing = fileMap.remove(name); + if (existing != null) { + sizeInBytes.addAndGet(-existing.sizeInBytes); + existing.directory = null; } - return new RAMOutputStream(file); + fileMap.put(name, file); + return new RAMOutputStream(file, true); } + /** + * Returns a new {@link RAMFile} for storing data. This method can be + * overridden to return different {@link RAMFile} impls, that e.g. override + * {@link RAMFile#newBuffer(int)}. + */ + protected RAMFile newRAMFile() { + return new RAMFile(this); + } + + @Override + public void sync(Collection names) throws IOException { + } + /** Returns a stream reading an existing file. 
*/ - public IndexInput openInput(String name) throws IOException { + @Override + public IndexInput openInput(String name, IOContext context) throws IOException { ensureOpen(); - RAMFile file; - synchronized (this) { - file = (RAMFile)fileMap.get(name); - } - if (file == null) + RAMFile file = fileMap.get(name); + if (file == null) { throw new FileNotFoundException(name); - return new RAMInputStream(file); + } + return new RAMInputStream(name, file); } /** Closes the store to future operations, releasing associated memory. */ + @Override public void close() { isOpen = false; - fileMap = null; + fileMap.clear(); } + } Index: 3rdParty_sources/lucene/org/apache/lucene/store/RAMFile.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/RAMFile.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/RAMFile.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/RAMFile.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,63 +18,52 @@ */ import java.util.ArrayList; -import java.io.Serializable; -class RAMFile implements Serializable { +import org.apache.lucene.util.Accountable; - private static final long serialVersionUID = 1l; - - private ArrayList buffers = new ArrayList(); +/** + * Represents a file in RAM as a list of byte[] buffers. + * @lucene.internal */ +public class RAMFile implements Accountable { + protected ArrayList buffers = new ArrayList<>(); long length; RAMDirectory directory; - long sizeInBytes; // Only maintained if in a directory; updates synchronized on directory + protected long sizeInBytes; - // This is publicly modifiable via Directory.touchFile(), so direct access not supported - private long lastModified = System.currentTimeMillis(); - // File used as buffer, in no RAMDirectory - RAMFile() {} + public RAMFile() {} RAMFile(RAMDirectory directory) { this.directory = directory; } // For non-stream access from thread that might be concurrent with writing - synchronized long getLength() { + public synchronized long getLength() { return length; } - synchronized void setLength(long length) { + protected synchronized void setLength(long length) { this.length = length; } - // For non-stream access from thread that might be concurrent with writing - synchronized long getLastModified() { - return lastModified; - } - - synchronized void setLastModified(long lastModified) { - this.lastModified = lastModified; - } - - final synchronized byte[] addBuffer(int size) { + protected final byte[] addBuffer(int size) { byte[] buffer = newBuffer(size); - if (directory!=null) - synchronized (directory) { // Ensure addition of buffer and adjustment to directory size are atomic wrt directory - buffers.add(buffer); - directory.sizeInBytes += size; - sizeInBytes += size; - } - else + synchronized(this) { buffers.add(buffer); + sizeInBytes += size; + } + + if (directory != null) { + directory.sizeInBytes.getAndAdd(size); + } return buffer; } - final synchronized byte[] getBuffer(int index) { - return (byte[]) buffers.get(index); + protected final synchronized byte[] getBuffer(int index) { + return buffers.get(index); } - final synchronized int numBuffers() { + protected final synchronized 
int numBuffers() { return buffers.size(); } @@ -84,15 +73,13 @@ * @param size size of allocated buffer. * @return allocated buffer. */ - byte[] newBuffer(int size) { + protected byte[] newBuffer(int size) { return new byte[size]; } - // Only valid if in a directory - long getSizeInBytes() { - synchronized (directory) { - return sizeInBytes; - } + @Override + public synchronized long ramBytesUsed() { + return sizeInBytes; } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/RAMInputStream.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/RAMInputStream.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/RAMInputStream.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/RAMInputStream.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,8 +1,6 @@ package org.apache.lucene.store; -import java.io.IOException; - -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,17 +17,17 @@ * limitations under the License. */ -/** - * A memory-resident {@link IndexInput} implementation. - * - * @version $Id$ - */ +import java.io.IOException; +import java.io.EOFException; -class RAMInputStream extends IndexInput implements Cloneable { +/** A memory-resident {@link IndexInput} implementation. + * + * @lucene.internal */ +public class RAMInputStream extends IndexInput implements Cloneable { static final int BUFFER_SIZE = RAMOutputStream.BUFFER_SIZE; - private RAMFile file; - private long length; + private final RAMFile file; + private final long length; private byte[] currentBuffer; private int currentBufferIndex; @@ -38,11 +36,16 @@ private long bufferStart; private int bufferLength; - RAMInputStream(RAMFile f) throws IOException { - file = f; - length = file.length; + public RAMInputStream(String name, RAMFile f) throws IOException { + this(name, f, f.length); + } + + RAMInputStream(String name, RAMFile f, long length) throws IOException { + super("RAMInputStream(name=" + name + ")"); + this.file = f; + this.length = length; if (length/BUFFER_SIZE >= Integer.MAX_VALUE) { - throw new IOException("Too large RAMFile! 
"+length); + throw new IOException("RAMInputStream too large length=" + length + ": " + name); } // make sure that we switch to the @@ -51,14 +54,17 @@ currentBuffer = null; } + @Override public void close() { // nothing to do here } + @Override public long length() { return length; } + @Override public byte readByte() throws IOException { if (bufferPosition >= bufferLength) { currentBufferIndex++; @@ -67,6 +73,7 @@ return currentBuffer[bufferPosition++]; } + @Override public void readBytes(byte[] b, int offset, int len) throws IOException { while (len > 0) { if (bufferPosition >= bufferLength) { @@ -84,33 +91,71 @@ } private final void switchCurrentBuffer(boolean enforceEOF) throws IOException { - if (currentBufferIndex >= file.numBuffers()) { + bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; + if (bufferStart > length || currentBufferIndex >= file.numBuffers()) { // end of file reached, no more buffers left - if (enforceEOF) - throw new IOException("Read past EOF"); - else { + if (enforceEOF) { + throw new EOFException("read past EOF: " + this); + } else { // Force EOF if a read takes place at this position currentBufferIndex--; bufferPosition = BUFFER_SIZE; } } else { - currentBuffer = (byte[]) file.getBuffer(currentBufferIndex); + currentBuffer = file.getBuffer(currentBufferIndex); bufferPosition = 0; - bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; long buflen = length - bufferStart; bufferLength = buflen > BUFFER_SIZE ? BUFFER_SIZE : (int) buflen; } } + @Override public long getFilePointer() { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } + @Override public void seek(long pos) throws IOException { if (currentBuffer==null || pos < bufferStart || pos >= bufferStart + BUFFER_SIZE) { currentBufferIndex = (int) (pos / BUFFER_SIZE); switchCurrentBuffer(false); } bufferPosition = (int) (pos % BUFFER_SIZE); } + + @Override + public IndexInput slice(String sliceDescription, final long offset, final long length) throws IOException { + if (offset < 0 || length < 0 || offset + length > this.length) { + throw new IllegalArgumentException("slice() " + sliceDescription + " out of bounds: " + this); + } + final String newResourceDescription = (sliceDescription == null) ? 
toString() : (toString() + " [slice=" + sliceDescription + "]"); + return new RAMInputStream(newResourceDescription, file, offset + length) { + { + seek(0L); + } + + @Override + public void seek(long pos) throws IOException { + if (pos < 0L) { + throw new IllegalArgumentException("Seeking to negative position: " + this); + } + super.seek(pos + offset); + } + + @Override + public long getFilePointer() { + return super.getFilePointer() - offset; + } + + @Override + public long length() { + return super.length() - offset; + } + + @Override + public IndexInput slice(String sliceDescription, long ofs, long len) throws IOException { + return super.slice(sliceDescription, offset + ofs, len); + } + }; + } } Index: 3rdParty_sources/lucene/org/apache/lucene/store/RAMOutputStream.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/RAMOutputStream.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/RAMOutputStream.java 17 Aug 2012 14:55:05 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/RAMOutputStream.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,41 +18,51 @@ */ import java.io.IOException; +import java.util.zip.CRC32; +import java.util.zip.Checksum; +import org.apache.lucene.util.Accountable; + /** * A memory-resident {@link IndexOutput} implementation. - * - * @version $Id$ + * + * @lucene.internal */ - -public class RAMOutputStream extends IndexOutput { +public class RAMOutputStream extends IndexOutput implements Accountable { static final int BUFFER_SIZE = 1024; - private RAMFile file; + private final RAMFile file; private byte[] currentBuffer; private int currentBufferIndex; private int bufferPosition; private long bufferStart; private int bufferLength; + + private final Checksum crc; /** Construct an empty output buffer. */ public RAMOutputStream() { - this(new RAMFile()); + this(new RAMFile(), false); } - RAMOutputStream(RAMFile f) { + public RAMOutputStream(RAMFile f, boolean checksum) { file = f; // make sure that we switch to the // first needed buffer lazily currentBufferIndex = -1; currentBuffer = null; + if (checksum) { + crc = new BufferedChecksum(new CRC32()); + } else { + crc = null; + } } /** Copy the current contents of this buffer to the named output. */ - public void writeTo(IndexOutput out) throws IOException { + public void writeTo(DataOutput out) throws IOException { flush(); final long end = file.length; long pos = 0; @@ -63,52 +73,67 @@ if (nextPos > end) { // at the last buffer length = (int)(end - pos); } - out.writeBytes((byte[])file.getBuffer(buffer++), length); + out.writeBytes(file.getBuffer(buffer++), length); pos = nextPos; } } - /** Resets this to an empty buffer. 
*/ - public void reset() { - try { - seek(0); - } catch (IOException e) { // should never happen - throw new RuntimeException(e.toString()); + /** Copy the current contents of this buffer to output + * byte array */ + public void writeTo(byte[] bytes, int offset) throws IOException { + flush(); + final long end = file.length; + long pos = 0; + int buffer = 0; + int bytesUpto = offset; + while (pos < end) { + int length = BUFFER_SIZE; + long nextPos = pos + length; + if (nextPos > end) { // at the last buffer + length = (int)(end - pos); + } + System.arraycopy(file.getBuffer(buffer++), 0, bytes, bytesUpto, length); + bytesUpto += length; + pos = nextPos; } + } + /** Resets this to an empty file. */ + public void reset() { + currentBuffer = null; + currentBufferIndex = -1; + bufferPosition = 0; + bufferStart = 0; + bufferLength = 0; file.setLength(0); + if (crc != null) { + crc.reset(); + } } + @Override public void close() throws IOException { flush(); } - public void seek(long pos) throws IOException { - // set the file length in case we seek back - // and flush() has not been called yet - setFileLength(); - if (pos < bufferStart || pos >= bufferStart + bufferLength) { - currentBufferIndex = (int) (pos / BUFFER_SIZE); - switchCurrentBuffer(); - } - - bufferPosition = (int) (pos % BUFFER_SIZE); - } - - public long length() { - return file.length; - } - + @Override public void writeByte(byte b) throws IOException { if (bufferPosition == bufferLength) { currentBufferIndex++; switchCurrentBuffer(); } + if (crc != null) { + crc.update(b); + } currentBuffer[bufferPosition++] = b; } + @Override public void writeBytes(byte[] b, int offset, int len) throws IOException { assert b != null; + if (crc != null) { + crc.update(b, offset, len); + } while (len > 0) { if (bufferPosition == bufferLength) { currentBufferIndex++; @@ -124,11 +149,11 @@ } } - private final void switchCurrentBuffer() throws IOException { + private final void switchCurrentBuffer() { if (currentBufferIndex == file.numBuffers()) { currentBuffer = file.addBuffer(BUFFER_SIZE); } else { - currentBuffer = (byte[]) file.getBuffer(currentBufferIndex); + currentBuffer = file.getBuffer(currentBufferIndex); } bufferPosition = 0; bufferStart = (long) BUFFER_SIZE * (long) currentBufferIndex; @@ -142,17 +167,28 @@ } } + @Override public void flush() throws IOException { - file.setLastModified(System.currentTimeMillis()); setFileLength(); } + @Override public long getFilePointer() { return currentBufferIndex < 0 ? 0 : bufferStart + bufferPosition; } /** Returns byte usage of all buffers. */ - public long sizeInBytes() { - return file.numBuffers() * BUFFER_SIZE; + @Override + public long ramBytesUsed() { + return (long) file.numBuffers() * (long) BUFFER_SIZE; } + + @Override + public long getChecksum() throws IOException { + if (crc == null) { + throw new IllegalStateException("internal RAMOutputStream created with checksum disabled"); + } else { + return crc.getValue(); + } + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/RandomAccessInput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/RateLimitedDirectoryWrapper.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/RateLimitedIndexOutput.java'. Fisheye: No comparison available. Pass `N' to diff? 
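A rough usage sketch of the in-memory store API shown in the RAMDirectory/RAMOutputStream/RAMInputStream hunks above; the index path and file name here are illustrative, not taken from the diff:

import java.io.File;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.RAMDirectory;

public class RAMDirectoryExample {
  public static void main(String[] args) throws Exception {
    // Write a file into a fresh in-memory directory and read it back.
    RAMDirectory dir = new RAMDirectory();
    IndexOutput out = dir.createOutput("demo.bin", IOContext.DEFAULT);
    out.writeVInt(42);
    out.close();
    IndexInput in = dir.openInput("demo.bin", IOContext.DEFAULT);
    System.out.println(in.readVInt());       // 42
    System.out.println(dir.ramBytesUsed());  // quantized to RAMOutputStream.BUFFER_SIZE
    in.close();

    // Load an existing on-disk index into an independent in-memory copy
    // using the IOContext-based copying constructor shown above.
    FSDirectory disk = FSDirectory.open(new File("/tmp/index"));  // illustrative path
    RAMDirectory snapshot = new RAMDirectory(disk, IOContext.READONCE);
    disk.close();
    snapshot.close();
    dir.close();
  }
}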
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/RateLimiter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/SimpleFSDirectory.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/SimpleFSLockFactory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/SimpleFSLockFactory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/SimpleFSLockFactory.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/SimpleFSLockFactory.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -22,12 +22,10 @@ /** *

    Implements {@link LockFactory} using {@link - * File#createNewFile()}. This is the default LockFactory - * for {@link FSDirectory}.

    + * File#createNewFile()}.

    * - *

    NOTE: the javadocs - * for File.createNewFile contain a vague + *

    NOTE: the {@linkplain File#createNewFile() javadocs + * for File.createNewFile()} contain a vague * yet spooky warning about not using the API for file * locking. This warning was added due to this @@ -39,11 +37,19 @@ * is hit when trying to create a writer, in which case you * need to explicitly clear the lock file first. You can * either manually remove the file, or use the {@link - * org.apache.lucene.index.IndexReader#unlock(Directory)} + * org.apache.lucene.index.IndexWriter#unlock(Directory)} * API. But, first be certain that no writer is in fact * writing to the index otherwise you can easily corrupt * your index.

    * + *

Special care needs to be taken if you change the locking + * implementation: First be certain that no writer is in fact + * writing to the index otherwise you can easily corrupt + * your index. Be sure to do the LockFactory change on all Lucene + * instances and clean up all leftover lock files before starting + * the new configuration for the first time. Different implementations + * can not work together!
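To make that guidance concrete, a minimal sketch of a consistent locking setup; the index path is illustrative, and FSDirectory.open(File, LockFactory) is assumed to be the overload available in this Lucene line:

import java.io.File;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.SimpleFSLockFactory;

public class LockSetupExample {
  public static void main(String[] args) throws Exception {
    // One SimpleFSLockFactory per directory; every process that opens this
    // index must be configured with the same LockFactory implementation.
    Directory dir = FSDirectory.open(new File("/tmp/index"), new SimpleFSLockFactory());

    // Clearing a leftover write lock is only safe when no writer is running.
    if (IndexWriter.isLocked(dir)) {
      IndexWriter.unlock(dir);
    }
    dir.close();
  }
}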

    + * *

    If you suspect that this or any other LockFactory is * not working properly in your environment, you can easily * test it by using {@link VerifyingLockFactory}, {@link @@ -52,61 +58,44 @@ * @see LockFactory */ -public class SimpleFSLockFactory extends LockFactory { +public class SimpleFSLockFactory extends FSLockFactory { /** - * Directory specified by org.apache.lucene.lockDir - * system property. If that is not set, then java.io.tmpdir - * system property is used. - */ - - private File lockDir; - - /** * Create a SimpleFSLockFactory instance, with null (unset) - * lock directory. This is package-private and is only - * used by FSDirectory when creating this LockFactory via - * the System property - * org.apache.lucene.store.FSDirectoryLockFactoryClass. + * lock directory. When you pass this factory to a {@link FSDirectory} + * subclass, the lock directory is automatically set to the + * directory itself. Be sure to create one instance for each directory + * your create! */ - SimpleFSLockFactory() throws IOException { + public SimpleFSLockFactory() { this((File) null); } /** * Instantiate using the provided directory (as a File instance). * @param lockDir where lock files should be created. */ - public SimpleFSLockFactory(File lockDir) throws IOException { + public SimpleFSLockFactory(File lockDir) { setLockDir(lockDir); } /** * Instantiate using the provided directory name (String). * @param lockDirName where lock files should be created. */ - public SimpleFSLockFactory(String lockDirName) throws IOException { - lockDir = new File(lockDirName); - setLockDir(lockDir); + public SimpleFSLockFactory(String lockDirName) { + setLockDir(new File(lockDirName)); } - /** - * Set the lock directory. This is package-private and is - * only used externally by FSDirectory when creating this - * LockFactory via the System property - * org.apache.lucene.store.FSDirectoryLockFactoryClass. - */ - void setLockDir(File lockDir) throws IOException { - this.lockDir = lockDir; - } - + @Override public Lock makeLock(String lockName) { if (lockPrefix != null) { lockName = lockPrefix + "-" + lockName; } return new SimpleFSLock(lockDir, lockName); } + @Override public void clearLock(String lockName) throws IOException { if (lockDir.exists()) { if (lockPrefix != null) { @@ -118,7 +107,7 @@ } } } -}; +} class SimpleFSLock extends Lock { @@ -130,6 +119,7 @@ lockFile = new File(lockDir, lockFileName); } + @Override public boolean obtain() throws IOException { // Ensure that lockDir exists and is a directory: @@ -138,21 +128,36 @@ throw new IOException("Cannot create directory: " + lockDir.getAbsolutePath()); } else if (!lockDir.isDirectory()) { + // TODO: NoSuchDirectoryException instead? throw new IOException("Found regular file where directory expected: " + lockDir.getAbsolutePath()); } - return lockFile.createNewFile(); + + try { + return lockFile.createNewFile(); + } catch (IOException ioe) { + // On Windows, on concurrent createNewFile, the 2nd process gets "access denied". + // In that case, the lock was not aquired successfully, so return false. + // We record the failure reason here; the obtain with timeout (usually the + // one calling us) will use this as "root cause" if it fails to get the lock. 
+ failureReason = ioe; + return false; + } } - public void release() throws LockReleaseFailedException { - if (lockFile.exists() && !lockFile.delete()) + @Override + public void close() throws LockReleaseFailedException { + if (lockFile.exists() && !lockFile.delete()) { throw new LockReleaseFailedException("failed to delete " + lockFile); + } } + @Override public boolean isLocked() { return lockFile.exists(); } + @Override public String toString() { return "SimpleFSLock@" + lockFile; } Index: 3rdParty_sources/lucene/org/apache/lucene/store/SingleInstanceLockFactory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/SingleInstanceLockFactory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/SingleInstanceLockFactory.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/SingleInstanceLockFactory.java 16 Dec 2014 11:31:34 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -33,53 +33,59 @@ public class SingleInstanceLockFactory extends LockFactory { - private HashSet locks = new HashSet(); + private HashSet locks = new HashSet<>(); + @Override public Lock makeLock(String lockName) { // We do not use the LockPrefix at all, because the private // HashSet instance effectively scopes the locking to this // single Directory instance. return new SingleInstanceLock(locks, lockName); } + @Override public void clearLock(String lockName) throws IOException { synchronized(locks) { if (locks.contains(lockName)) { locks.remove(lockName); } } } -}; +} class SingleInstanceLock extends Lock { String lockName; - private HashSet locks; + private HashSet locks; - public SingleInstanceLock(HashSet locks, String lockName) { + public SingleInstanceLock(HashSet locks, String lockName) { this.locks = locks; this.lockName = lockName; } + @Override public boolean obtain() throws IOException { synchronized(locks) { return locks.add(lockName); } } - public void release() { + @Override + public void close() { synchronized(locks) { locks.remove(lockName); } } + @Override public boolean isLocked() { synchronized(locks) { return locks.contains(lockName); } } + @Override public String toString() { - return "SingleInstanceLock: " + lockName; + return super.toString() + ": " + lockName; } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/store/TrackingDirectoryWrapper.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/store/VerifyingLockFactory.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/VerifyingLockFactory.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/VerifyingLockFactory.java 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/VerifyingLockFactory.java 16 Dec 2014 11:31:33 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.store; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. 
@@ -17,7 +17,6 @@ * limitations under the License. */ -import java.net.Socket; import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; @@ -38,80 +37,68 @@ public class VerifyingLockFactory extends LockFactory { - LockFactory lf; - byte id; - String host; - int port; + final LockFactory lf; + final InputStream in; + final OutputStream out; private class CheckedLock extends Lock { - private Lock lock; + private final Lock lock; public CheckedLock(Lock lock) { this.lock = lock; } - private void verify(byte message) { - try { - Socket s = new Socket(host, port); - OutputStream out = s.getOutputStream(); - out.write(id); - out.write(message); - InputStream in = s.getInputStream(); - int result = in.read(); - in.close(); - out.close(); - s.close(); - if (result != 0) - throw new RuntimeException("lock was double acquired"); - } catch (Exception e) { - throw new RuntimeException(e); + private void verify(byte message) throws IOException { + out.write(message); + out.flush(); + final int ret = in.read(); + if (ret < 0) { + throw new IllegalStateException("Lock server died because of locking error."); } + if (ret != message) { + throw new IOException("Protocol violation."); + } } - public synchronized boolean obtain(long lockWaitTimeout) - throws LockObtainFailedException, IOException { - boolean obtained = lock.obtain(lockWaitTimeout); + @Override + public synchronized boolean obtain() throws IOException { + boolean obtained = lock.obtain(); if (obtained) verify((byte) 1); return obtained; } - public synchronized boolean obtain() - throws LockObtainFailedException, IOException { - return lock.obtain(); - } - - public synchronized boolean isLocked() { + @Override + public synchronized boolean isLocked() throws IOException { return lock.isLocked(); } - public synchronized void release() throws IOException { + @Override + public synchronized void close() throws IOException { if (isLocked()) { verify((byte) 0); - lock.release(); + lock.close(); } } } /** - * @param id should be a unique id across all clients * @param lf the LockFactory that we are testing - * @param host host or IP where {@link LockVerifyServer} - is running - * @param port the port {@link LockVerifyServer} is - listening on + * @param in the socket's input to {@link LockVerifyServer} + * @param out the socket's output to {@link LockVerifyServer} */ - public VerifyingLockFactory(byte id, LockFactory lf, String host, int port) throws IOException { - this.id = id; + public VerifyingLockFactory(LockFactory lf, InputStream in, OutputStream out) throws IOException { this.lf = lf; - this.host = host; - this.port = port; + this.in = in; + this.out = out; } + @Override public synchronized Lock makeLock(String lockName) { return new CheckedLock(lf.makeLock(lockName)); } + @Override public synchronized void clearLock(String lockName) throws IOException { lf.clearLock(lockName); Index: 3rdParty_sources/lucene/org/apache/lucene/store/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/store/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/store/package.html 17 Aug 2012 14:55:06 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/store/package.html 16 Dec 2014 11:31:33 -0000 1.1.2.1 @@ -18,7 +18,6 @@ - Binary i/o API, used for all index data. Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/Accountable.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/ArrayInPlaceMergeSorter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/ArrayIntroSorter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/ArrayTimSorter.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/ArrayUtil.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/ArrayUtil.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/ArrayUtil.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/ArrayUtil.java 16 Dec 2014 11:31:32 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,45 +17,295 @@ * limitations under the License. */ +import java.util.Collection; +import java.util.Comparator; + +/** + * Methods for manipulating arrays. + * + * @lucene.internal + */ + public final class ArrayUtil { - public static int getNextSize(int targetSize) { - /* This over-allocates proportional to the list size, making room - * for additional growth. The over-allocation is mild, but is - * enough to give linear-time amortized behavior over a long - * sequence of appends() in the presence of a poorly-performing - * system realloc(). - * The growth pattern is: 0, 4, 8, 16, 25, 35, 46, 58, 72, 88, ... - */ - return (targetSize >> 3) + (targetSize < 9 ? 3 : 6) + targetSize; + /** Maximum length for an array (Integer.MAX_VALUE - RamUsageEstimator.NUM_BYTES_ARRAY_HEADER). */ + public static final int MAX_ARRAY_LENGTH = Integer.MAX_VALUE - RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; + + private ArrayUtil() {} // no instance + + /* + Begin Apache Harmony code + + Revision taken on Friday, June 12. https://svn.apache.org/repos/asf/harmony/enhanced/classlib/archive/java6/modules/luni/src/main/java/java/lang/Integer.java + + */ + + /** + * Parses the string argument as if it was an int value and returns the + * result. Throws NumberFormatException if the string does not represent an + * int quantity. + * + * @param chars a string representation of an int quantity. + * @return int the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as an int quantity. + */ + public static int parseInt(char[] chars) throws NumberFormatException { + return parseInt(chars, 0, chars.length, 10); } - public static int getShrinkSize(int currentSize, int targetSize) { - final int newSize = getNextSize(targetSize); + /** + * Parses a char array into an int. 
+ * @param chars the character array + * @param offset The offset into the array + * @param len The length + * @return the int + * @throws NumberFormatException if it can't parse + */ + public static int parseInt(char[] chars, int offset, int len) throws NumberFormatException { + return parseInt(chars, offset, len, 10); + } + + /** + * Parses the string argument as if it was an int value and returns the + * result. Throws NumberFormatException if the string does not represent an + * int quantity. The second argument specifies the radix to use when parsing + * the value. + * + * @param chars a string representation of an int quantity. + * @param radix the base to use for conversion. + * @return int the value represented by the argument + * @throws NumberFormatException if the argument could not be parsed as an int quantity. + */ + public static int parseInt(char[] chars, int offset, int len, int radix) + throws NumberFormatException { + if (chars == null || radix < Character.MIN_RADIX + || radix > Character.MAX_RADIX) { + throw new NumberFormatException(); + } + int i = 0; + if (len == 0) { + throw new NumberFormatException("chars length is 0"); + } + boolean negative = chars[offset + i] == '-'; + if (negative && ++i == len) { + throw new NumberFormatException("can't convert to an int"); + } + if (negative == true){ + offset++; + len--; + } + return parse(chars, offset, len, radix, negative); + } + + + private static int parse(char[] chars, int offset, int len, int radix, + boolean negative) throws NumberFormatException { + int max = Integer.MIN_VALUE / radix; + int result = 0; + for (int i = 0; i < len; i++){ + int digit = Character.digit(chars[i + offset], radix); + if (digit == -1) { + throw new NumberFormatException("Unable to parse"); + } + if (max > result) { + throw new NumberFormatException("Unable to parse"); + } + int next = result * radix - digit; + if (next > result) { + throw new NumberFormatException("Unable to parse"); + } + result = next; + } + /*while (offset < len) { + + }*/ + if (!negative) { + result = -result; + if (result < 0) { + throw new NumberFormatException("Unable to parse"); + } + } + return result; + } + + + /* + + END APACHE HARMONY CODE + */ + + /** Returns an array size >= minTargetSize, generally + * over-allocating exponentially to achieve amortized + * linear-time cost as the array grows. + * + * NOTE: this was originally borrowed from Python 2.4.2 + * listobject.c sources (attribution in LICENSE.txt), but + * has now been substantially changed based on + * discussions from java-dev thread with subject "Dynamic + * array reallocation algorithms", started on Jan 12 + * 2010. + * + * @param minTargetSize Minimum required value to be returned. + * @param bytesPerElement Bytes used by each element of + * the array. See constants in {@link RamUsageEstimator}. 
+ * + * @lucene.internal + */ + + public static int oversize(int minTargetSize, int bytesPerElement) { + + if (minTargetSize < 0) { + // catch usage that accidentally overflows int + throw new IllegalArgumentException("invalid array size " + minTargetSize); + } + + if (minTargetSize == 0) { + // wait until at least one element is requested + return 0; + } + + if (minTargetSize > MAX_ARRAY_LENGTH) { + throw new IllegalArgumentException("requested array size " + minTargetSize + " exceeds maximum array in java (" + MAX_ARRAY_LENGTH + ")"); + } + + // asymptotic exponential growth by 1/8th, favors + // spending a bit more CPU to not tie up too much wasted + // RAM: + int extra = minTargetSize >> 3; + + if (extra < 3) { + // for very small arrays, where constant overhead of + // realloc is presumably relatively high, we grow + // faster + extra = 3; + } + + int newSize = minTargetSize + extra; + + // add 7 to allow for worst case byte alignment addition below: + if (newSize+7 < 0 || newSize+7 > MAX_ARRAY_LENGTH) { + // int overflowed, or we exceeded the maximum array length + return MAX_ARRAY_LENGTH; + } + + if (Constants.JRE_IS_64BIT) { + // round up to 8 byte alignment in 64bit env + switch(bytesPerElement) { + case 4: + // round up to multiple of 2 + return (newSize + 1) & 0x7ffffffe; + case 2: + // round up to multiple of 4 + return (newSize + 3) & 0x7ffffffc; + case 1: + // round up to multiple of 8 + return (newSize + 7) & 0x7ffffff8; + case 8: + // no rounding + default: + // odd (invalid?) size + return newSize; + } + } else { + // round up to 4 byte alignment in 64bit env + switch(bytesPerElement) { + case 2: + // round up to multiple of 2 + return (newSize + 1) & 0x7ffffffe; + case 1: + // round up to multiple of 4 + return (newSize + 3) & 0x7ffffffc; + case 4: + case 8: + // no rounding + default: + // odd (invalid?) size + return newSize; + } + } + } + + public static int getShrinkSize(int currentSize, int targetSize, int bytesPerElement) { + final int newSize = oversize(targetSize, bytesPerElement); // Only reallocate if we are "substantially" smaller. 
// This saves us from "running hot" (constantly making a // bit bigger then a bit smaller, over and over): - if (newSize < currentSize/2) + if (newSize < currentSize / 2) return newSize; else return currentSize; } + public static short[] grow(short[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + short[] newArray = new short[oversize(minSize, RamUsageEstimator.NUM_BYTES_SHORT)]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static short[] grow(short[] array) { + return grow(array, 1 + array.length); + } + + public static float[] grow(float[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + float[] newArray = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_FLOAT)]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static float[] grow(float[] array) { + return grow(array, 1 + array.length); + } + + public static double[] grow(double[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + double[] newArray = new double[oversize(minSize, RamUsageEstimator.NUM_BYTES_DOUBLE)]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static double[] grow(double[] array) { + return grow(array, 1 + array.length); + } + + public static short[] shrink(short[] array, int targetSize) { + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_SHORT); + if (newSize != array.length) { + short[] newArray = new short[newSize]; + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else + return array; + } + public static int[] grow(int[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; if (array.length < minSize) { - int[] newArray = new int[getNextSize(minSize)]; + int[] newArray = new int[oversize(minSize, RamUsageEstimator.NUM_BYTES_INT)]; System.arraycopy(array, 0, newArray, 0, array.length); return newArray; } else return array; } public static int[] grow(int[] array) { - return grow(array, 1+array.length); + return grow(array, 1 + array.length); } public static int[] shrink(int[] array, int targetSize) { - final int newSize = getShrinkSize(array.length, targetSize); + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_INT); if (newSize != array.length) { int[] newArray = new int[newSize]; System.arraycopy(array, 0, newArray, 0, newSize); @@ -65,20 +315,22 @@ } public static long[] grow(long[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; if (array.length < minSize) { - long[] newArray = new long[getNextSize(minSize)]; + long[] newArray = new long[oversize(minSize, RamUsageEstimator.NUM_BYTES_LONG)]; System.arraycopy(array, 0, newArray, 0, array.length); return newArray; } else return array; } public static long[] grow(long[] array) { - return grow(array, 1+array.length); + 
return grow(array, 1 + array.length); } public static long[] shrink(long[] array, int targetSize) { - final int newSize = getShrinkSize(array.length, targetSize); + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_LONG); if (newSize != array.length) { long[] newArray = new long[newSize]; System.arraycopy(array, 0, newArray, 0, newSize); @@ -88,20 +340,22 @@ } public static byte[] grow(byte[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; if (array.length < minSize) { - byte[] newArray = new byte[getNextSize(minSize)]; + byte[] newArray = new byte[oversize(minSize, 1)]; System.arraycopy(array, 0, newArray, 0, array.length); return newArray; } else return array; } public static byte[] grow(byte[] array) { - return grow(array, 1+array.length); + return grow(array, 1 + array.length); } public static byte[] shrink(byte[] array, int targetSize) { - final int newSize = getShrinkSize(array.length, targetSize); + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, 1); if (newSize != array.length) { byte[] newArray = new byte[newSize]; System.arraycopy(array, 0, newArray, 0, newSize); @@ -110,21 +364,356 @@ return array; } - /** Returns hash of chars in range start (inclusive) to - * end (inclusive) */ + public static boolean[] grow(boolean[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + boolean[] newArray = new boolean[oversize(minSize, 1)]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static boolean[] grow(boolean[] array) { + return grow(array, 1 + array.length); + } + + public static boolean[] shrink(boolean[] array, int targetSize) { + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, 1); + if (newSize != array.length) { + boolean[] newArray = new boolean[newSize]; + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else + return array; + } + + public static char[] grow(char[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + char[] newArray = new char[oversize(minSize, RamUsageEstimator.NUM_BYTES_CHAR)]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static char[] grow(char[] array) { + return grow(array, 1 + array.length); + } + + public static char[] shrink(char[] array, int targetSize) { + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_CHAR); + if (newSize != array.length) { + char[] newArray = new char[newSize]; + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else + return array; + } + + public static int[][] grow(int[][] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + int[][] newArray = new int[oversize(minSize, 
RamUsageEstimator.NUM_BYTES_OBJECT_REF)][]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else { + return array; + } + } + + public static int[][] grow(int[][] array) { + return grow(array, 1 + array.length); + } + + public static int[][] shrink(int[][] array, int targetSize) { + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + if (newSize != array.length) { + int[][] newArray = new int[newSize][]; + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else { + return array; + } + } + + public static float[][] grow(float[][] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + float[][] newArray = new float[oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)][]; + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else { + return array; + } + } + + public static float[][] grow(float[][] array) { + return grow(array, 1 + array.length); + } + + public static float[][] shrink(float[][] array, int targetSize) { + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + if (newSize != array.length) { + float[][] newArray = new float[newSize][]; + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else { + return array; + } + } + + /** + * Returns hash of chars in range start (inclusive) to + * end (inclusive) + */ public static int hashCode(char[] array, int start, int end) { int code = 0; - for(int i=end-1;i>=start;i--) - code = code*31 + array[i]; + for (int i = end - 1; i >= start; i--) + code = code * 31 + array[i]; return code; } - /** Returns hash of chars in range start (inclusive) to - * end (inclusive) */ + /** + * Returns hash of bytes in range start (inclusive) to + * end (inclusive) + */ public static int hashCode(byte[] array, int start, int end) { int code = 0; - for(int i=end-1;i>=start;i--) - code = code*31 + array[i]; + for (int i = end - 1; i >= start; i--) + code = code * 31 + array[i]; return code; } + + + // Since Arrays.equals doesn't implement offsets for equals + /** + * See if two array slices are the same. + * + * @param left The left array to compare + * @param offsetLeft The offset into the array. Must be positive + * @param right The right array to compare + * @param offsetRight the offset into the right array. Must be positive + * @param length The length of the section of the array to compare + * @return true if the two arrays, starting at their respective offsets, are equal + * + * @see java.util.Arrays#equals(char[], char[]) + */ + public static boolean equals(char[] left, int offsetLeft, char[] right, int offsetRight, int length) { + if ((offsetLeft + length <= left.length) && (offsetRight + length <= right.length)) { + for (int i = 0; i < length; i++) { + if (left[offsetLeft + i] != right[offsetRight + i]) { + return false; + } + + } + return true; + } + return false; + } + + // Since Arrays.equals doesn't implement offsets for equals + /** + * See if two array slices are the same. + * + * @param left The left array to compare + * @param offsetLeft The offset into the array. 
Must be positive + * @param right The right array to compare + * @param offsetRight the offset into the right array. Must be positive + * @param length The length of the section of the array to compare + * @return true if the two arrays, starting at their respective offsets, are equal + * + * @see java.util.Arrays#equals(byte[], byte[]) + */ + public static boolean equals(byte[] left, int offsetLeft, byte[] right, int offsetRight, int length) { + if ((offsetLeft + length <= left.length) && (offsetRight + length <= right.length)) { + for (int i = 0; i < length; i++) { + if (left[offsetLeft + i] != right[offsetRight + i]) { + return false; + } + + } + return true; + } + return false; + } + + /* DISABLE THIS FOR NOW: This has performance problems until Java creates intrinsics for Class#getComponentType() and Array.newInstance() + public static T[] grow(T[] array, int minSize) { + assert minSize >= 0: "size must be positive (got " + minSize + "): likely integer overflow?"; + if (array.length < minSize) { + @SuppressWarnings("unchecked") final T[] newArray = + (T[]) Array.newInstance(array.getClass().getComponentType(), oversize(minSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF)); + System.arraycopy(array, 0, newArray, 0, array.length); + return newArray; + } else + return array; + } + + public static T[] grow(T[] array) { + return grow(array, 1 + array.length); + } + + public static T[] shrink(T[] array, int targetSize) { + assert targetSize >= 0: "size must be positive (got " + targetSize + "): likely integer overflow?"; + final int newSize = getShrinkSize(array.length, targetSize, RamUsageEstimator.NUM_BYTES_OBJECT_REF); + if (newSize != array.length) { + @SuppressWarnings("unchecked") final T[] newArray = + (T[]) Array.newInstance(array.getClass().getComponentType(), newSize); + System.arraycopy(array, 0, newArray, 0, newSize); + return newArray; + } else + return array; + } + */ + + // Since Arrays.equals doesn't implement offsets for equals + /** + * See if two array slices are the same. + * + * @param left The left array to compare + * @param offsetLeft The offset into the array. Must be positive + * @param right The right array to compare + * @param offsetRight the offset into the right array. Must be positive + * @param length The length of the section of the array to compare + * @return true if the two arrays, starting at their respective offsets, are equal + * + * @see java.util.Arrays#equals(char[], char[]) + */ + public static boolean equals(int[] left, int offsetLeft, int[] right, int offsetRight, int length) { + if ((offsetLeft + length <= left.length) && (offsetRight + length <= right.length)) { + for (int i = 0; i < length; i++) { + if (left[offsetLeft + i] != right[offsetRight + i]) { + return false; + } + + } + return true; + } + return false; + } + + public static int[] toIntArray(Collection ints) { + + final int[] result = new int[ints.size()]; + int upto = 0; + for(int v : ints) { + result[upto++] = v; + } + + // paranoia: + assert upto == result.length; + + return result; + } + + private static class NaturalComparator> implements Comparator { + NaturalComparator() {} + @Override + public int compare(T o1, T o2) { + return o1.compareTo(o2); + } + } + + @SuppressWarnings("rawtypes") + private static final Comparator NATURAL_COMPARATOR = new NaturalComparator(); + + /** Get the natural {@link Comparator} for the provided object class. 
*/ + @SuppressWarnings("unchecked") + public static > Comparator naturalComparator() { + return (Comparator) NATURAL_COMPARATOR; + } + + /** Swap values stored in slots i and j */ + public static void swap(T[] arr, int i, int j) { + final T tmp = arr[i]; + arr[i] = arr[j]; + arr[j] = tmp; + } + + // intro-sorts + + /** + * Sorts the given array slice using the {@link Comparator}. This method uses the intro sort + * algorithm, but falls back to insertion sort for small arrays. + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static void introSort(T[] a, int fromIndex, int toIndex, Comparator comp) { + if (toIndex-fromIndex <= 1) return; + new ArrayIntroSorter<>(a, comp).sort(fromIndex, toIndex); + } + + /** + * Sorts the given array using the {@link Comparator}. This method uses the intro sort + * algorithm, but falls back to insertion sort for small arrays. + */ + public static void introSort(T[] a, Comparator comp) { + introSort(a, 0, a.length, comp); + } + + /** + * Sorts the given array slice in natural order. This method uses the intro sort + * algorithm, but falls back to insertion sort for small arrays. + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static > void introSort(T[] a, int fromIndex, int toIndex) { + if (toIndex-fromIndex <= 1) return; + introSort(a, fromIndex, toIndex, ArrayUtil.naturalComparator()); + } + + /** + * Sorts the given array in natural order. This method uses the intro sort + * algorithm, but falls back to insertion sort for small arrays. + */ + public static > void introSort(T[] a) { + introSort(a, 0, a.length); + } + + // tim sorts: + + /** + * Sorts the given array slice using the {@link Comparator}. This method uses the Tim sort + * algorithm, but falls back to binary sort for small arrays. + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static void timSort(T[] a, int fromIndex, int toIndex, Comparator comp) { + if (toIndex-fromIndex <= 1) return; + new ArrayTimSorter<>(a, comp, a.length / 64).sort(fromIndex, toIndex); + } + + /** + * Sorts the given array using the {@link Comparator}. This method uses the Tim sort + * algorithm, but falls back to binary sort for small arrays. + */ + public static void timSort(T[] a, Comparator comp) { + timSort(a, 0, a.length, comp); + } + + /** + * Sorts the given array slice in natural order. This method uses the Tim sort + * algorithm, but falls back to binary sort for small arrays. + * @param fromIndex start index (inclusive) + * @param toIndex end index (exclusive) + */ + public static > void timSort(T[] a, int fromIndex, int toIndex) { + if (toIndex-fromIndex <= 1) return; + timSort(a, fromIndex, toIndex, ArrayUtil.naturalComparator()); + } + + /** + * Sorts the given array in natural order. This method uses the Tim sort + * algorithm, but falls back to binary sort for small arrays. + */ + public static > void timSort(T[] a) { + timSort(a, 0, a.length); + } + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/Attribute.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/AttributeFactory.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/AttributeImpl.java'. 
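A short sketch exercising the ArrayUtil helpers added in the hunk above, with illustrative values: amortized growth via grow()/oversize(), char[]-based parseInt, the offset-aware equals, and introSort over the natural order.

import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.RamUsageEstimator;

public class ArrayUtilExample {
  public static void main(String[] args) {
    // Amortized growth: oversize() picks the new capacity, grow() does the copy.
    int[] buf = new int[0];
    for (int i = 0; i < 1000; i++) {
      buf = ArrayUtil.grow(buf, i + 1);   // grows by roughly 1/8th once non-trivial
      buf[i] = i;
    }
    System.out.println(ArrayUtil.oversize(1000, RamUsageEstimator.NUM_BYTES_INT));

    // char[]-based int parsing without building a String first:
    int parsed = ArrayUtil.parseInt("-4096".toCharArray());

    // Offset-aware slice comparison:
    boolean same = ArrayUtil.equals(new int[]{9, 1, 2, 3}, 1, new int[]{1, 2, 3, 7}, 0, 3);

    // In-place sort of an object array slice using the natural order:
    Integer[] vals = {5, 3, 8, 1};
    ArrayUtil.introSort(vals, 0, vals.length);
    System.out.println(parsed + " " + same + " " + java.util.Arrays.toString(vals));
  }
}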
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/AttributeReflector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/AttributeSource.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/BitUtil.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/BitUtil.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/BitUtil.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/BitUtil.java 16 Dec 2014 11:31:30 -0000 1.1.2.1 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,760 +17,151 @@ package org.apache.lucene.util; // from org.apache.solr.util rev 555343 -/** A variety of high efficiencly bit twiddling routines. - * - * @version $Id$ +/** A variety of high efficiency bit twiddling routines. + * @lucene.internal */ -public class BitUtil { +public final class BitUtil { - /** Returns the number of bits set in the long */ - public static int pop(long x) { - /* Hacker's Delight 32 bit pop function: - * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc - * - int pop(unsigned x) { - x = x - ((x >> 1) & 0x55555555); - x = (x & 0x33333333) + ((x >> 2) & 0x33333333); - x = (x + (x >> 4)) & 0x0F0F0F0F; - x = x + (x >> 8); - x = x + (x >> 16); - return x & 0x0000003F; - } - ***/ + private static final byte[] BYTE_COUNTS = { // table of bits/byte + 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7, + 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8 + }; - // 64 bit java version of the C function from above - x = x - ((x >>> 1) & 0x5555555555555555L); - x = (x & 0x3333333333333333L) + ((x >>>2 ) & 0x3333333333333333L); - x = (x + (x >>> 4)) & 0x0F0F0F0F0F0F0F0FL; - x = x + (x >>> 8); - x = x + (x >>> 16); - x = x + (x >>> 32); - return ((int)x) & 0x7F; - } + // The General Idea: instead of having an array per byte that has + // the offsets of the next set bit, that array could be + // packed inside a 32 bit integer (8 4 bit numbers). 
That + // should be faster than accessing an array for each index, and + // the total array size is kept smaller (256*sizeof(int))=1K + /***** the python code that generated bitlist + def bits2int(val): + arr=0 + for shift in range(8,0,-1): + if val & 0x80: + arr = (arr << 4) | shift + val = val << 1 + return arr - /*** Returns the number of set bits in an array of longs. */ - public static long pop_array(long A[], int wordOffset, int numWords) { - /* - * Robert Harley and David Seal's bit counting algorithm, as documented - * in the revisions of Hacker's Delight - * http://www.hackersdelight.org/revisions.pdf - * http://www.hackersdelight.org/HDcode/newCode/pop_arrayHS.cc - * - * This function was adapted to Java, and extended to use 64 bit words. - * if only we had access to wider registers like SSE from java... - * - * This function can be transformed to compute the popcount of other functions - * on bitsets via something like this: - * sed 's/A\[\([^]]*\)\]/\(A[\1] \& B[\1]\)/g' - * - */ - int n = wordOffset+numWords; - long tot=0, tot8=0; - long ones=0, twos=0, fours=0; + def int_table(): + tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] + return ','.join(tbl) + ******/ + private static final int[] BIT_LISTS = { + 0x0, 0x1, 0x2, 0x21, 0x3, 0x31, 0x32, 0x321, 0x4, 0x41, 0x42, 0x421, 0x43, + 0x431, 0x432, 0x4321, 0x5, 0x51, 0x52, 0x521, 0x53, 0x531, 0x532, 0x5321, + 0x54, 0x541, 0x542, 0x5421, 0x543, 0x5431, 0x5432, 0x54321, 0x6, 0x61, 0x62, + 0x621, 0x63, 0x631, 0x632, 0x6321, 0x64, 0x641, 0x642, 0x6421, 0x643, + 0x6431, 0x6432, 0x64321, 0x65, 0x651, 0x652, 0x6521, 0x653, 0x6531, 0x6532, + 0x65321, 0x654, 0x6541, 0x6542, 0x65421, 0x6543, 0x65431, 0x65432, 0x654321, + 0x7, 0x71, 0x72, 0x721, 0x73, 0x731, 0x732, 0x7321, 0x74, 0x741, 0x742, + 0x7421, 0x743, 0x7431, 0x7432, 0x74321, 0x75, 0x751, 0x752, 0x7521, 0x753, + 0x7531, 0x7532, 0x75321, 0x754, 0x7541, 0x7542, 0x75421, 0x7543, 0x75431, + 0x75432, 0x754321, 0x76, 0x761, 0x762, 0x7621, 0x763, 0x7631, 0x7632, + 0x76321, 0x764, 0x7641, 0x7642, 0x76421, 0x7643, 0x76431, 0x76432, 0x764321, + 0x765, 0x7651, 0x7652, 0x76521, 0x7653, 0x76531, 0x76532, 0x765321, 0x7654, + 0x76541, 0x76542, 0x765421, 0x76543, 0x765431, 0x765432, 0x7654321, 0x8, + 0x81, 0x82, 0x821, 0x83, 0x831, 0x832, 0x8321, 0x84, 0x841, 0x842, 0x8421, + 0x843, 0x8431, 0x8432, 0x84321, 0x85, 0x851, 0x852, 0x8521, 0x853, 0x8531, + 0x8532, 0x85321, 0x854, 0x8541, 0x8542, 0x85421, 0x8543, 0x85431, 0x85432, + 0x854321, 0x86, 0x861, 0x862, 0x8621, 0x863, 0x8631, 0x8632, 0x86321, 0x864, + 0x8641, 0x8642, 0x86421, 0x8643, 0x86431, 0x86432, 0x864321, 0x865, 0x8651, + 0x8652, 0x86521, 0x8653, 0x86531, 0x86532, 0x865321, 0x8654, 0x86541, + 0x86542, 0x865421, 0x86543, 0x865431, 0x865432, 0x8654321, 0x87, 0x871, + 0x872, 0x8721, 0x873, 0x8731, 0x8732, 0x87321, 0x874, 0x8741, 0x8742, + 0x87421, 0x8743, 0x87431, 0x87432, 0x874321, 0x875, 0x8751, 0x8752, 0x87521, + 0x8753, 0x87531, 0x87532, 0x875321, 0x8754, 0x87541, 0x87542, 0x875421, + 0x87543, 0x875431, 0x875432, 0x8754321, 0x876, 0x8761, 0x8762, 0x87621, + 0x8763, 0x87631, 0x87632, 0x876321, 0x8764, 0x87641, 0x87642, 0x876421, + 0x87643, 0x876431, 0x876432, 0x8764321, 0x8765, 0x87651, 0x87652, 0x876521, + 0x87653, 0x876531, 0x876532, 0x8765321, 0x87654, 0x876541, 0x876542, + 0x8765421, 0x876543, 0x8765431, 0x8765432, 0x87654321 + }; - int i; - for (i = wordOffset; i <= n - 8; i+=8) { - /*** C macro from Hacker's Delight - #define CSA(h,l, a,b,c) \ - {unsigned u = a ^ b; unsigned v = c; \ - h = (a & b) | (u & v); l = u ^ v;} 
- ***/ + private BitUtil() {} // no instance - long twosA,twosB,foursA,foursB,eights; + /** Return the number of bits sets in b. */ + public static int bitCount(byte b) { + return BYTE_COUNTS[b & 0xFF]; + } - // CSA(twosA, ones, ones, A[i], A[i+1]) - { - long b=A[i], c=A[i+1]; - long u=ones ^ b; - twosA=(ones & b)|( u & c); - ones=u^c; - } - // CSA(twosB, ones, ones, A[i+2], A[i+3]) - { - long b=A[i+2], c=A[i+3]; - long u=ones^b; - twosB =(ones&b)|(u&c); - ones=u^c; - } - //CSA(foursA, twos, twos, twosA, twosB) - { - long u=twos^twosA; - foursA=(twos&twosA)|(u&twosB); - twos=u^twosB; - } - //CSA(twosA, ones, ones, A[i+4], A[i+5]) - { - long b=A[i+4], c=A[i+5]; - long u=ones^b; - twosA=(ones&b)|(u&c); - ones=u^c; - } - // CSA(twosB, ones, ones, A[i+6], A[i+7]) - { - long b=A[i+6], c=A[i+7]; - long u=ones^b; - twosB=(ones&b)|(u&c); - ones=u^c; - } - //CSA(foursB, twos, twos, twosA, twosB) - { - long u=twos^twosA; - foursB=(twos&twosA)|(u&twosB); - twos=u^twosB; - } + /** Return the list of bits which are set in b encoded as followed: + * (i >>> (4 * n)) & 0x0F is the offset of the n-th set bit of + * the given byte plus one, or 0 if there are n or less bits set in the given + * byte. For example bitList(12) returns 0x43:

      + *
+ * 0x43 & 0x0F is 3, meaning the first bit set is at offset 3-1 = 2,
+ * (0x43 >>> 4) & 0x0F is 4, meaning there is a second bit set at offset 4-1 = 3,
+ * (0x43 >>> 8) & 0x0F is 0, meaning there are no more bits set in this byte.
    */ + public static int bitList(byte b) { + return BIT_LISTS[b & 0xFF]; + } - //CSA(eights, fours, fours, foursA, foursB) - { - long u=fours^foursA; - eights=(fours&foursA)|(u&foursB); - fours=u^foursB; - } - tot8 += pop(eights); - } + // The pop methods used to rely on bit-manipulation tricks for speed but it + // turns out that it is faster to use the Long.bitCount method (which is an + // intrinsic since Java 6u18) in a naive loop, see LUCENE-2221 - // handle trailing words in a binary-search manner... - // derived from the loop above by setting specific elements to 0. - // the original method in Hackers Delight used a simple for loop: - // for (i = i; i < n; i++) // Add in the last elements - // tot = tot + pop(A[i]); - - if (i<=n-4) { - long twosA, twosB, foursA, eights; - { - long b=A[i], c=A[i+1]; - long u=ones ^ b; - twosA=(ones & b)|( u & c); - ones=u^c; - } - { - long b=A[i+2], c=A[i+3]; - long u=ones^b; - twosB =(ones&b)|(u&c); - ones=u^c; - } - { - long u=twos^twosA; - foursA=(twos&twosA)|(u&twosB); - twos=u^twosB; - } - eights=fours&foursA; - fours=fours^foursA; - - tot8 += pop(eights); - i+=4; + /** Returns the number of set bits in an array of longs. */ + public static long pop_array(long[] arr, int wordOffset, int numWords) { + long popCount = 0; + for (int i = wordOffset, end = wordOffset + numWords; i < end; ++i) { + popCount += Long.bitCount(arr[i]); } - - if (i<=n-2) { - long b=A[i], c=A[i+1]; - long u=ones ^ b; - long twosA=(ones & b)|( u & c); - ones=u^c; - - long foursA=twos&twosA; - twos=twos^twosA; - - long eights=fours&foursA; - fours=fours^foursA; - - tot8 += pop(eights); - i+=2; - } - - if (i>= 1 - return i - print ','.join([ str(ntz(i)) for i in range(256) ]) - ***/ - /** table of number of trailing zeros in a byte */ - public static final byte[] ntzTable = {8,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,7,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,6,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,5,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0,4,0,1,0,2,0,1,0,3,0,1,0,2,0,1,0}; - - - /** Returns number of trailing zeros in the 64 bit long value. */ - public static int ntz(long val) { - // A full binary search to determine the low byte was slower than - // a linear search for nextSetBit(). This is most likely because - // the implementation of nextSetBit() shifts bits to the right, increasing - // the probability that the first non-zero byte is in the rhs. - // - // This implementation does a single binary search at the top level only - // so that all other bit shifting can be done on ints instead of longs to - // remain friendly to 32 bit architectures. In addition, the case of a - // non-zero first byte is checked for first because it is the most common - // in dense bit arrays. - - int lower = (int)val; - int lowByte = lower & 0xff; - if (lowByte != 0) return ntzTable[lowByte]; - - if (lower!=0) { - lowByte = (lower>>>8) & 0xff; - if (lowByte != 0) return ntzTable[lowByte] + 8; - lowByte = (lower>>>16) & 0xff; - if (lowByte != 0) return ntzTable[lowByte] + 16; - // no need to mask off low byte for the last byte in the 32 bit word - // no need to check for zero on the last byte either. 
- return ntzTable[lower>>>24] + 24; - } else { - // grab upper 32 bits - int upper=(int)(val>>32); - lowByte = upper & 0xff; - if (lowByte != 0) return ntzTable[lowByte] + 32; - lowByte = (upper>>>8) & 0xff; - if (lowByte != 0) return ntzTable[lowByte] + 40; - lowByte = (upper>>>16) & 0xff; - if (lowByte != 0) return ntzTable[lowByte] + 48; - // no need to mask off low byte for the last byte in the 32 bit word - // no need to check for zero on the last byte either. - return ntzTable[upper>>>24] + 56; - } - } - - /** returns 0 based index of first set bit - * (only works for x!=0) - *
    This is an alternate implementation of ntz() - */ - public static int ntz2(long x) { - int n = 0; - int y = (int)x; - if (y==0) {n+=32; y = (int)(x>>>32); } // the only 64 bit shift necessary - if ((y & 0x0000FFFF) == 0) { n+=16; y>>>=16; } - if ((y & 0x000000FF) == 0) { n+=8; y>>>=8; } - return (ntzTable[ y & 0xff ]) + n; - } - - /** returns 0 based index of first set bit - *
    This is an alternate implementation of ntz() - */ - public static int ntz3(long x) { - // another implementation taken from Hackers Delight, extended to 64 bits - // and converted to Java. - // Many 32 bit ntz algorithms are at http://www.hackersdelight.org/HDcode/ntz.cc - int n = 1; - - // do the first step as a long, all others as ints. - int y = (int)x; - if (y==0) {n+=32; y = (int)(x>>>32); } - if ((y & 0x0000FFFF) == 0) { n+=16; y>>>=16; } - if ((y & 0x000000FF) == 0) { n+=8; y>>>=8; } - if ((y & 0x0000000F) == 0) { n+=4; y>>>=4; } - if ((y & 0x00000003) == 0) { n+=2; y>>>=2; } - return n - (y & 1); - } - - - /** returns true if v is a power of two or zero*/ - public static boolean isPowerOfTwo(int v) { - return ((v & (v-1)) == 0); - } - - /** returns true if v is a power of two or zero*/ - public static boolean isPowerOfTwo(long v) { - return ((v & (v-1)) == 0); - } - /** returns the next highest power of two, or the current value if it's already a power of two or zero*/ public static int nextHighestPowerOfTwo(int v) { v--; @@ -796,4 +187,29 @@ return v; } + /** Same as {@link #zigZagEncode(long)} but on integers. */ + public static int zigZagEncode(int i) { + return (i >> 31) ^ (i << 1); + } + + /** + *
    Zig-zag + * encode the provided long. Assuming the input is a signed long whose + * absolute value can be stored on n bits, the returned value will + * be an unsigned long that can be stored on n+1 bits. + */ + public static long zigZagEncode(long l) { + return (l >> 63) ^ (l << 1); + } + + /** Decode an int previously encoded with {@link #zigZagEncode(int)}. */ + public static int zigZagDecode(int i) { + return ((i >>> 1) ^ -(i & 1)); + } + + /** Decode a long previously encoded with {@link #zigZagEncode(long)}. */ + public static long zigZagDecode(long l) { + return ((l >>> 1) ^ -(l & 1)); + } + } Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/BitVector.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/Bits.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/BroadWord.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/ByteBlockPool.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/BytesRef.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/BytesRefArray.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/BytesRefBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/BytesRefHash.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/BytesRefIterator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/CharsRef.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/CharsRefBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/CloseableThreadLocal.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/CloseableThreadLocal.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/CloseableThreadLocal.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/CloseableThreadLocal.java 16 Dec 2014 11:31:31 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,10 +17,12 @@ * limitations under the License. 
*/ -import java.util.Map; -import java.util.HashMap; -import java.util.Iterator; +import java.io.Closeable; import java.lang.ref.WeakReference; +import java.util.Iterator; +import java.util.Map; +import java.util.WeakHashMap; +import java.util.concurrent.atomic.AtomicInteger; /** Java's builtin ThreadLocal has a serious flaw: * it can take an arbitrarily long amount of time to @@ -33,65 +35,110 @@ * While not technically a memory leak, because eventually * the memory will be reclaimed, it can take a long time * and you can easily hit OutOfMemoryError because from the - * GC's standpoint the stale entries are not reclaimaible. + * GC's standpoint the stale entries are not reclaimable. * * This class works around that, by only enrolling * WeakReference values into the ThreadLocal, and * separately holding a hard reference to each stored * value. When you call {@link #close}, these hard * references are cleared and then GC is freely able to - * reclaim space by objects stored in it. */ + * reclaim space by objects stored in it. + * + * We can not rely on {@link ThreadLocal#remove()} as it + * only removes the value for the caller thread, whereas + * {@link #close} takes care of all + * threads. You should not call {@link #close} until all + * threads are done using the instance. + * + * @lucene.internal + */ -public class CloseableThreadLocal { +public class CloseableThreadLocal implements Closeable { - private ThreadLocal t = new ThreadLocal(); + private ThreadLocal> t = new ThreadLocal<>(); - private Map hardRefs = new HashMap(); + // Use a WeakHashMap so that if a Thread exits and is + // GC'able, its entry may be removed: + private Map hardRefs = new WeakHashMap<>(); - protected Object initialValue() { + // Increase this to decrease frequency of purging in get: + private static int PURGE_MULTIPLIER = 20; + + // On each get or set we decrement this; when it hits 0 we + // purge. After purge, we set this to + // PURGE_MULTIPLIER * stillAliveCount. This keeps + // amortized cost of purging linear. + private final AtomicInteger countUntilPurge = new AtomicInteger(PURGE_MULTIPLIER); + + protected T initialValue() { return null; } - public Object get() { - WeakReference weakRef = (WeakReference) t.get(); + public T get() { + WeakReference weakRef = t.get(); if (weakRef == null) { - Object iv = initialValue(); + T iv = initialValue(); if (iv != null) { set(iv); return iv; - } else + } else { return null; + } } else { - Object v = weakRef.get(); - // This can never be null, because we hold a hard - // reference to the underlying object: - assert v != null; - return v; + maybePurge(); + return weakRef.get(); } } - public void set(Object object) { + public void set(T object) { - t.set(new WeakReference(object)); + t.set(new WeakReference<>(object)); synchronized(hardRefs) { hardRefs.put(Thread.currentThread(), object); + maybePurge(); + } + } - // Purge dead threads - Iterator it = hardRefs.keySet().iterator(); - while(it.hasNext()) { - Thread t = (Thread) it.next(); - if (!t.isAlive()) + private void maybePurge() { + if (countUntilPurge.getAndDecrement() == 0) { + purge(); + } + } + + // Purge dead threads + private void purge() { + synchronized(hardRefs) { + int stillAliveCount = 0; + for (Iterator it = hardRefs.keySet().iterator(); it.hasNext();) { + final Thread t = it.next(); + if (!t.isAlive()) { it.remove(); + } else { + stillAliveCount++; + } } + int nextCount = (1+stillAliveCount) * PURGE_MULTIPLIER; + if (nextCount <= 0) { + // defensive: int overflow! 
+ nextCount = 1000000; + } + + countUntilPurge.set(nextCount); } } + @Override public void close() { // Clear the hard refs; then, the only remaining refs to // all values we were storing are weak (unless somewhere // else is still using them) and so GC may reclaim them: hardRefs = null; + // Take care of the current thread right now; others will be + // taken care of via the WeakReferences. + if (t != null) { + t.remove(); + } t = null; } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/CollectionUtil.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/CommandLineUtil.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/Constants.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/Constants.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/Constants.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/Constants.java 16 Dec 2014 11:31:32 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,31 +17,107 @@ * limitations under the License. */ +import java.lang.reflect.Field; +import java.util.StringTokenizer; + + /** * Some useful constants. - * - * - * @version $Id$ **/ public final class Constants { - private Constants() {} // can't construct + private Constants() {} // can't construct - /** The value of System.getProperty("java.version"). **/ + /** JVM vendor info. */ + public static final String JVM_VENDOR = System.getProperty("java.vm.vendor"); + public static final String JVM_VERSION = System.getProperty("java.vm.version"); + public static final String JVM_NAME = System.getProperty("java.vm.name"); + public static final String JVM_SPEC_VERSION = System.getProperty("java.specification.version"); + + /** The value of System.getProperty("java.version"). **/ public static final String JAVA_VERSION = System.getProperty("java.version"); - /** True iff this is Java version 1.1. */ - public static final boolean JAVA_1_1 = JAVA_VERSION.startsWith("1.1."); - /** True iff this is Java version 1.2. */ - public static final boolean JAVA_1_2 = JAVA_VERSION.startsWith("1.2."); - /** True iff this is Java version 1.3. */ - public static final boolean JAVA_1_3 = JAVA_VERSION.startsWith("1.3."); - /** The value of System.getProperty("os.name"). **/ + /** The value of System.getProperty("os.name"). **/ public static final String OS_NAME = System.getProperty("os.name"); /** True iff running on Linux. */ public static final boolean LINUX = OS_NAME.startsWith("Linux"); /** True iff running on Windows. */ public static final boolean WINDOWS = OS_NAME.startsWith("Windows"); /** True iff running on SunOS. 
*/ public static final boolean SUN_OS = OS_NAME.startsWith("SunOS"); + /** True iff running on Mac OS X */ + public static final boolean MAC_OS_X = OS_NAME.startsWith("Mac OS X"); + /** True iff running on FreeBSD */ + public static final boolean FREE_BSD = OS_NAME.startsWith("FreeBSD"); + + public static final String OS_ARCH = System.getProperty("os.arch"); + public static final String OS_VERSION = System.getProperty("os.version"); + public static final String JAVA_VENDOR = System.getProperty("java.vendor"); + + private static final int JVM_MAJOR_VERSION; + private static final int JVM_MINOR_VERSION; + + /** True iff running on a 64bit JVM */ + public static final boolean JRE_IS_64BIT; + + static { + final StringTokenizer st = new StringTokenizer(JVM_SPEC_VERSION, "."); + JVM_MAJOR_VERSION = Integer.parseInt(st.nextToken()); + if (st.hasMoreTokens()) { + JVM_MINOR_VERSION = Integer.parseInt(st.nextToken()); + } else { + JVM_MINOR_VERSION = 0; + } + boolean is64Bit = false; + try { + final Class unsafeClass = Class.forName("sun.misc.Unsafe"); + final Field unsafeField = unsafeClass.getDeclaredField("theUnsafe"); + unsafeField.setAccessible(true); + final Object unsafe = unsafeField.get(null); + final int addressSize = ((Number) unsafeClass.getMethod("addressSize") + .invoke(unsafe)).intValue(); + //System.out.println("Address size: " + addressSize); + is64Bit = addressSize >= 8; + } catch (Exception e) { + final String x = System.getProperty("sun.arch.data.model"); + if (x != null) { + is64Bit = x.indexOf("64") != -1; + } else { + if (OS_ARCH != null && OS_ARCH.indexOf("64") != -1) { + is64Bit = true; + } else { + is64Bit = false; + } + } + } + JRE_IS_64BIT = is64Bit; + } + + public static final boolean JRE_IS_MINIMUM_JAVA8 = JVM_MAJOR_VERSION > 1 || (JVM_MAJOR_VERSION == 1 && JVM_MINOR_VERSION >= 8); + public static final boolean JRE_IS_MINIMUM_JAVA9 = JVM_MAJOR_VERSION > 1 || (JVM_MAJOR_VERSION == 1 && JVM_MINOR_VERSION >= 9); + + /** @deprecated With Lucene 4.0, we are always on Java 6 */ + @Deprecated + public static final boolean JRE_IS_MINIMUM_JAVA6 = + new Boolean(true).booleanValue(); // prevent inlining in foreign class files + + /** @deprecated With Lucene 4.8, we are always on Java 7 */ + @Deprecated + public static final boolean JRE_IS_MINIMUM_JAVA7 = + new Boolean(true).booleanValue(); // prevent inlining in foreign class files + + /** + * This is the internal Lucene version, including bugfix versions, recorded into each segment. + * @deprecated Use {@link Version#LATEST} + */ + @Deprecated + public static final String LUCENE_MAIN_VERSION = Version.LATEST.toString(); + + /** + * Don't use this constant because the name is not self-describing! + * @deprecated Use {@link Version#LATEST} + */ + @Deprecated + public static final String LUCENE_VERSION = Version.LATEST.toString(); + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/Counter.java'. Fisheye: No comparison available. Pass `N' to diff? 
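The Constants.java hunk above derives JVM_MAJOR_VERSION and JVM_MINOR_VERSION by tokenizing java.specification.version, and falls back to the sun.arch.data.model and os.arch system properties when the sun.misc.Unsafe addressSize() probe is unavailable. A minimal standalone sketch of that same detection order (the class name JreSniff is illustrative only and not part of the patch):

import java.util.StringTokenizer;

public final class JreSniff {
  public static void main(String[] args) {
    // Same parsing idea as the static block in Constants.java: split the
    // specification version into major.minor, defaulting the minor part to 0.
    final String spec = System.getProperty("java.specification.version", "1.0");
    final StringTokenizer st = new StringTokenizer(spec, ".");
    final int major = Integer.parseInt(st.nextToken());
    final int minor = st.hasMoreTokens() ? Integer.parseInt(st.nextToken()) : 0;

    // Fallback chain used when the Unsafe reflection probe fails:
    // prefer sun.arch.data.model, then look for "64" in os.arch.
    final String dataModel = System.getProperty("sun.arch.data.model");
    final String osArch = System.getProperty("os.arch", "");
    final boolean is64Bit = (dataModel != null) ? dataModel.contains("64")
                                                : osArch.contains("64");

    System.out.println("spec " + major + "." + minor + ", 64-bit: " + is64Bit);
  }
}

On a Java 8 JVM this prints "spec 1.8" (java.specification.version is "1.8" there), which is the same information the patch folds into JRE_IS_MINIMUM_JAVA8 and JRE_IS_64BIT.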
Index: 3rdParty_sources/lucene/org/apache/lucene/util/DocIdBitSet.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/DocIdBitSet.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/DocIdBitSet.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/DocIdBitSet.java 16 Dec 2014 11:31:31 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,29 +18,66 @@ */ import java.util.BitSet; + import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; /** Simple DocIdSet and DocIdSetIterator backed by a BitSet */ -public class DocIdBitSet extends DocIdSet { - private BitSet bitSet; +public class DocIdBitSet extends DocIdSet implements Bits { + + private static final long BASE_RAM_BYTES_USED = + RamUsageEstimator.shallowSizeOfInstance(DocIdBitSet.class) + + RamUsageEstimator.shallowSizeOfInstance(BitSet.class) + + RamUsageEstimator.NUM_BYTES_ARRAY_HEADER; // the array that stores the bits + + private final BitSet bitSet; public DocIdBitSet(BitSet bitSet) { this.bitSet = bitSet; } + @Override public DocIdSetIterator iterator() { return new DocIdBitSetIterator(bitSet); } + + @Override + public Bits bits() { + return this; + } + + /** This DocIdSet implementation is cacheable. */ + @Override + public boolean isCacheable() { + return true; + } /** * Returns the underlying BitSet. */ public BitSet getBitSet() { - return this.bitSet; + return this.bitSet; } + @Override + public boolean get(int index) { + return bitSet.get(index); + } + + @Override + public int length() { + // the size may not be correct... + return bitSet.size(); + } + + @Override + public long ramBytesUsed() { + // unfortunately this is likely underestimated if the Bitset implementation + // over-sizes the array that stores the bits + return BASE_RAM_BYTES_USED + (bitSet.size() + 7) >>> 3; + } + private static class DocIdBitSetIterator extends DocIdSetIterator { private int docId; private BitSet bitSet; @@ -50,28 +87,32 @@ this.docId = -1; } - public int doc() { - assert docId != -1; + @Override + public int docID() { return docId; } - public boolean next() { + @Override + public int nextDoc() { // (docId + 1) on next line requires -1 initial value for docNr: - return checkNextDocId(bitSet.nextSetBit(docId + 1)); + int d = bitSet.nextSetBit(docId + 1); + // -1 returned by BitSet.nextSetBit() when exhausted + docId = d == -1 ? NO_MORE_DOCS : d; + return docId; } - public boolean skipTo(int skipDocNr) { - return checkNextDocId( bitSet.nextSetBit(skipDocNr)); + @Override + public int advance(int target) { + int d = bitSet.nextSetBit(target); + // -1 returned by BitSet.nextSetBit() when exhausted + docId = d == -1 ? NO_MORE_DOCS : d; + return docId; } - - private boolean checkNextDocId(int d) { - if (d == -1) { // -1 returned by BitSet.nextSetBit() when exhausted - docId = Integer.MAX_VALUE; - return false; - } else { - docId = d; - return true; - } + + @Override + public long cost() { + // upper bound + return bitSet.length(); } } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/DoubleBarrelLRUCache.java'. Fisheye: No comparison available. Pass `N' to diff? 
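The DocIdBitSet.java hunk above moves the iterator to the nextDoc()/advance() contract, mapping the -1 returned by BitSet.nextSetBit() to the NO_MORE_DOCS sentinel (Integer.MAX_VALUE). A self-contained sketch of that iteration contract using only java.util.BitSet (the class name BitSetCursorDemo is illustrative only and not part of the patch):

import java.util.BitSet;

public final class BitSetCursorDemo {
  // Same sentinel value as DocIdSetIterator.NO_MORE_DOCS.
  static final int NO_MORE_DOCS = Integer.MAX_VALUE;

  public static void main(String[] args) {
    BitSet bits = new BitSet();
    bits.set(3);
    bits.set(7);
    bits.set(64);

    // docId starts at -1 so that nextSetBit(docId + 1) begins the scan at bit 0,
    // mirroring DocIdBitSetIterator.nextDoc() in the patch.
    int docId = -1;
    while (true) {
      int next = bits.nextSetBit(docId + 1);
      docId = (next == -1) ? NO_MORE_DOCS : next; // -1 means the bit set is exhausted
      if (docId == NO_MORE_DOCS) {
        break;
      }
      System.out.println("doc " + docId); // prints 3, 7, 64
    }
  }
}

advance(target) in the patch is the same mapping applied to nextSetBit(target), which is why both methods can share the -1-to-sentinel conversion.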
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/FieldCacheSanityChecker.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/FilterIterator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/FixedBitSet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/GrowableByteArrayDataOutput.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/IOUtils.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/InPlaceMergeSorter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/IndexableBinaryStringTools.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/InfoStream.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/IntBlockPool.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/IntroSorter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/IntsRef.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/IntsRefBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/LongBitSet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/LongValues.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/LongsRef.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/MapOfSets.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/MathUtil.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/MergedIterator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/MutableBits.java'. Fisheye: No comparison available. Pass `N' to diff? 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/NamedSPILoader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/NamedThreadFactory.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/NumericUtils.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/OfflineSorter.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSet.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSet.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSet.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSet.java 16 Dec 2014 11:31:31 -0000 1.1.2.1 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,7 +18,6 @@ package org.apache.lucene.util; import java.util.Arrays; -import java.io.Serializable; import org.apache.lucene.search.DocIdSet; import org.apache.lucene.search.DocIdSetIterator; @@ -73,49 +72,73 @@ 1% full 2.51 3.49   1.00   1.02 - - * @version $Id$ */ -public class OpenBitSet extends DocIdSet implements Cloneable, Serializable { +public class OpenBitSet extends DocIdSet implements Bits, Cloneable { + + private static final long BASE_RAM_BYTES_USED = RamUsageEstimator.shallowSizeOfInstance(OpenBitSet.class); + protected long[] bits; protected int wlen; // number of words (elements) used in the array - /** Constructs an OpenBitSet large enough to hold numBits. - * - * @param numBits - */ + // Used only for assert: + private long numBits; + + /** Constructs an OpenBitSet large enough to hold {@code numBits}. */ public OpenBitSet(long numBits) { + this.numBits = numBits; bits = new long[bits2words(numBits)]; wlen = bits.length; } + /** Constructor: allocates enough space for 64 bits. */ public OpenBitSet() { this(64); } - /** Constructs an OpenBitSet from an existing long[]. - *
    - * The first 64 bits are in long[0], - * with bit index 0 at the least significant bit, and bit index 63 at the most significant. - * Given a bit index, - * the word containing it is long[index/64], and it is at bit number index%64 within that word. + /** + * Constructs an OpenBitSet from an existing long[]. *

    - * numWords are the number of elements in the array that contain - * set bits (non-zero longs). - * numWords should be <= bits.length, and - * any existing words in the array at position >= numWords should be zero. - * + * The first 64 bits are in long[0], with bit index 0 at the least significant + * bit, and bit index 63 at the most significant. Given a bit index, the word + * containing it is long[index/64], and it is at bit number index%64 within + * that word. + *

    + * numWords are the number of elements in the array that contain set bits + * (non-zero longs). numWords should be <= bits.length, and any existing + * words in the array at position >= numWords should be zero. + * */ public OpenBitSet(long[] bits, int numWords) { + if (numWords > bits.length) { + throw new IllegalArgumentException("numWords cannot exceed bits.length"); + } this.bits = bits; this.wlen = numWords; + this.numBits = wlen * 64; } + @Override public DocIdSetIterator iterator() { return new OpenBitSetIterator(bits, wlen); } + @Override + public Bits bits() { + return this; + } + + /** This DocIdSet implementation is cacheable. */ + @Override + public boolean isCacheable() { + return true; + } + + @Override + public long ramBytesUsed() { + return BASE_RAM_BYTES_USED + RamUsageEstimator.sizeOf(bits); + } + /** Returns the current capacity in bits (1 greater than the index of the last bit) */ public long capacity() { return bits.length << 6; } @@ -124,35 +147,32 @@ * compatibility. This is *not* equal to {@link #cardinality} */ public long size() { - return capacity(); + return capacity(); } + @Override + public int length() { + return bits.length << 6; + } + /** Returns true if there are no set bits */ public boolean isEmpty() { return cardinality()==0; } /** Expert: returns the long[] storing the bits */ public long[] getBits() { return bits; } - /** Expert: sets a new long[] to use as the bit storage */ - public void setBits(long[] bits) { this.bits = bits; } - /** Expert: gets the number of longs in the array that are in use */ public int getNumWords() { return wlen; } - /** Expert: sets the number of longs in the array that are in use */ - public void setNumWords(int nWords) { this.wlen=nWords; } - - - /** Returns true or false for the specified bit index. */ + @Override public boolean get(int index) { int i = index >> 6; // div 64 // signed shift will keep a negative index and force an // array-index-out-of-bounds-exception, removing the need for an explicit check. if (i>=bits.length) return false; - int bit = index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; return (bits[i] & bitmask) != 0; } @@ -161,11 +181,11 @@ * The index should be less than the OpenBitSet size */ public boolean fastGet(int index) { + assert index >= 0 && index < numBits; int i = index >> 6; // div 64 // signed shift will keep a negative index and force an // array-index-out-of-bounds-exception, removing the need for an explicit check. - int bit = index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; return (bits[i] & bitmask) != 0; } @@ -176,18 +196,17 @@ public boolean get(long index) { int i = (int)(index >> 6); // div 64 if (i>=bits.length) return false; - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; return (bits[i] & bitmask) != 0; } /** Returns true or false for the specified bit index. * The index should be less than the OpenBitSet size. 
*/ public boolean fastGet(long index) { + assert index >= 0 && index < numBits; int i = (int)(index >> 6); // div 64 - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; return (bits[i] & bitmask) != 0; } @@ -208,9 +227,9 @@ * The index should be less than the OpenBitSet size */ public int getBit(int index) { + assert index >= 0 && index < numBits; int i = index >> 6; // div 64 - int bit = index & 0x3f; // mod 64 - return ((int)(bits[i]>>>bit)) & 0x01; + return ((int)(bits[i]>>>index)) & 0x01; } @@ -226,8 +245,7 @@ /** sets a bit, expanding the set size if necessary */ public void set(long index) { int wordNum = expandingWordNum(index); - int bit = (int)index & 0x3f; - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] |= bitmask; } @@ -236,19 +254,19 @@ * The index should be less than the OpenBitSet size. */ public void fastSet(int index) { + assert index >= 0 && index < numBits; int wordNum = index >> 6; // div 64 - int bit = index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] |= bitmask; } /** Sets the bit at the specified index. * The index should be less than the OpenBitSet size. */ public void fastSet(long index) { + assert index >= 0 && index < numBits; int wordNum = (int)(index >> 6); - int bit = (int)index & 0x3f; - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] |= bitmask; } @@ -264,7 +282,7 @@ // since endIndex is one past the end, this is index of the last // word to be changed. - int endWord = expandingWordNum(endIndex-1); + int endWord = expandingWordNum(endIndex-1); long startmask = -1L << startIndex; long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap @@ -279,25 +297,21 @@ bits[endWord] |= endmask; } - - protected int expandingWordNum(long index) { int wordNum = (int)(index >> 6); - if (wordNum>=wlen) { - ensureCapacity(index+1); - wlen = wordNum+1; + if (wordNum >= wlen) { + ensureCapacity(index + 1); } return wordNum; } - /** clears a bit. * The index should be less than the OpenBitSet size. */ public void fastClear(int index) { + assert index >= 0 && index < numBits; int wordNum = index >> 6; - int bit = index & 0x03f; - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] &= ~bitmask; // hmmm, it takes one more instruction to clear than it does to set... any // way to work around this? If there were only 63 bits per word, we could @@ -312,18 +326,17 @@ * The index should be less than the OpenBitSet size. */ public void fastClear(long index) { + assert index >= 0 && index < numBits; int wordNum = (int)(index >> 6); // div 64 - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] &= ~bitmask; } /** clears a bit, allowing access beyond the current set size without changing the size.*/ public void clear(long index) { int wordNum = (int)(index >> 6); // div 64 if (wordNum>=wlen) return; - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] &= ~bitmask; } @@ -332,6 +345,43 @@ * @param startIndex lower index * @param endIndex one-past the last bit to clear */ + public void clear(int startIndex, int endIndex) { + if (endIndex <= startIndex) return; + + int startWord = (startIndex>>6); + if (startWord >= wlen) return; + + // since endIndex is one past the end, this is index of the last + // word to be changed. 
+ int endWord = ((endIndex-1)>>6); + + long startmask = -1L << startIndex; + long endmask = -1L >>> -endIndex; // 64-(endIndex&0x3f) is the same as -endIndex due to wrap + + // invert masks since we are clearing + startmask = ~startmask; + endmask = ~endmask; + + if (startWord == endWord) { + bits[startWord] &= (startmask | endmask); + return; + } + + bits[startWord] &= startmask; + + int middle = Math.min(wlen, endWord); + Arrays.fill(bits, startWord+1, middle, 0L); + if (endWord < wlen) { + bits[endWord] &= endmask; + } + } + + + /** Clears a range of bits. Clearing past the end does not change the size of the set. + * + * @param startIndex lower index + * @param endIndex one-past the last bit to clear + */ public void clear(long startIndex, long endIndex) { if (endIndex <= startIndex) return; @@ -369,9 +419,9 @@ * The index should be less than the OpenBitSet size. */ public boolean getAndSet(int index) { + assert index >= 0 && index < numBits; int wordNum = index >> 6; // div 64 - int bit = index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; boolean val = (bits[wordNum] & bitmask) != 0; bits[wordNum] |= bitmask; return val; @@ -381,9 +431,9 @@ * The index should be less than the OpenBitSet size. */ public boolean getAndSet(long index) { + assert index >= 0 && index < numBits; int wordNum = (int)(index >> 6); // div 64 - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; boolean val = (bits[wordNum] & bitmask) != 0; bits[wordNum] |= bitmask; return val; @@ -393,37 +443,36 @@ * The index should be less than the OpenBitSet size. */ public void fastFlip(int index) { + assert index >= 0 && index < numBits; int wordNum = index >> 6; // div 64 - int bit = index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] ^= bitmask; } /** flips a bit. * The index should be less than the OpenBitSet size. */ public void fastFlip(long index) { + assert index >= 0 && index < numBits; int wordNum = (int)(index >> 6); // div 64 - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] ^= bitmask; } /** flips a bit, expanding the set size if necessary */ public void flip(long index) { int wordNum = expandingWordNum(index); - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] ^= bitmask; } /** flips a bit and returns the resulting bit value. * The index should be less than the OpenBitSet size. */ public boolean flipAndGet(int index) { + assert index >= 0 && index < numBits; int wordNum = index >> 6; // div 64 - int bit = index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] ^= bitmask; return (bits[wordNum] & bitmask) != 0; } @@ -432,9 +481,9 @@ * The index should be less than the OpenBitSet size. */ public boolean flipAndGet(long index) { + assert index >= 0 && index < numBits; int wordNum = (int)(index >> 6); // div 64 - int bit = (int)index & 0x3f; // mod 64 - long bitmask = 1L << bit; + long bitmask = 1L << index; bits[wordNum] ^= bitmask; return (bits[wordNum] & bitmask) != 0; } @@ -446,12 +495,11 @@ */ public void flip(long startIndex, long endIndex) { if (endIndex <= startIndex) return; - int oldlen = wlen; int startWord = (int)(startIndex>>6); // since endIndex is one past the end, this is index of the last // word to be changed. 
- int endWord = expandingWordNum(endIndex-1); + int endWord = expandingWordNum(endIndex-1); /*** Grrr, java shifting wraps around so -1L>>>64 == -1 * for that reason, make sure not to use endmask if the bits to flip will @@ -562,12 +610,12 @@ long word = bits[i] >> subIndex; // skip all the bits to the right of index if (word!=0) { - return (i<<6) + subIndex + BitUtil.ntz(word); + return (i<<6) + subIndex + Long.numberOfTrailingZeros(word); } while(++i < wlen) { word = bits[i]; - if (word!=0) return (i<<6) + BitUtil.ntz(word); + if (word!=0) return (i<<6) + Long.numberOfTrailingZeros(word); } return -1; @@ -583,24 +631,89 @@ long word = bits[i] >>> subIndex; // skip all the bits to the right of index if (word!=0) { - return (((long)i)<<6) + (subIndex + BitUtil.ntz(word)); + return (((long)i)<<6) + (subIndex + Long.numberOfTrailingZeros(word)); } while(++i < wlen) { word = bits[i]; - if (word!=0) return (((long)i)<<6) + BitUtil.ntz(word); + if (word!=0) return (((long)i)<<6) + Long.numberOfTrailingZeros(word); } return -1; } + /** Returns the index of the first set bit starting downwards at + * the index specified. + * -1 is returned if there are no more set bits. + */ + public int prevSetBit(int index) { + int i = index >> 6; + final int subIndex; + long word; + if (i >= wlen) { + i = wlen - 1; + if (i < 0) return -1; + subIndex = 63; // last possible bit + word = bits[i]; + } else { + if (i < 0) return -1; + subIndex = index & 0x3f; // index within the word + word = (bits[i] << (63-subIndex)); // skip all the bits to the left of index + } + if (word != 0) { + return (i << 6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197 + } - public Object clone() { + while (--i >= 0) { + word = bits[i]; + if (word !=0 ) { + return (i << 6) + 63 - Long.numberOfLeadingZeros(word); + } + } + + return -1; + } + + /** Returns the index of the first set bit starting downwards at + * the index specified. + * -1 is returned if there are no more set bits. 
+ */ + public long prevSetBit(long index) { + int i = (int) (index >> 6); + final int subIndex; + long word; + if (i >= wlen) { + i = wlen - 1; + if (i < 0) return -1; + subIndex = 63; // last possible bit + word = bits[i]; + } else { + if (i < 0) return -1; + subIndex = (int)index & 0x3f; // index within the word + word = (bits[i] << (63-subIndex)); // skip all the bits to the left of index + } + + if (word != 0) { + return (((long)i)<<6) + subIndex - Long.numberOfLeadingZeros(word); // See LUCENE-3197 + } + + while (--i >= 0) { + word = bits[i]; + if (word !=0 ) { + return (((long)i)<<6) + 63 - Long.numberOfLeadingZeros(word); + } + } + + return -1; + } + + @Override + public OpenBitSet clone() { try { OpenBitSet obs = (OpenBitSet)super.clone(); - obs.bits = (long[]) obs.bits.clone(); // hopefully an array clone is as fast(er) than arraycopy + obs.bits = obs.bits.clone(); // hopefully an array clone is as fast(er) than arraycopy return obs; } catch (CloneNotSupportedException e) { throw new RuntimeException(e); @@ -628,6 +741,7 @@ public void union(OpenBitSet other) { int newLen = Math.max(wlen,other.wlen); ensureCapacityWords(newLen); + assert (numBits = Math.max(other.numBits, numBits)) >= 0; long[] thisArr = this.bits; long[] otherArr = other.bits; @@ -656,6 +770,7 @@ public void xor(OpenBitSet other) { int newLen = Math.max(wlen,other.wlen); ensureCapacityWords(newLen); + assert (numBits = Math.max(other.numBits, numBits)) >= 0; long[] thisArr = this.bits; long[] otherArr = other.bits; @@ -698,24 +813,22 @@ return false; } - - - /** Expand the long[] with the size given as a number of words (64 bit longs). - * getNumWords() is unchanged by this call. - */ + /** Expand the long[] with the size given as a number of words (64 bit longs). */ public void ensureCapacityWords(int numWords) { - if (bits.length < numWords) { - long[] newBits = new long[numWords]; - System.arraycopy(bits,0,newBits,0,wlen); - bits = newBits; - } + bits = ArrayUtil.grow(bits, numWords); + wlen = numWords; + assert (this.numBits = Math.max(this.numBits, numWords << 6)) >= 0; } - /** Ensure that the long[] is big enough to hold numBits, expanding it if necessary. - * getNumWords() is unchanged by this call. + /** + * Ensure that the long[] is big enough to hold numBits, expanding it if + * necessary. */ public void ensureCapacity(long numBits) { ensureCapacityWords(bits2words(numBits)); + // ensureCapacityWords sets numBits to a multiple of 64, but we want to set + // it to exactly what the app asked. + assert (this.numBits = Math.max(this.numBits, numBits)) >= 0; } /** Lowers numWords, the number of words in use, @@ -729,11 +842,11 @@ /** returns the number of 64 bit words it would take to hold numBits */ public static int bits2words(long numBits) { - return (int)(((numBits-1)>>>6)+1); + return (int)(((numBits-1)>>>6)+1); } - /** returns true if both sets have the same bits set */ + @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof OpenBitSet)) return false; @@ -759,15 +872,19 @@ } + @Override public int hashCode() { - long h = 0x98761234; // something non-zero for length==0 - for (int i = bits.length; --i>=0;) { + // Start with a zero hash and use a mix that results in zero if the input is zero. + // This effectively truncates trailing zeros without an explicit check. 
+ long h = 0; + for (int i = bits.length; --i>=0;) { h ^= bits[i]; h = (h << 1) | (h >>> 63); // rotate left } - return (int)((h>>32) ^ h); // fold leftmost bits into right + // fold leftmost bits into right and add a constant to prevent + // empty sets from returning 0, which is too common. + return (int)((h>>32) ^ h) + 0x98761234; } - } Index: 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetDISI.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetDISI.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetDISI.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetDISI.java 16 Dec 2014 11:31:31 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -19,7 +19,9 @@ import java.io.IOException; import org.apache.lucene.search.DocIdSetIterator; - + +/** OpenBitSet with added methods to bulk-update the bits + * from a {@link DocIdSetIterator}. */ public class OpenBitSetDISI extends OpenBitSet { /** Construct an OpenBitSetDISI with its bits set @@ -47,8 +49,10 @@ * constructor. */ public void inPlaceOr(DocIdSetIterator disi) throws IOException { - while (disi.next() && (disi.doc() < size())) { - fastSet(disi.doc()); + int doc; + long size = size(); + while ((doc = disi.nextDoc()) < size) { + fastSet(doc); } } @@ -59,20 +63,15 @@ * constructor. */ public void inPlaceAnd(DocIdSetIterator disi) throws IOException { - int index = nextSetBit(0); - int lastNotCleared = -1; - while ((index != -1) && disi.skipTo(index)) { - while ((index != -1) && (index < disi.doc())) { - fastClear(index); - index = nextSetBit(index + 1); - } - if (index == disi.doc()) { - lastNotCleared = index; - index++; - } - assert (index == -1) || (index > disi.doc()); + int bitSetDoc = nextSetBit(0); + int disiDoc; + while (bitSetDoc != -1 && (disiDoc = disi.advance(bitSetDoc)) != DocIdSetIterator.NO_MORE_DOCS) { + clear(bitSetDoc, disiDoc); + bitSetDoc = nextSetBit(disiDoc + 1); } - clear(lastNotCleared+1, size()); + if (bitSetDoc != -1) { + clear(bitSetDoc, size()); + } } /** @@ -82,8 +81,10 @@ * constructor. */ public void inPlaceNot(DocIdSetIterator disi) throws IOException { - while (disi.next() && (disi.doc() < size())) { - fastClear(disi.doc()); + int doc; + long size = size(); + while ((doc = disi.nextDoc()) < size) { + fastClear(doc); } } @@ -94,8 +95,10 @@ * constructor. 
*/ public void inPlaceXor(DocIdSetIterator disi) throws IOException { - while (disi.next() && (disi.doc() < size())) { - fastFlip(disi.doc()); + int doc; + long size = size(); + while ((doc = disi.nextDoc()) < size) { + fastFlip(doc); } } } Index: 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetIterator.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetIterator.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetIterator.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/OpenBitSetIterator.java 16 Dec 2014 11:31:32 -0000 1.1.2.1 @@ -1,4 +1,4 @@ -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,53 +17,26 @@ package org.apache.lucene.util; -import java.io.IOException; - import org.apache.lucene.search.DocIdSetIterator; /** An iterator to iterate over set bits in an OpenBitSet. * This is faster than nextSetBit() for iterating over the complete set of bits, * especially when the density of the bits set is high. - * - * @version $Id$ */ public class OpenBitSetIterator extends DocIdSetIterator { - // The General Idea: instead of having an array per byte that has - // the offsets of the next set bit, that array could be - // packed inside a 32 bit integer (8 4 bit numbers). That - // should be faster than accessing an array for each index, and - // the total array size is kept smaller (256*sizeof(int))=1K - protected final static int[] bitlist={ - 0x0,0x1,0x2,0x21,0x3,0x31,0x32,0x321,0x4,0x41,0x42,0x421,0x43,0x431,0x432,0x4321,0x5,0x51,0x52,0x521,0x53,0x531,0x532,0x5321,0x54,0x541,0x542,0x5421,0x543,0x5431,0x5432,0x54321,0x6,0x61,0x62,0x621,0x63,0x631,0x632,0x6321,0x64,0x641,0x642,0x6421,0x643,0x6431,0x6432,0x64321,0x65,0x651,0x652,0x6521,0x653,0x6531,0x6532,0x65321,0x654,0x6541,0x6542,0x65421,0x6543,0x65431,0x65432,0x654321,0x7,0x71,0x72,0x721,0x73,0x731,0x732,0x7321,0x74,0x741,0x742,0x7421,0x743,0x7431,0x7432,0x74321,0x75,0x751,0x752,0x7521,0x753,0x7531,0x7532,0x75321,0x754,0x7541,0x7542,0x75421,0x7543,0x75431,0x75432,0x754321,0x76,0x761,0x762,0x7621,0x763,0x7631,0x7632,0x76321,0x764,0x7641,0x7642,0x76421,0x7643,0x76431,0x76432,0x764321,0x765,0x7651,0x7652,0x76521,0x7653,0x76531,0x76532,0x765321,0x7654,0x76541,0x76542,0x765421,0x76543,0x765431,0x765432,0x7654321,0x8,0x81,0x82,0x821,0x83,0x831,0x832,0x8321,0x84,0x841,0x842,0x8421,0x843,0x8431,0x8432,0x84321,0x85,0x851,0x852,0x8521,0x853,0x8531,0x8532,0x85321,0x854,0x8541,0x8542,0x85421,0x8543,0x85431,0x85432,0x854321,0x86,0x861,0x862,0x8621,0x863,0x8631,0x8632,0x86321,0x864,0x8641,0x8642,0x86421,0x8643,0x86431,0x86432,0x864321,0x865,0x8651,0x8652,0x86521,0x8653,0x86531,0x86532,0x865321,0x8654,0x86541,0x86542,0x865421,0x86543,0x865431,0x865432,0x8654321,0x87,0x871,0x872,0x8721,0x873,0x8731,0x8732,0x87321,0x874,0x8741,0x8742,0x87421,0x8743,0x87431,0x87432,0x874321,0x875,0x8751,0x8752,0x87521,0x8753,0x87531,0x87532,0x875321,0x8754,0x87541,0x87542,0x875421,0x87543,0x875431,0x875432,0x8754321,0x876,0x8761,0x8762,0x87621,0x8763,0x87631,0x87632,0x876321,0x8764,0x87641,0x87642,0x876421,0x87643,0x876431,0x876432,0x8764321,0x8765,0x87651,0x87652,0x876521,0x87653,0x876531,0x876532,0x8765321,0x87654,0x876541,0x876542,0x8765421,0x876543,0x8765431,0x8765432,0x87654321 - }; - /***** the python 
code that generated bitlist - def bits2int(val): - arr=0 - for shift in range(8,0,-1): - if val & 0x80: - arr = (arr << 4) | shift - val = val << 1 - return arr - - def int_table(): - tbl = [ hex(bits2int(val)).strip('L') for val in range(256) ] - return ','.join(tbl) - ******/ - // hmmm, what about an iterator that finds zeros though, // or a reverse iterator... should they be separate classes // for efficiency, or have a common root interface? (or // maybe both? could ask for a SetBitsIterator, etc... - - private final long[] arr; - private final int words; + final long[] arr; + final int words; private int i=-1; private long word; private int wordShift; private int indexArray; - private int curDocId; + private int curDocId = -1; public OpenBitSetIterator(OpenBitSet obs) { this(obs.getBits(), obs.getNumWords()); @@ -79,7 +52,7 @@ if ((int)word ==0) {wordShift +=32; word = word >>>32; } if ((word & 0x0000FFFF) == 0) { wordShift +=16; word >>>=16; } if ((word & 0x000000FF) == 0) { wordShift +=8; word >>>=8; } - indexArray = bitlist[(int)word & 0xff]; + indexArray = BitUtil.bitList((byte) word); } /***** alternate shift implementations @@ -104,20 +77,20 @@ } ******/ - public boolean next() { - if (indexArray==0) { - if (word!=0) { + @Override + public int nextDoc() { + if (indexArray == 0) { + if (word != 0) { word >>>= 8; wordShift += 8; } - while (word==0) { + while (word == 0) { if (++i >= words) { - curDocId = -1; - return false; + return curDocId = NO_MORE_DOCS; } word = arr[i]; - wordShift =-1; // loop invariant code motion should move this + wordShift = -1; // loop invariant code motion should move this } // after the first time, should I go with a linear search, or @@ -129,31 +102,29 @@ indexArray >>>= 4; // should i<<6 be cached as a separate variable? // it would only save one cycle in the best circumstances. - curDocId = (i<<6) + bitIndex; - return true; + return curDocId = (i<<6) + bitIndex; } - - public boolean skipTo(int target) { - indexArray=0; + + @Override + public int advance(int target) { + indexArray = 0; i = target >> 6; - if (i>=words) { - word =0; // setup so next() will also return -1 - curDocId = -1; - return false; + if (i >= words) { + word = 0; // setup so next() will also return -1 + return curDocId = NO_MORE_DOCS; } wordShift = target & 0x3f; word = arr[i] >>> wordShift; - if (word !=0) { + if (word != 0) { wordShift--; // compensate for 1 based arrIndex } else { - while (word ==0) { + while (word == 0) { if (++i >= words) { - curDocId = -1; - return false; + return curDocId = NO_MORE_DOCS; } word = arr[i]; } - wordShift =-1; + wordShift = -1; } shift(); @@ -162,12 +133,16 @@ indexArray >>>= 4; // should i<<6 be cached as a separate variable? // it would only save one cycle in the best circumstances. - curDocId = (i<<6) + bitIndex; - return true; + return curDocId = (i<<6) + bitIndex; } + + @Override + public int docID() { + return curDocId; + } - public int doc() { - return this.curDocId; + @Override + public long cost() { + return words / 64; } - } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/PForDeltaDocIdSet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/PagedBytes.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/Parameter.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/PrintStreamInfoStream.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/PriorityQueue.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/PriorityQueue.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/PriorityQueue.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/PriorityQueue.java 16 Dec 2014 11:31:31 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -18,77 +18,148 @@ */ /** A PriorityQueue maintains a partial ordering of its elements such that the - least element can always be found in constant time. Put()'s and pop()'s - require log(size) time. */ -public abstract class PriorityQueue { - private int size; - private int maxSize; - protected Object[] heap; + * least element can always be found in constant time. Put()'s and pop()'s + * require log(size) time. + * + *

    NOTE: This class will pre-allocate a full array of + * length maxSize+1 if instantiated via the + * {@link #PriorityQueue(int,boolean)} constructor with + * prepopulate set to true. + * + * @lucene.internal +*/ +public abstract class PriorityQueue { + private int size = 0; + private final int maxSize; + private final T[] heap; - /** Determines the ordering of objects in this priority queue. Subclasses - must define this one method. */ - protected abstract boolean lessThan(Object a, Object b); + public PriorityQueue(int maxSize) { + this(maxSize, true); + } - /** Subclass constructors must call this. */ - protected final void initialize(int maxSize) { - size = 0; - int heapSize; - if (0 == maxSize) + public PriorityQueue(int maxSize, boolean prepopulate) { + final int heapSize; + if (0 == maxSize) { // We allocate 1 extra to avoid if statement in top() heapSize = 2; - else + } else { + // NOTE: we add +1 because all access to heap is + // 1-based not 0-based. heap[0] is unused. heapSize = maxSize + 1; - heap = new Object[heapSize]; + + if (heapSize > ArrayUtil.MAX_ARRAY_LENGTH) { + // Throw exception to prevent confusing OOME: + throw new IllegalArgumentException("maxSize must be <= " + (ArrayUtil.MAX_ARRAY_LENGTH-1) + "; got: " + maxSize); + } + } + // T is unbounded type, so this unchecked cast works always: + @SuppressWarnings("unchecked") final T[] h = (T[]) new Object[heapSize]; + this.heap = h; this.maxSize = maxSize; + + if (prepopulate) { + // If sentinel objects are supported, populate the queue with them + T sentinel = getSentinelObject(); + if (sentinel != null) { + heap[1] = sentinel; + for (int i = 2; i < heap.length; i++) { + heap[i] = getSentinelObject(); + } + size = maxSize; + } + } } + /** Determines the ordering of objects in this priority queue. Subclasses + * must define this one method. + * @return true iff parameter a is less than parameter b. + */ + protected abstract boolean lessThan(T a, T b); + /** - * Adds an Object to a PriorityQueue in log(size) time. - * If one tries to add more objects than maxSize from initialize - * a RuntimeException (ArrayIndexOutOfBound) is thrown. + * This method can be overridden by extending classes to return a sentinel + * object which will be used by the {@link PriorityQueue#PriorityQueue(int,boolean)} + * constructor to fill the queue, so that the code which uses that queue can always + * assume it's full and only change the top without attempting to insert any new + * object.
    + * + * Those sentinel values should always compare worse than any non-sentinel + * value (i.e., {@link #lessThan} should always favor the + * non-sentinel values).
    + * + * By default, this method returns null, which means the queue will not be + * filled with sentinel values. Otherwise, the value returned will be used to + * pre-populate the queue with sentinel values.
    + * + * If this method is extended to return a non-null value, then the following + * usage pattern is recommended: + * + *

    +   * // extends getSentinelObject() to return a non-null value.
    +   * PriorityQueue<MyObject> pq = new MyQueue<MyObject>(numHits);
    +   * // save the 'top' element, which is guaranteed to not be null.
    +   * MyObject pqTop = pq.top();
    +   * <...>
    +   * // now in order to add a new element, which is 'better' than top (after 
    +   * // you've verified it is better), it is as simple as:
    +   * pqTop.change().
    +   * pqTop = pq.updateTop();
    +   * 
    + * + * NOTE: if this method returns a non-null value, it will be called by + * the {@link PriorityQueue#PriorityQueue(int,boolean)} constructor + * {@link #size()} times, relying on a new object to be returned and will not + * check if it's null again. Therefore you should ensure any call to this + * method creates a new instance and behaves consistently, e.g., it cannot + * return null if it previously returned non-null. + * + * @return the sentinel object to use to pre-populate the queue, or null if + * sentinel objects are not supported. */ - public final void put(Object element) { - size++; - heap[size] = element; - upHeap(); + protected T getSentinelObject() { + return null; } /** - * Adds element to the PriorityQueue in log(size) time if either - * the PriorityQueue is not full, or not lessThan(element, top()). - * @param element - * @return true if element is added, false otherwise. + * Adds an Object to a PriorityQueue in log(size) time. If one tries to add + * more objects than maxSize from initialize an + * {@link ArrayIndexOutOfBoundsException} is thrown. + * + * @return the new 'top' element in the queue. */ - public boolean insert(Object element) { - return insertWithOverflow(element) != element; + public final T add(T element) { + size++; + heap[size] = element; + upHeap(); + return heap[1]; } /** - * insertWithOverflow() is the same as insert() except its - * return value: it returns the object (if any) that was + * Adds an Object to a PriorityQueue in log(size) time. + * It returns the object (if any) that was * dropped off the heap because it was full. This can be * the given parameter (in case it is smaller than the * full heap's minimum, and couldn't be added), or another * object that was previously the smallest value in the * heap and now has been replaced by a larger one, or null * if the queue wasn't yet full with maxSize elements. */ - public Object insertWithOverflow(Object element) { + public T insertWithOverflow(T element) { if (size < maxSize) { - put(element); + add(element); return null; } else if (size > 0 && !lessThan(element, heap[1])) { - Object ret = heap[1]; + T ret = heap[1]; heap[1] = element; - adjustTop(); + updateTop(); return ret; } else { return element; } } /** Returns the least element of the PriorityQueue in constant time. */ - public final Object top() { + public final T top() { // We don't need to check size here: if maxSize is 0, // then heap is length 2 array with both entries null. // If size is 0 then heap[1] is already null. @@ -97,27 +168,41 @@ /** Removes and returns the least element of the PriorityQueue in log(size) time. */ - public final Object pop() { + public final T pop() { if (size > 0) { - Object result = heap[1]; // save first value - heap[1] = heap[size]; // move last to first - heap[size] = null; // permit GC of objects + T result = heap[1]; // save first value + heap[1] = heap[size]; // move last to first + heap[size] = null; // permit GC of objects size--; - downHeap(); // adjust heap + downHeap(); // adjust heap return result; - } else + } else { return null; + } } - - /** Should be called when the Object at top changes values. Still log(n) - * worst case, but it's at least twice as fast to
    -   *  { pq.top().change(); pq.adjustTop(); }
    -   * 
    instead of
    -   *  { o = pq.pop(); o.change(); pq.push(o); }
    +  
    +  /**
    +   * Should be called when the Object at top changes values. Still log(n) worst
    +   * case, but it's at least twice as fast to
    +   * 
    +   * 
    +   * pq.top().change();
    +   * pq.updateTop();
        * 
    + * + * instead of + * + *
    +   * o = pq.pop();
    +   * o.change();
    +   * pq.push(o);
    +   * 
    + * + * @return the new 'top' element. */ - public final void adjustTop() { + public final T updateTop() { downHeap(); + return heap[1]; } /** Returns the number of elements currently stored in the PriorityQueue. */ @@ -127,40 +212,48 @@ /** Removes all entries from the PriorityQueue. */ public final void clear() { - for (int i = 0; i <= size; i++) + for (int i = 0; i <= size; i++) { heap[i] = null; + } size = 0; } private final void upHeap() { int i = size; - Object node = heap[i]; // save bottom node + T node = heap[i]; // save bottom node int j = i >>> 1; while (j > 0 && lessThan(node, heap[j])) { - heap[i] = heap[j]; // shift parents down + heap[i] = heap[j]; // shift parents down i = j; j = j >>> 1; } - heap[i] = node; // install saved node + heap[i] = node; // install saved node } private final void downHeap() { int i = 1; - Object node = heap[i]; // save top node - int j = i << 1; // find smaller child + T node = heap[i]; // save top node + int j = i << 1; // find smaller child int k = j + 1; if (k <= size && lessThan(heap[k], heap[j])) { j = k; } while (j <= size && lessThan(heap[j], node)) { - heap[i] = heap[j]; // shift up child + heap[i] = heap[j]; // shift up child i = j; j = i << 1; k = j + 1; if (k <= size && lessThan(heap[k], heap[j])) { j = k; } } - heap[i] = node; // install saved node + heap[i] = node; // install saved node } + + /** This method returns the internal heap array as Object[]. + * @lucene.internal + */ + protected final Object[] getHeapArray() { + return (Object[]) heap; + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/QueryBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/RamUsageEstimator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/RecyclingByteBlockAllocator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/RecyclingIntBlockAllocator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/RefCount.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/RollingBuffer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/SPIClassIterator.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/ScorerDocQueue.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/SentinelIntSet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/SetOnce.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/SloppyMath.java'. Fisheye: No comparison available. 
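The generified PriorityQueue above now exposes add(T), insertWithOverflow(T), top(), pop() and updateTop(). A small sketch of the usual top-N pattern built on those methods, assuming the class is declared as PriorityQueue<T> as in the Lucene 4.x sources; the ScoredDoc and TopScoreQueue types are hypothetical:

import org.apache.lucene.util.PriorityQueue;

// Hypothetical element type: a doc id with a score.
class ScoredDoc {
  final int doc;
  final float score;
  ScoredDoc(int doc, float score) { this.doc = doc; this.score = score; }
}

// Orders the queue so that the *lowest* score sits on top; the heap then
// keeps the N highest-scoring entries seen so far.
class TopScoreQueue extends PriorityQueue<ScoredDoc> {
  TopScoreQueue(int maxSize) { super(maxSize, false); } // no sentinel pre-population
  @Override
  protected boolean lessThan(ScoredDoc a, ScoredDoc b) {
    return a.score < b.score;
  }
}

public class PriorityQueueDemo {
  public static void main(String[] args) {
    TopScoreQueue pq = new TopScoreQueue(3);
    float[] scores = {0.3f, 2.5f, 1.1f, 0.9f, 4.2f};
    for (int doc = 0; doc < scores.length; doc++) {
      // insertWithOverflow either keeps the element (returning the evicted
      // minimum, or null while the queue is not yet full) or hands the
      // element straight back when it does not beat the current top.
      pq.insertWithOverflow(new ScoredDoc(doc, scores[doc]));
    }
    // pop() returns entries worst-first: 1.1, 2.5, 4.2 here.
    while (pq.size() > 0) {
      ScoredDoc d = pq.pop();
      System.out.println("doc=" + d.doc + " score=" + d.score);
    }
  }
}

insertWithOverflow avoids an add-then-pop dance: once the queue is full, elements that do not beat the current minimum are rejected in constant time, and better ones evict the minimum in log(size) time.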
Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/SmallFloat.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/SmallFloat.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/SmallFloat.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/SmallFloat.java 16 Dec 2014 11:31:32 -0000 1.1.2.1 @@ -18,9 +18,12 @@ /** Floating point numbers smaller than 32 bits. * - * @version $Id$ + * @lucene.internal */ public class SmallFloat { + + /** No instance */ + private SmallFloat() {} /** Converts a 32 bit float to an 8 bit float. *
    Values less than zero are all mapped to zero. @@ -39,7 +42,7 @@ int fzero = (63-zeroExp)<> (24-numMantissaBits); - if (smallfloat < fzero) { + if (smallfloat <= fzero) { return (bits<=0) ? (byte)0 // negative numbers and zero both map to 0 byte :(byte)1; // underflow is mapped to smallest non-zero number. @@ -75,7 +78,7 @@ public static byte floatToByte315(float f) { int bits = Float.floatToRawIntBits(f); int smallfloat = bits >> (24-3); - if (smallfloat < (63-15)<<3) { + if (smallfloat <= ((63-15)<<3)) { return (bits<=0) ? (byte)0 : (byte)1; } if (smallfloat >= ((63-15)<<3) + 0x100) { @@ -103,7 +106,7 @@ public static byte floatToByte52(float f) { int bits = Float.floatToRawIntBits(f); int smallfloat = bits >> (24-5); - if (smallfloat < (63-2)<<5) { + if (smallfloat <= (63-2)<<5) { return (bits<=0) ? (byte)0 : (byte)1; } if (smallfloat >= ((63-2)<<5) + 0x100) { Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/SortedVIntList.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/Sorter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/StrictStringTokenizer.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/StringHelper.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/StringHelper.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/StringHelper.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/StringHelper.java 16 Dec 2014 11:31:30 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,50 +17,215 @@ * limitations under the License. */ +import java.util.Comparator; +import java.util.StringTokenizer; /** * Methods for manipulating strings. * - * $Id$ + * @lucene.internal */ public abstract class StringHelper { /** - * Compares two byte[] arrays, element by element, and returns the + * Compares two {@link BytesRef}, element by element, and returns the * number of elements common to both arrays. * - * @param bytes1 The first byte[] to compare - * @param bytes2 The second byte[] to compare + * @param left The first {@link BytesRef} to compare + * @param right The second {@link BytesRef} to compare * @return The number of common elements. */ - public static final int bytesDifference(byte[] bytes1, int len1, byte[] bytes2, int len2) { - int len = len1 < len2 ? len1 : len2; + public static int bytesDifference(BytesRef left, BytesRef right) { + int len = left.length < right.length ? left.length : right.length; + final byte[] bytesLeft = left.bytes; + final int offLeft = left.offset; + byte[] bytesRight = right.bytes; + final int offRight = right.offset; for (int i = 0; i < len; i++) - if (bytes1[i] != bytes2[i]) + if (bytesLeft[i+offLeft] != bytesRight[i+offRight]) return i; return len; } + + /** + * Returns the length of {@code currentTerm} needed for use as a sort key. + * so that {@link BytesRef#compareTo(BytesRef)} still returns the same result. 
+ * This method assumes currentTerm comes after priorTerm. + */ + public static int sortKeyLength(final BytesRef priorTerm, final BytesRef currentTerm) { + final int currentTermOffset = currentTerm.offset; + final int priorTermOffset = priorTerm.offset; + final int limit = Math.min(priorTerm.length, currentTerm.length); + for (int i = 0; i < limit; i++) { + if (priorTerm.bytes[priorTermOffset+i] != currentTerm.bytes[currentTermOffset+i]) { + return i+1; + } + } + return Math.min(1+priorTerm.length, currentTerm.length); + } + private StringHelper() { + } + + public static boolean equals(String s1, String s2) { + if (s1 == null) { + return s2 == null; + } else { + return s1.equals(s2); + } + } + /** - * Compares two strings, character by character, and returns the - * first position where the two strings differ from one another. - * - * @param s1 The first string to compare - * @param s2 The second string to compare - * @return The first position where the two strings differ. + * Returns true iff the ref starts with the given prefix. + * Otherwise false. + * + * @param ref + * the {@code byte[]} to test + * @param prefix + * the expected prefix + * @return Returns true iff the ref starts with the given prefix. + * Otherwise false. */ - public static final int stringDifference(String s1, String s2) { - int len1 = s1.length(); - int len2 = s2.length(); - int len = len1 < len2 ? len1 : len2; - for (int i = 0; i < len; i++) { - if (s1.charAt(i) != s2.charAt(i)) { - return i; + public static boolean startsWith(byte[] ref, BytesRef prefix) { + if (ref.length < prefix.length) { + return false; + } + + for(int i=0;itrue iff the ref starts with the given prefix. + * Otherwise false. + * + * @param ref + * the {@link BytesRef} to test + * @param prefix + * the expected prefix + * @return Returns true iff the ref starts with the given prefix. + * Otherwise false. + */ + public static boolean startsWith(BytesRef ref, BytesRef prefix) { + return sliceEquals(ref, prefix, 0); } + + /** + * Returns true iff the ref ends with the given suffix. Otherwise + * false. + * + * @param ref + * the {@link BytesRef} to test + * @param suffix + * the expected suffix + * @return Returns true iff the ref ends with the given suffix. + * Otherwise false. + */ + public static boolean endsWith(BytesRef ref, BytesRef suffix) { + return sliceEquals(ref, suffix, ref.length - suffix.length); + } + + private static boolean sliceEquals(BytesRef sliceToTest, BytesRef other, int pos) { + if (pos < 0 || sliceToTest.length - pos < other.length) { + return false; + } + int i = sliceToTest.offset + pos; + int j = other.offset; + final int k = other.offset + other.length; + + while (j < k) { + if (sliceToTest.bytes[i++] != other.bytes[j++]) { + return false; + } + } + + return true; + } + + /** Pass this as the seed to {@link #murmurhash3_x86_32}. 
*/ + + // Poached from Guava: set a different salt/seed + // for each JVM instance, to frustrate hash key collision + // denial of service attacks, and to catch any places that + // somehow rely on hash function/order across JVM + // instances: + public static final int GOOD_FAST_HASH_SEED; + + static { + String prop = System.getProperty("tests.seed"); + if (prop != null) { + // So if there is a test failure that relied on hash + // order, we remain reproducible based on the test seed: + if (prop.length() > 8) { + prop = prop.substring(prop.length()-8); + } + GOOD_FAST_HASH_SEED = (int) Long.parseLong(prop, 16); + } else { + GOOD_FAST_HASH_SEED = (int) System.currentTimeMillis(); + } + } + + /** Returns the MurmurHash3_x86_32 hash. + * Original source/tests at https://github.com/yonik/java_util/ + */ + @SuppressWarnings("fallthrough") + public static int murmurhash3_x86_32(byte[] data, int offset, int len, int seed) { + + final int c1 = 0xcc9e2d51; + final int c2 = 0x1b873593; + + int h1 = seed; + int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block + + for (int i=offset; i>> 16; + h1 *= 0x85ebca6b; + h1 ^= h1 >>> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 >>> 16; + + return h1; + } + + public static int murmurhash3_x86_32(BytesRef bytes, int seed) { + return murmurhash3_x86_32(bytes.bytes, bytes.offset, bytes.length, seed); + } } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/ThreadInterruptedException.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/TimSorter.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/ToStringUtils.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/ToStringUtils.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/ToStringUtils.java 17 Aug 2012 14:54:54 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/ToStringUtils.java 16 Dec 2014 11:31:31 -0000 1.1.2.1 @@ -1,6 +1,6 @@ package org.apache.lucene.util; -/** +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -17,12 +17,40 @@ * limitations under the License. */ -public class ToStringUtils { - /** for printing boost only if not 1.0 */ +/** + * Helper methods to ease implementing {@link Object#toString()}. 
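The new StringHelper methods above (startsWith/endsWith on BytesRef and the seeded MurmurHash3) can be exercised with a short sketch; the demo class is illustrative only, and the printed hash differs per JVM because GOOD_FAST_HASH_SEED is derived from the clock unless the tests.seed property is set:

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.StringHelper;

public class StringHelperDemo {
  public static void main(String[] args) {
    BytesRef term = new BytesRef("foobar");
    BytesRef prefix = new BytesRef("foo");
    BytesRef suffix = new BytesRef("bar");

    // Prefix/suffix tests operate on the raw bytes of the refs.
    System.out.println(StringHelper.startsWith(term, prefix)); // true
    System.out.println(StringHelper.endsWith(term, suffix));   // true

    // MurmurHash3 over the term bytes, seeded with the per-JVM seed
    // initialized in the class's static block.
    int hash = StringHelper.murmurhash3_x86_32(term, StringHelper.GOOD_FAST_HASH_SEED);
    System.out.println(Integer.toHexString(hash));
  }
}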
+ */ +public final class ToStringUtils { + + private ToStringUtils() {} // no instance + + /** + * for printing boost only if not 1.0 + */ public static String boost(float boost) { if (boost != 1.0f) { return "^" + Float.toString(boost); } else return ""; } + public static void byteArray(StringBuilder buffer, byte[] bytes) { + for (int i = 0; i < bytes.length; i++) { + buffer.append("b[").append(i).append("]=").append(bytes[i]); + if (i < bytes.length - 1) { + buffer.append(','); + } + + } + } + + private final static char [] HEX = "0123456789abcdef".toCharArray(); + + public static String longHex(long x) { + char [] asHex = new char [16]; + for (int i = 16; --i >= 0; x >>>= 4) { + asHex[i] = HEX[(int) x & 0x0F]; + } + return "0x" + new String(asHex); + } + } Index: 3rdParty_sources/lucene/org/apache/lucene/util/UnicodeUtil.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/UnicodeUtil.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/UnicodeUtil.java 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/UnicodeUtil.java 16 Dec 2014 11:31:32 -0000 1.1.2.1 @@ -1,6 +1,7 @@ package org.apache.lucene.util; -/** + +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -49,16 +50,62 @@ * remains attached. */ +/* + * Additional code came from the IBM ICU library. + * + * http://www.icu-project.org + * + * Full Copyright for that code follows. + */ + +/* + * Copyright (C) 1999-2010, International Business Machines + * Corporation and others. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, and/or sell copies of the + * Software, and to permit persons to whom the Software is furnished to do so, + * provided that the above copyright notice(s) and this permission notice appear + * in all copies of the Software and that both the above copyright notice(s) and + * this permission notice appear in supporting documentation. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + * IN NO EVENT SHALL THE COPYRIGHT HOLDER OR HOLDERS INCLUDED IN THIS NOTICE BE + * LIABLE FOR ANY CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR + * ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER + * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT + * OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Except as contained in this notice, the name of a copyright holder shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization of the + * copyright holder. + */ + /** * Class to encode java's UTF16 char[] into UTF8 byte[] * without always allocating a new byte[] as - * String.getBytes("UTF-8") does. + * String.getBytes(StandardCharsets.UTF_8) does. * - *

    WARNING: This API is a new and experimental and - * may suddenly change.

    + * @lucene.internal */ -final public class UnicodeUtil { +public final class UnicodeUtil { + + /** A binary term consisting of a number of 0xff bytes, likely to be bigger than other terms + * (e.g. collation keys) one would normally encounter, and definitely bigger than any UTF-8 terms. + *

    + * WARNING: This is not a valid UTF8 Term + **/ + public static final BytesRef BIG_TERM = new BytesRef( + new byte[] {-1,-1,-1,-1,-1,-1,-1,-1,-1,-1} + ); // TODO this is unrelated here find a better place for it + + private UnicodeUtil() {} // no instance public static final int UNI_SUR_HIGH_START = 0xD800; public static final int UNI_SUR_HIGH_END = 0xDBFF; @@ -68,122 +115,30 @@ private static final long UNI_MAX_BMP = 0x0000FFFF; - private static final int HALF_BASE = 0x0010000; private static final long HALF_SHIFT = 10; private static final long HALF_MASK = 0x3FFL; + + private static final int SURROGATE_OFFSET = + Character.MIN_SUPPLEMENTARY_CODE_POINT - + (UNI_SUR_HIGH_START << HALF_SHIFT) - UNI_SUR_LOW_START; - public static final class UTF8Result { - public byte[] result = new byte[10]; - public int length; + /** Maximum number of UTF8 bytes per UTF16 character. */ + public static final int MAX_UTF8_BYTES_PER_CHAR = 4; - public void setLength(int newLength) { - if (result.length < newLength) { - byte[] newArray = new byte[(int) (1.5*newLength)]; - System.arraycopy(result, 0, newArray, 0, length); - result = newArray; - } - length = newLength; - } - } - - public static final class UTF16Result { - public char[] result = new char[10]; - public int[] offsets = new int[10]; - public int length; - - public void setLength(int newLength) { - if (result.length < newLength) { - char[] newArray = new char[(int) (1.5*newLength)]; - System.arraycopy(result, 0, newArray, 0, length); - result = newArray; - } - length = newLength; - } - - public void copyText(UTF16Result other) { - setLength(other.length); - System.arraycopy(other.result, 0, result, 0, length); - } - } - /** Encode characters from a char[] source, starting at - * offset and stopping when the character 0xffff is seen. - * Returns the number of bytes written to bytesOut. */ - public static void UTF16toUTF8(final char[] source, final int offset, UTF8Result result) { + * offset for length chars. It is the responsibility of the + * caller to make sure that the destination array is large enough. 
+ */ + public static int UTF16toUTF8(final char[] source, final int offset, final int length, byte[] out) { int upto = 0; int i = offset; - byte[] out = result.result; - - while(true) { - - final int code = (int) source[i++]; - - if (upto+4 > out.length) { - byte[] newOut = new byte[2*out.length]; - assert newOut.length >= upto+4; - System.arraycopy(out, 0, newOut, 0, upto); - result.result = out = newOut; - } - if (code < 0x80) - out[upto++] = (byte) code; - else if (code < 0x800) { - out[upto++] = (byte) (0xC0 | (code >> 6)); - out[upto++] = (byte)(0x80 | (code & 0x3F)); - } else if (code < 0xD800 || code > 0xDFFF) { - if (code == 0xffff) - // END - break; - out[upto++] = (byte)(0xE0 | (code >> 12)); - out[upto++] = (byte)(0x80 | ((code >> 6) & 0x3F)); - out[upto++] = (byte)(0x80 | (code & 0x3F)); - } else { - // surrogate pair - // confirm valid high surrogate - if (code < 0xDC00 && source[i] != 0xffff) { - int utf32 = (int) source[i]; - // confirm valid low surrogate and write pair - if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) { - utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF); - i++; - out[upto++] = (byte)(0xF0 | (utf32 >> 18)); - out[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F)); - out[upto++] = (byte)(0x80 | ((utf32 >> 6) & 0x3F)); - out[upto++] = (byte)(0x80 | (utf32 & 0x3F)); - continue; - } - } - // replace unpaired surrogate or out-of-order low surrogate - // with substitution character - out[upto++] = (byte) 0xEF; - out[upto++] = (byte) 0xBF; - out[upto++] = (byte) 0xBD; - } - } - //assert matches(source, offset, i-offset-1, out, upto); - result.length = upto; - } - - /** Encode characters from a char[] source, starting at - * offset for length chars. Returns the number of bytes - * written to bytesOut. */ - public static void UTF16toUTF8(final char[] source, final int offset, final int length, UTF8Result result) { - - int upto = 0; - int i = offset; final int end = offset + length; - byte[] out = result.result; while(i < end) { final int code = (int) source[i++]; - if (upto+4 > out.length) { - byte[] newOut = new byte[2*out.length]; - assert newOut.length >= upto+4; - System.arraycopy(out, 0, newOut, 0, upto); - result.result = out = newOut; - } if (code < 0x80) out[upto++] = (byte) code; else if (code < 0x800) { @@ -196,11 +151,11 @@ } else { // surrogate pair // confirm valid high surrogate - if (code < 0xDC00 && i < end && source[i] != 0xffff) { + if (code < 0xDC00 && i < end) { int utf32 = (int) source[i]; // confirm valid low surrogate and write pair if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) { - utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF); + utf32 = (code << 10) + utf32 + SURROGATE_OFFSET; i++; out[upto++] = (byte)(0xF0 | (utf32 >> 18)); out[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F)); @@ -217,27 +172,21 @@ } } //assert matches(source, offset, length, out, upto); - result.length = upto; + return upto; } /** Encode characters from this String, starting at offset - * for length characters. Returns the number of bytes - * written to bytesOut. */ - public static void UTF16toUTF8(final String s, final int offset, final int length, UTF8Result result) { + * for length characters. It is the responsibility of the + * caller to make sure that the destination array is large enough. 
+ */ + // TODO: broken if incoming result.offset != 0 + public static int UTF16toUTF8(final CharSequence s, final int offset, final int length, byte[] out) { final int end = offset + length; - byte[] out = result.result; - int upto = 0; for(int i=offset;i out.length) { - byte[] newOut = new byte[2*out.length]; - assert newOut.length >= upto+4; - System.arraycopy(out, 0, newOut, 0, upto); - result.result = out = newOut; - } if (code < 0x80) out[upto++] = (byte) code; else if (code < 0x800) { @@ -254,7 +203,7 @@ int utf32 = (int) s.charAt(i+1); // confirm valid low surrogate and write pair if (utf32 >= 0xDC00 && utf32 <= 0xDFFF) { - utf32 = ((code - 0xD7C0) << 10) + (utf32 & 0x3FF); + utf32 = (code << 10) + utf32 + SURROGATE_OFFSET; i++; out[upto++] = (byte)(0xF0 | (utf32 >> 18)); out[upto++] = (byte)(0x80 | ((utf32 >> 12) & 0x3F)); @@ -271,85 +220,15 @@ } } //assert matches(s, offset, length, out, upto); - result.length = upto; + return upto; } - /** Convert UTF8 bytes into UTF16 characters. If offset - * is non-zero, conversion starts at that starting point - * in utf8, re-using the results from the previous call - * up until offset. */ - public static void UTF8toUTF16(final byte[] utf8, final int offset, final int length, final UTF16Result result) { - - final int end = offset + length; - char[] out = result.result; - if (result.offsets.length <= end) { - int[] newOffsets = new int[2*end]; - System.arraycopy(result.offsets, 0, newOffsets, 0, result.offsets.length); - result.offsets = newOffsets; - } - final int[] offsets = result.offsets; - - // If incremental decoding fell in the middle of a - // single unicode character, rollback to its start: - int upto = offset; - while(offsets[upto] == -1) - upto--; - - int outUpto = offsets[upto]; - - // Pre-allocate for worst case 1-for-1 - if (outUpto+length >= out.length) { - char[] newOut = new char[2*(outUpto+length)]; - System.arraycopy(out, 0, newOut, 0, outUpto); - result.result = out = newOut; - } - - while (upto < end) { - - final int b = utf8[upto]&0xff; - final int ch; - - offsets[upto++] = outUpto; - - if (b < 0xc0) { - assert b < 0x80; - ch = b; - } else if (b < 0xe0) { - ch = ((b&0x1f)<<6) + (utf8[upto]&0x3f); - offsets[upto++] = -1; - } else if (b < 0xf0) { - ch = ((b&0xf)<<12) + ((utf8[upto]&0x3f)<<6) + (utf8[upto+1]&0x3f); - offsets[upto++] = -1; - offsets[upto++] = -1; - } else { - assert b < 0xf8; - ch = ((b&0x7)<<18) + ((utf8[upto]&0x3f)<<12) + ((utf8[upto+1]&0x3f)<<6) + (utf8[upto+2]&0x3f); - offsets[upto++] = -1; - offsets[upto++] = -1; - offsets[upto++] = -1; - } - - if (ch <= UNI_MAX_BMP) { - // target is a character <= 0xFFFF - out[outUpto++] = (char) ch; - } else { - // target is a character in range 0xFFFF - 0x10FFFF - final int chHalf = ch - HALF_BASE; - out[outUpto++] = (char) ((chHalf >> HALF_SHIFT) + UNI_SUR_HIGH_START); - out[outUpto++] = (char) ((chHalf & HALF_MASK) + UNI_SUR_LOW_START); - } - } - - offsets[upto] = outUpto; - result.length = outUpto; - } - // Only called from assert /* private static boolean matches(char[] source, int offset, int length, byte[] result, int upto) { try { String s1 = new String(source, offset, length); - String s2 = new String(result, 0, upto, "UTF-8"); + String s2 = new String(result, 0, upto, StandardCharsets.UTF_8); if (!s1.equals(s2)) { //System.out.println("DIFF: s1 len=" + s1.length()); //for(int i=0;i= UNI_SUR_LOW_START && nextCH <= UNI_SUR_LOW_END) { // Valid surrogate pair } else - // Unmatched hight surrogate + // Unmatched high surrogate return false; } else - // Unmatched 
hight surrogate + // Unmatched high surrogate return false; } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) // Unmatched low surrogate @@ -423,7 +302,7 @@ return true; } - public static final boolean validUTF16String(char[] s, int size) { + public static boolean validUTF16String(char[] s, int size) { for(int i=0;i= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { @@ -443,5 +322,280 @@ return true; } - */ + + // Borrowed from Python's 3.1.2 sources, + // Objects/unicodeobject.c, and modified (see commented + // out section, and the -1s) to disallow the reserved for + // future (RFC 3629) 5/6 byte sequence characters, and + // invalid 0xFE and 0xFF bytes. + + /* Map UTF-8 encoded prefix byte to sequence length. -1 (0xFF) + * means illegal prefix. see RFC 2279 for details */ + static final int [] utf8CodeLength; + static { + final int v = Integer.MIN_VALUE; + utf8CodeLength = new int [] { + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, v, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 4, 4, 4, 4, 4, 4, 4, 4 //, 5, 5, 5, 5, 6, 6, 0, 0 + }; + } + + /** + * Returns the number of code points in this UTF8 sequence. + * + *

    This method assumes valid UTF8 input. This method + * does not perform full UTF8 validation, it will check only the + * first byte of each codepoint (for multi-byte sequences any bytes after + * the head are skipped). + * + * @throws IllegalArgumentException If invalid codepoint header byte occurs or the + * content is prematurely truncated. + */ + public static int codePointCount(BytesRef utf8) { + int pos = utf8.offset; + final int limit = pos + utf8.length; + final byte[] bytes = utf8.bytes; + + int codePointCount = 0; + for (; pos < limit; codePointCount++) { + int v = bytes[pos] & 0xFF; + if (v < /* 0xxx xxxx */ 0x80) { pos += 1; continue; } + if (v >= /* 110x xxxx */ 0xc0) { + if (v < /* 111x xxxx */ 0xe0) { pos += 2; continue; } + if (v < /* 1111 xxxx */ 0xf0) { pos += 3; continue; } + if (v < /* 1111 1xxx */ 0xf8) { pos += 4; continue; } + // fallthrough, consider 5 and 6 byte sequences invalid. + } + + // Anything not covered above is invalid UTF8. + throw new IllegalArgumentException(); + } + + // Check if we didn't go over the limit on the last character. + if (pos > limit) throw new IllegalArgumentException(); + + return codePointCount; + } + + /** + *

    This method assumes valid UTF8 input. This method + * does not perform full UTF8 validation, it will check only the + * first byte of each codepoint (for multi-byte sequences any bytes after + * the head are skipped). It is the responsibility of the caller to make sure + * that the destination array is large enough. + * + * @throws IllegalArgumentException If invalid codepoint header byte occurs or the + * content is prematurely truncated. + */ + public static int UTF8toUTF32(final BytesRef utf8, final int[] ints) { + // TODO: ints cannot be null, should be an assert + int utf32Count = 0; + int utf8Upto = utf8.offset; + final byte[] bytes = utf8.bytes; + final int utf8Limit = utf8.offset + utf8.length; + while(utf8Upto < utf8Limit) { + final int numBytes = utf8CodeLength[bytes[utf8Upto] & 0xFF]; + int v = 0; + switch(numBytes) { + case 1: + ints[utf32Count++] = bytes[utf8Upto++]; + continue; + case 2: + // 5 useful bits + v = bytes[utf8Upto++] & 31; + break; + case 3: + // 4 useful bits + v = bytes[utf8Upto++] & 15; + break; + case 4: + // 3 useful bits + v = bytes[utf8Upto++] & 7; + break; + default : + throw new IllegalArgumentException("invalid utf8"); + } + + // TODO: this may read past utf8's limit. + final int limit = utf8Upto + numBytes-1; + while(utf8Upto < limit) { + v = v << 6 | bytes[utf8Upto++]&63; + } + ints[utf32Count++] = v; + } + + return utf32Count; + } + + /** Shift value for lead surrogate to form a supplementary character. */ + private static final int LEAD_SURROGATE_SHIFT_ = 10; + /** Mask to retrieve the significant value from a trail surrogate.*/ + private static final int TRAIL_SURROGATE_MASK_ = 0x3FF; + /** Trail surrogate minimum value */ + private static final int TRAIL_SURROGATE_MIN_VALUE = 0xDC00; + /** Lead surrogate minimum value */ + private static final int LEAD_SURROGATE_MIN_VALUE = 0xD800; + /** The minimum value for Supplementary code points */ + private static final int SUPPLEMENTARY_MIN_VALUE = 0x10000; + /** Value that all lead surrogate starts with */ + private static final int LEAD_SURROGATE_OFFSET_ = LEAD_SURROGATE_MIN_VALUE + - (SUPPLEMENTARY_MIN_VALUE >> LEAD_SURROGATE_SHIFT_); + + /** + * Cover JDK 1.5 API. Create a String from an array of codePoints. + * + * @param codePoints The code array + * @param offset The start of the text in the code point array + * @param count The number of code points + * @return a String representing the code points between offset and count + * @throws IllegalArgumentException If an invalid code point is encountered + * @throws IndexOutOfBoundsException If the offset or count are out of bounds. 
+ */ + public static String newString(int[] codePoints, int offset, int count) { + if (count < 0) { + throw new IllegalArgumentException(); + } + char[] chars = new char[count]; + int w = 0; + for (int r = offset, e = offset + count; r < e; ++r) { + int cp = codePoints[r]; + if (cp < 0 || cp > 0x10ffff) { + throw new IllegalArgumentException(); + } + while (true) { + try { + if (cp < 0x010000) { + chars[w] = (char) cp; + w++; + } else { + chars[w] = (char) (LEAD_SURROGATE_OFFSET_ + (cp >> LEAD_SURROGATE_SHIFT_)); + chars[w + 1] = (char) (TRAIL_SURROGATE_MIN_VALUE + (cp & TRAIL_SURROGATE_MASK_)); + w += 2; + } + break; + } catch (IndexOutOfBoundsException ex) { + int newlen = (int) (Math.ceil((double) codePoints.length * (w + 2) + / (r - offset + 1))); + char[] temp = new char[newlen]; + System.arraycopy(chars, 0, temp, 0, w); + chars = temp; + } + } + } + return new String(chars, 0, w); + } + + // for debugging + public static String toHexString(String s) { + StringBuilder sb = new StringBuilder(); + for(int i=0;i 0) { + sb.append(' '); + } + if (ch < 128) { + sb.append(ch); + } else { + if (ch >= UNI_SUR_HIGH_START && ch <= UNI_SUR_HIGH_END) { + sb.append("H:"); + } else if (ch >= UNI_SUR_LOW_START && ch <= UNI_SUR_LOW_END) { + sb.append("L:"); + } else if (ch > UNI_SUR_LOW_END) { + if (ch == 0xffff) { + sb.append("F:"); + } else { + sb.append("E:"); + } + } + + sb.append("0x" + Integer.toHexString(ch)); + } + } + return sb.toString(); + } + + /** + * Interprets the given byte array as UTF-8 and converts to UTF-16. It is the + * responsibility of the caller to make sure that the destination array is large enough. + *

    + * NOTE: Full characters are read, even if this reads past the length passed (and + * can result in an ArrayOutOfBoundsException if invalid UTF-8 is passed). + * Explicit checks for valid UTF-8 are not performed. + */ + // TODO: broken if chars.offset != 0 + public static int UTF8toUTF16(byte[] utf8, int offset, int length, char[] out) { + int out_offset = 0; + final int limit = offset + length; + while (offset < limit) { + int b = utf8[offset++]&0xff; + if (b < 0xc0) { + assert b < 0x80; + out[out_offset++] = (char)b; + } else if (b < 0xe0) { + out[out_offset++] = (char)(((b&0x1f)<<6) + (utf8[offset++]&0x3f)); + } else if (b < 0xf0) { + out[out_offset++] = (char)(((b&0xf)<<12) + ((utf8[offset]&0x3f)<<6) + (utf8[offset+1]&0x3f)); + offset += 2; + } else { + assert b < 0xf8: "b = 0x" + Integer.toHexString(b); + int ch = ((b&0x7)<<18) + ((utf8[offset]&0x3f)<<12) + ((utf8[offset+1]&0x3f)<<6) + (utf8[offset+2]&0x3f); + offset += 3; + if (ch < UNI_MAX_BMP) { + out[out_offset++] = (char)ch; + } else { + int chHalf = ch - 0x0010000; + out[out_offset++] = (char) ((chHalf >> 10) + 0xD800); + out[out_offset++] = (char) ((chHalf & HALF_MASK) + 0xDC00); + } + } + } + return out_offset; + } + + /** + * Utility method for {@link #UTF8toUTF16(byte[], int, int, char[])} + * @see #UTF8toUTF16(byte[], int, int, char[]) + */ + public static int UTF8toUTF16(BytesRef bytesRef, char[] chars) { + return UTF8toUTF16(bytesRef.bytes, bytesRef.offset, bytesRef.length, chars); + } + + /** + * @deprecated Uses {@link CharsRef} as a buffer. + */ + @Deprecated + public static void UTF8toUTF16(BytesRef bytes, CharsRef chars) { + assert chars.offset == 0; + if (chars.chars.length < bytes.length) { + chars.chars = new char[bytes.length]; + } + chars.length = UTF8toUTF16(bytes, chars.chars); + } + + /** + * @deprecated Uses {@link BytesRef} as a buffer. + */ + @Deprecated + public static void UTF16toUTF8(CharSequence chars, BytesRef bytes) { + assert bytes.offset == 0; + if (bytes.length < MAX_UTF8_BYTES_PER_CHAR * chars.length()) { + bytes.bytes = new byte[MAX_UTF8_BYTES_PER_CHAR * chars.length()]; + } + bytes.length = UTF16toUTF8(chars, 0, chars.length(), bytes.bytes); + } + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/Version.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/VirtualMethod.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/WAH8DocIdSet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/WeakIdentityMap.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/apache/lucene/util/package.html =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/apache/lucene/util/package.html,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/apache/lucene/util/package.html 17 Aug 2012 14:54:53 -0000 1.1 +++ 3rdParty_sources/lucene/org/apache/lucene/util/package.html 16 Dec 2014 11:31:31 -0000 1.1.2.1 @@ -18,7 +18,6 @@ - Some utility classes. Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Automata.java'. 
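UnicodeUtil now works against caller-provided buffers instead of the removed UTF8Result/UTF16Result holders. A minimal round-trip sketch using the methods from the hunk above (the demo class and sample string are illustrative only; the caller must size the destination arrays, worst case MAX_UTF8_BYTES_PER_CHAR bytes per UTF-16 code unit):

import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.UnicodeUtil;

public class UnicodeUtilDemo {
  public static void main(String[] args) {
    String text = "héllo \uD83D\uDE00"; // includes a surrogate pair

    // Worst case: MAX_UTF8_BYTES_PER_CHAR (4) bytes per UTF-16 code unit.
    byte[] utf8 = new byte[UnicodeUtil.MAX_UTF8_BYTES_PER_CHAR * text.length()];
    int utf8Len = UnicodeUtil.UTF16toUTF8(text, 0, text.length(), utf8);

    System.out.println("utf8 bytes: " + utf8Len);
    System.out.println("code points: "
        + UnicodeUtil.codePointCount(new BytesRef(utf8, 0, utf8Len)));

    // Round-trip back to UTF-16; a UTF-8 byte expands to at most one
    // UTF-16 code unit, so utf8Len chars is always enough.
    char[] utf16 = new char[utf8Len];
    int utf16Len = UnicodeUtil.UTF8toUTF16(utf8, 0, utf8Len, utf16);
    System.out.println(new String(utf16, 0, utf16Len).equals(text)); // true
  }
}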
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Automaton.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/AutomatonProvider.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/ByteRunAutomaton.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/CharacterRunAutomaton.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/CompiledAutomaton.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/DaciukMihovAutomatonBuilder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Lev1ParametricDescription.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Lev1TParametricDescription.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Lev2ParametricDescription.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Lev2TParametricDescription.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/LevenshteinAutomata.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/MinimizationOperations.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Operations.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/RegExp.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/RunAutomaton.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/SortedIntSet.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/StatePair.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/Transition.java'. 
Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/UTF32ToUTF8.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/UTF32ToUTF8.py'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/createLevAutomata.py'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/automaton/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/cache/Cache.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/cache/SimpleLRUCache.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/cache/SimpleMapCache.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/Builder.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/ByteSequenceOutputs.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/BytesRefFSTEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/BytesStore.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/CharSequenceOutputs.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/FST.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/FSTEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/ForwardBytesReader.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/IntSequenceOutputs.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/IntsRefFSTEnum.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/NoOutputs.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/fst/NodeHash.java'. 
Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/PackedWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/PagedGrowableWriter.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/PagedMutable.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/gen_BulkOperation.py'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/gen_Direct.py'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/gen_Packed64SingleBlock.py'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/gen_PackedThreeBlocks.py'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/apache/lucene/util/packed/package.html'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/tartarus/snowball/Among.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/Among.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/Among.java 17 Aug 2012 14:55:14 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/Among.java 16 Dec 2014 11:32:18 -0000 1.1.2.1 @@ -1,34 +1,74 @@ +/* +Copyright (c) 2001, Dr Martin Porter +Copyright (c) 2002, Richard Boulton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * Neither the name of the copyright holders nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ + */ + package org.tartarus.snowball; import java.lang.reflect.Method; +/** + * This is the rev 502 of the Snowball SVN trunk, + * but modified: + * made abstract and introduced abstract method stem to avoid expensive reflection in filter class. + * refactored StringBuffers to StringBuilder + * uses char[] as buffer instead of StringBuffer/StringBuilder + * eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b + * reflection calls (Lovins, etc) use EMPTY_ARGS/EMPTY_PARAMS + */ public class Among { - public Among (String s, int substring_i, int result, - String methodname, SnowballProgram methodobject) { - this.s_size = s.length(); - this.s = s; - this.substring_i = substring_i; - this.result = result; - this.methodobject = methodobject; - if (methodname.length() == 0) { - this.method = null; - } else { - try { - this.method = methodobject.getClass(). - getDeclaredMethod(methodname, new Class[0]); - } catch (NoSuchMethodException e) { - // FIXME - debug message - this.method = null; - } - } + private static final Class[] EMPTY_PARAMS = new Class[0]; + + public Among(String s, int substring_i, int result, + String methodname, SnowballProgram methodobject) { + this.s_size = s.length(); + this.s = s.toCharArray(); + this.substring_i = substring_i; + this.result = result; + this.methodobject = methodobject; + if (methodname.length() == 0) { + this.method = null; + } else { + try { + this.method = methodobject.getClass(). + getDeclaredMethod(methodname, EMPTY_PARAMS); + } catch (NoSuchMethodException e) { + throw new RuntimeException(e); + } } + } - public int s_size; /* search string */ - public String s; /* search string */ - public int substring_i; /* index to longest matching substring */ - public int result; /* result of the lookup */ - public Method method; /* method to use if substring matches */ - public SnowballProgram methodobject; /* object to invoke method on */ + public final int s_size; /* search string */ + public final char[] s; /* search string */ + public final int substring_i; /* index to longest matching substring */ + public final int result; /* result of the lookup */ + public final Method method; /* method to use if substring matches */ + public final SnowballProgram methodobject; /* object to invoke method on */ }; Index: 3rdParty_sources/lucene/org/tartarus/snowball/SnowballProgram.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/SnowballProgram.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/SnowballProgram.java 17 Aug 2012 14:55:14 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/SnowballProgram.java 16 Dec 2014 11:32:18 -0000 1.1.2.1 @@ -1,18 +1,58 @@ +/* +Copyright (c) 2001, Dr Martin Porter +Copyright (c) 2002, Richard Boulton +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + * Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * Neither the name of the copyright holders nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + */ + + package org.tartarus.snowball; + import java.lang.reflect.InvocationTargetException; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.RamUsageEstimator; + /** - * This is the rev 500 of the Snowball SVN trunk, + * This is the rev 502 of the Snowball SVN trunk, * but modified: - * made abstract and introduced abstract method stem - * to avoid expensive + * made abstract and introduced abstract method stem to avoid expensive reflection in filter class. + * refactored StringBuffers to StringBuilder + * uses char[] as buffer instead of StringBuffer/StringBuilder + * eq_s,eq_s_b,insert,replace_s take CharSequence like eq_v and eq_v_b + * reflection calls (Lovins, etc) use EMPTY_ARGS/EMPTY_PARAMS */ public abstract class SnowballProgram { + private static final Object[] EMPTY_ARGS = new Object[0]; + protected SnowballProgram() { - current = new StringBuffer(); - setCurrent(""); + current = new char[8]; + setCurrent(""); } public abstract boolean stem(); @@ -22,32 +62,65 @@ */ public void setCurrent(String value) { - current.replace(0, current.length(), value); - cursor = 0; - limit = current.length(); - limit_backward = 0; - bra = cursor; - ket = limit; + current = value.toCharArray(); + cursor = 0; + limit = value.length(); + limit_backward = 0; + bra = cursor; + ket = limit; } /** * Get the current string. */ public String getCurrent() { - String result = current.toString(); - // Make a new StringBuffer. If we reuse the old one, and a user of - // the library keeps a reference to the buffer returned (for example, - // by converting it to a String in a way which doesn't force a copy), - // the buffer size will not decrease, and we will risk wasting a large - // amount of memory. - // Thanks to Wolfram Esser for spotting this problem. - current = new StringBuffer(); - return result; + return new String(current, 0, limit); } + + /** + * Set the current string. + * @param text character array containing input + * @param length valid length of text. + */ + public void setCurrent(char text[], int length) { + current = text; + cursor = 0; + limit = length; + limit_backward = 0; + bra = cursor; + ket = limit; + } + /** + * Get the current buffer containing the stem. + *
<p> + * NOTE: this may be a reference to a different character array than the + * one originally provided with setCurrent, in the exceptional case that + * stemming produced a longer intermediate or result string. + * </p> + * <p> + * It is necessary to use {@link #getCurrentBufferLength()} to determine + * the valid length of the returned buffer. For example, many words are + * stemmed simply by subtracting from the length to remove suffixes. + * </p>
    + * @see #getCurrentBufferLength() + */ + public char[] getCurrentBuffer() { + return current; + } + + /** + * Get the valid length of the character array in + * {@link #getCurrentBuffer()}. + * @return valid length of the array. + */ + public int getCurrentBufferLength() { + return limit; + } + // current string - protected StringBuffer current; + private char current[]; protected int cursor; protected int limit; @@ -57,347 +130,350 @@ protected void copy_from(SnowballProgram other) { - current = other.current; - cursor = other.cursor; - limit = other.limit; - limit_backward = other.limit_backward; - bra = other.bra; - ket = other.ket; + current = other.current; + cursor = other.cursor; + limit = other.limit; + limit_backward = other.limit_backward; + bra = other.bra; + ket = other.ket; } protected boolean in_grouping(char [] s, int min, int max) { - if (cursor >= limit) return false; - char ch = current.charAt(cursor); - if (ch > max || ch < min) return false; - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; - cursor++; - return true; + if (cursor >= limit) return false; + char ch = current[cursor]; + if (ch > max || ch < min) return false; + ch -= min; + if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; + cursor++; + return true; } protected boolean in_grouping_b(char [] s, int min, int max) { - if (cursor <= limit_backward) return false; - char ch = current.charAt(cursor - 1); - if (ch > max || ch < min) return false; - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; - cursor--; - return true; + if (cursor <= limit_backward) return false; + char ch = current[cursor - 1]; + if (ch > max || ch < min) return false; + ch -= min; + if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) return false; + cursor--; + return true; } protected boolean out_grouping(char [] s, int min, int max) { - if (cursor >= limit) return false; - char ch = current.charAt(cursor); - if (ch > max || ch < min) { - cursor++; - return true; - } - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { - cursor ++; - return true; - } - return false; + if (cursor >= limit) return false; + char ch = current[cursor]; + if (ch > max || ch < min) { + cursor++; + return true; + } + ch -= min; + if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { + cursor ++; + return true; + } + return false; } protected boolean out_grouping_b(char [] s, int min, int max) { - if (cursor <= limit_backward) return false; - char ch = current.charAt(cursor - 1); - if (ch > max || ch < min) { - cursor--; - return true; - } - ch -= min; - if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { - cursor--; - return true; - } - return false; + if (cursor <= limit_backward) return false; + char ch = current[cursor - 1]; + if (ch > max || ch < min) { + cursor--; + return true; + } + ch -= min; + if ((s[ch >> 3] & (0X1 << (ch & 0X7))) == 0) { + cursor--; + return true; + } + return false; } protected boolean in_range(int min, int max) { - if (cursor >= limit) return false; - char ch = current.charAt(cursor); - if (ch > max || ch < min) return false; - cursor++; - return true; + if (cursor >= limit) return false; + char ch = current[cursor]; + if (ch > max || ch < min) return false; + cursor++; + return true; } protected boolean in_range_b(int min, int max) { - if (cursor <= limit_backward) return false; - char ch = current.charAt(cursor - 1); - if (ch > max || ch < min) return false; - cursor--; - return true; + if (cursor <= limit_backward) return false; + char ch = current[cursor - 1]; + if (ch > max || ch 
< min) return false; + cursor--; + return true; } protected boolean out_range(int min, int max) { - if (cursor >= limit) return false; - char ch = current.charAt(cursor); - if (!(ch > max || ch < min)) return false; - cursor++; - return true; + if (cursor >= limit) return false; + char ch = current[cursor]; + if (!(ch > max || ch < min)) return false; + cursor++; + return true; } protected boolean out_range_b(int min, int max) { - if (cursor <= limit_backward) return false; - char ch = current.charAt(cursor - 1); - if(!(ch > max || ch < min)) return false; - cursor--; - return true; + if (cursor <= limit_backward) return false; + char ch = current[cursor - 1]; + if(!(ch > max || ch < min)) return false; + cursor--; + return true; } - protected boolean eq_s(int s_size, String s) + protected boolean eq_s(int s_size, CharSequence s) { - if (limit - cursor < s_size) return false; - int i; - for (i = 0; i != s_size; i++) { - if (current.charAt(cursor + i) != s.charAt(i)) return false; - } - cursor += s_size; - return true; + if (limit - cursor < s_size) return false; + int i; + for (i = 0; i != s_size; i++) { + if (current[cursor + i] != s.charAt(i)) return false; + } + cursor += s_size; + return true; } - protected boolean eq_s_b(int s_size, String s) + protected boolean eq_s_b(int s_size, CharSequence s) { - if (cursor - limit_backward < s_size) return false; - int i; - for (i = 0; i != s_size; i++) { - if (current.charAt(cursor - s_size + i) != s.charAt(i)) return false; - } - cursor -= s_size; - return true; + if (cursor - limit_backward < s_size) return false; + int i; + for (i = 0; i != s_size; i++) { + if (current[cursor - s_size + i] != s.charAt(i)) return false; + } + cursor -= s_size; + return true; } - protected boolean eq_v(StringBuffer s) + protected boolean eq_v(CharSequence s) { - return eq_s(s.length(), s.toString()); + return eq_s(s.length(), s); } - protected boolean eq_v_b(StringBuffer s) - { return eq_s_b(s.length(), s.toString()); + protected boolean eq_v_b(CharSequence s) + { + return eq_s_b(s.length(), s); } protected int find_among(Among v[], int v_size) { - int i = 0; - int j = v_size; + int i = 0; + int j = v_size; - int c = cursor; - int l = limit; + int c = cursor; + int l = limit; - int common_i = 0; - int common_j = 0; + int common_i = 0; + int common_j = 0; - boolean first_key_inspected = false; + boolean first_key_inspected = false; - while(true) { - int k = i + ((j - i) >> 1); - int diff = 0; - int common = common_i < common_j ? common_i : common_j; // smaller - Among w = v[k]; - int i2; - for (i2 = common; i2 < w.s_size; i2++) { - if (c + common == l) { - diff = -1; - break; - } - diff = current.charAt(c + common) - w.s.charAt(i2); - if (diff != 0) break; - common++; - } - if (diff < 0) { - j = k; - common_j = common; - } else { - i = k; - common_i = common; - } - if (j - i <= 1) { - if (i > 0) break; // v->s has been inspected - if (j == i) break; // only one item in v + while (true) { + int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? 
common_i : common_j; // smaller + Among w = v[k]; + int i2; + for (i2 = common; i2 < w.s_size; i2++) { + if (c + common == l) { + diff = -1; + break; + } + diff = current[c + common] - w.s[i2]; + if (diff != 0) break; + common++; + } + if (diff < 0) { + j = k; + common_j = common; + } else { + i = k; + common_i = common; + } + if (j - i <= 1) { + if (i > 0) break; // v->s has been inspected + if (j == i) break; // only one item in v - // - but now we need to go round once more to get - // v->s inspected. This looks messy, but is actually - // the optimal approach. + // - but now we need to go round once more to get + // v->s inspected. This looks messy, but is actually + // the optimal approach. - if (first_key_inspected) break; - first_key_inspected = true; - } - } - while(true) { - Among w = v[i]; - if (common_i >= w.s_size) { - cursor = c + w.s_size; - if (w.method == null) return w.result; - boolean res; - try { - Object resobj = w.method.invoke(w.methodobject, - new Object[0]); - res = resobj.toString().equals("true"); - } catch (InvocationTargetException e) { - res = false; - // FIXME - debug message - } catch (IllegalAccessException e) { - res = false; - // FIXME - debug message - } - cursor = c + w.s_size; - if (res) return w.result; - } - i = w.substring_i; - if (i < 0) return 0; - } + if (first_key_inspected) break; + first_key_inspected = true; + } + } + while (true) { + Among w = v[i]; + if (common_i >= w.s_size) { + cursor = c + w.s_size; + if (w.method == null) return w.result; + boolean res; + try { + Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS); + res = resobj.toString().equals("true"); + } catch (InvocationTargetException e) { + res = false; + // FIXME - debug message + } catch (IllegalAccessException e) { + res = false; + // FIXME - debug message + } + cursor = c + w.s_size; + if (res) return w.result; + } + i = w.substring_i; + if (i < 0) return 0; + } } - // find_among_b is for backwards processing. Same comments apply + // find_among_b is for backwards processing. Same comments apply protected int find_among_b(Among v[], int v_size) { - int i = 0; - int j = v_size; + int i = 0; + int j = v_size; - int c = cursor; - int lb = limit_backward; + int c = cursor; + int lb = limit_backward; - int common_i = 0; - int common_j = 0; + int common_i = 0; + int common_j = 0; - boolean first_key_inspected = false; + boolean first_key_inspected = false; - while(true) { - int k = i + ((j - i) >> 1); - int diff = 0; - int common = common_i < common_j ? common_i : common_j; - Among w = v[k]; - int i2; - for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) { - if (c - common == lb) { - diff = -1; - break; - } - diff = current.charAt(c - 1 - common) - w.s.charAt(i2); - if (diff != 0) break; - common++; - } - if (diff < 0) { - j = k; - common_j = common; - } else { - i = k; - common_i = common; - } - if (j - i <= 1) { - if (i > 0) break; - if (j == i) break; - if (first_key_inspected) break; - first_key_inspected = true; - } - } - while(true) { - Among w = v[i]; - if (common_i >= w.s_size) { - cursor = c - w.s_size; - if (w.method == null) return w.result; + while (true) { + int k = i + ((j - i) >> 1); + int diff = 0; + int common = common_i < common_j ? 
common_i : common_j; + Among w = v[k]; + int i2; + for (i2 = w.s_size - 1 - common; i2 >= 0; i2--) { + if (c - common == lb) { + diff = -1; + break; + } + diff = current[c - 1 - common] - w.s[i2]; + if (diff != 0) break; + common++; + } + if (diff < 0) { + j = k; + common_j = common; + } else { + i = k; + common_i = common; + } + if (j - i <= 1) { + if (i > 0) break; + if (j == i) break; + if (first_key_inspected) break; + first_key_inspected = true; + } + } + while (true) { + Among w = v[i]; + if (common_i >= w.s_size) { + cursor = c - w.s_size; + if (w.method == null) return w.result; - boolean res; - try { - Object resobj = w.method.invoke(w.methodobject, - new Object[0]); - res = resobj.toString().equals("true"); - } catch (InvocationTargetException e) { - res = false; - // FIXME - debug message - } catch (IllegalAccessException e) { - res = false; - // FIXME - debug message - } - cursor = c - w.s_size; - if (res) return w.result; - } - i = w.substring_i; - if (i < 0) return 0; - } + boolean res; + try { + Object resobj = w.method.invoke(w.methodobject, EMPTY_ARGS); + res = resobj.toString().equals("true"); + } catch (InvocationTargetException e) { + res = false; + // FIXME - debug message + } catch (IllegalAccessException e) { + res = false; + // FIXME - debug message + } + cursor = c - w.s_size; + if (res) return w.result; + } + i = w.substring_i; + if (i < 0) return 0; + } } - /* to replace chars between c_bra and c_ket in current by the + /* to replace chars between c_bra and c_ket in current by the * chars in s. */ - protected int replace_s(int c_bra, int c_ket, String s) - { - int adjustment = s.length() - (c_ket - c_bra); - current.replace(c_bra, c_ket, s); - limit += adjustment; - if (cursor >= c_ket) cursor += adjustment; - else if (cursor > c_bra) cursor = c_bra; - return adjustment; + protected int replace_s(int c_bra, int c_ket, CharSequence s) { + final int adjustment = s.length() - (c_ket - c_bra); + final int newLength = limit + adjustment; + //resize if necessary + if (newLength > current.length) { + char newBuffer[] = new char[ArrayUtil.oversize(newLength, RamUsageEstimator.NUM_BYTES_CHAR)]; + System.arraycopy(current, 0, newBuffer, 0, limit); + current = newBuffer; } - - protected void slice_check() - { - if (bra < 0 || - bra > ket || - ket > limit || - limit > current.length()) // this line could be removed - { - System.err.println("faulty slice operation"); - // FIXME: report error somehow. 
- /* - fprintf(stderr, "faulty slice operation:\n"); - debug(z, -1, 0); - exit(1); - */ - } + // if the substring being replaced is longer or shorter than the + // replacement, need to shift things around + if (adjustment != 0 && c_ket < limit) { + System.arraycopy(current, c_ket, current, c_bra + s.length(), + limit - c_ket); } + // insert the replacement text + // Note, faster is s.getChars(0, s.length(), current, c_bra); + // but would have to duplicate this method for both String and StringBuilder + for (int i = 0; i < s.length(); i++) + current[c_bra + i] = s.charAt(i); - protected void slice_from(String s) - { - slice_check(); - replace_s(bra, ket, s); - } + limit += adjustment; + if (cursor >= c_ket) cursor += adjustment; + else if (cursor > c_bra) cursor = c_bra; + return adjustment; + } - protected void slice_from(StringBuffer s) - { - slice_from(s.toString()); + protected void slice_check() { + if (bra < 0 || + bra > ket || + ket > limit) { + throw new IllegalArgumentException("faulty slice operation: bra=" + bra + ",ket=" + ket + ",limit=" + limit); + // FIXME: report error somehow. + /* + fprintf(stderr, "faulty slice operation:\n"); + debug(z, -1, 0); + exit(1); + */ } + } - protected void slice_del() - { - slice_from(""); - } + protected void slice_from(CharSequence s) { + slice_check(); + replace_s(bra, ket, s); + } - protected void insert(int c_bra, int c_ket, String s) - { - int adjustment = replace_s(c_bra, c_ket, s); - if (c_bra <= bra) bra += adjustment; - if (c_bra <= ket) ket += adjustment; - } + protected void slice_del() { + slice_from((CharSequence) ""); + } - protected void insert(int c_bra, int c_ket, StringBuffer s) + protected void insert(int c_bra, int c_ket, CharSequence s) { - insert(c_bra, c_ket, s.toString()); + int adjustment = replace_s(c_bra, c_ket, s); + if (c_bra <= bra) bra += adjustment; + if (c_bra <= ket) ket += adjustment; } /* Copy the slice into the supplied StringBuffer */ - protected StringBuffer slice_to(StringBuffer s) + protected StringBuilder slice_to(StringBuilder s) { - slice_check(); - int len = ket - bra; - s.replace(0, s.length(), current.substring(bra, ket)); - return s; + slice_check(); + int len = ket - bra; + s.setLength(0); + s.append(current, bra, len); + return s; } - protected StringBuffer assign_to(StringBuffer s) + protected StringBuilder assign_to(StringBuilder s) { - s.replace(0, s.length(), current.substring(0, limit)); - return s; + s.setLength(0); + s.append(current, 0, limit); + return s; } /* Fisheye: Tag 1.1.2.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/tartarus/snowball/TestApp.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/tartarus/snowball/package.html'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/tartarus/snowball/ext/ArmenianStemmer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/tartarus/snowball/ext/BasqueStemmer.java'. Fisheye: No comparison available. Pass `N' to diff? Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/tartarus/snowball/ext/CatalanStemmer.java'. Fisheye: No comparison available. Pass `N' to diff? 
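The SnowballProgram hunks above replace the StringBuffer-backed buffer with a plain char[] and add setCurrent(char[], int), getCurrentBuffer() and getCurrentBufferLength(). A minimal usage sketch follows (not part of the diff); the StemExample class name and the sample words are assumptions for illustration, and the generated DanishStemmer from the next diff is used only as a convenient concrete SnowballProgram subclass.

// StemExample.java -- illustrative sketch, not part of the committed sources.
import org.tartarus.snowball.SnowballProgram;
import org.tartarus.snowball.ext.DanishStemmer;

public class StemExample {
  public static void main(String[] args) {
    SnowballProgram stemmer = new DanishStemmer();

    // String-based round trip: setCurrent(String), stem(), getCurrent().
    stemmer.setCurrent("hederne");
    if (stemmer.stem()) {
      System.out.println(stemmer.getCurrent());
    }

    // char[]-based variant added in this revision: the returned buffer may be a
    // different (larger) array than the one passed in, and only the first
    // getCurrentBufferLength() characters of it are valid.
    char[] term = "heder".toCharArray();
    stemmer.setCurrent(term, term.length);
    stemmer.stem();
    char[] buffer = stemmer.getCurrentBuffer();
    int length = stemmer.getCurrentBufferLength();
    System.out.println(new String(buffer, 0, length));
  }
}

The char[] entry points appear intended to let callers reuse an existing term buffer and read back only the valid prefix, avoiding a String allocation per stemmed token.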
Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/DanishStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/DanishStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/DanishStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/DanishStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,423 +1,442 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class DanishStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "hed", -1, 1, "", this), - new Among ( "ethed", 0, 1, "", this), - new Among ( "ered", -1, 1, "", this), - new Among ( "e", -1, 1, "", this), - new Among ( "erede", 3, 1, "", this), - new Among ( "ende", 3, 1, "", this), - new Among ( "erende", 5, 1, "", this), - new Among ( "ene", 3, 1, "", this), - new Among ( "erne", 3, 1, "", this), - new Among ( "ere", 3, 1, "", this), - new Among ( "en", -1, 1, "", this), - new Among ( "heden", 10, 1, "", this), - new Among ( "eren", 10, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "heder", 13, 1, "", this), - new Among ( "erer", 13, 1, "", this), - new Among ( "s", -1, 2, "", this), - new Among ( "heds", 16, 1, "", this), - new Among ( "es", 16, 1, "", this), - new Among ( "endes", 18, 1, "", this), - new Among ( "erendes", 19, 1, "", this), - new Among ( "enes", 18, 1, "", this), - new Among ( "ernes", 18, 1, "", this), - new Among ( "eres", 18, 1, "", this), - new Among ( "ens", 16, 1, "", this), - new Among ( "hedens", 24, 1, "", this), - new Among ( "erens", 24, 1, "", this), - new Among ( "ers", 16, 1, "", this), - new Among ( "ets", 16, 1, "", this), - new Among ( "erets", 28, 1, "", this), - new Among ( "et", -1, 1, "", this), - new Among ( "eret", 30, 1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "gd", -1, -1, "", this), - new Among ( "dt", -1, -1, "", this), - new Among ( "gt", -1, -1, "", this), - new Among ( "kt", -1, -1, "", this) - }; + private final static DanishStemmer methodObject = new DanishStemmer (); - private Among a_2[] = { - new Among ( "ig", -1, 1, "", this), - new Among ( "lig", 0, 1, "", this), - new Among ( "elig", 1, 1, "", this), - new Among ( "els", -1, 1, "", this), - new Among ( "l\u00F8st", -1, 2, "", this) - }; + private final static Among a_0[] = { + new Among ( "hed", -1, 1, "", methodObject ), + new Among ( "ethed", 0, 1, "", methodObject ), + new Among ( "ered", -1, 1, "", methodObject ), + new Among ( "e", -1, 1, "", methodObject ), + new Among ( "erede", 3, 1, "", methodObject ), + new Among ( "ende", 3, 1, "", methodObject ), + new Among ( "erende", 5, 1, "", methodObject ), + new Among ( "ene", 3, 1, "", methodObject ), + new Among ( "erne", 3, 1, "", methodObject ), + new Among ( "ere", 3, 1, "", methodObject ), + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "heden", 10, 1, "", methodObject ), + new Among ( "eren", 10, 1, "", methodObject ), + new Among ( "er", 
-1, 1, "", methodObject ), + new Among ( "heder", 13, 1, "", methodObject ), + new Among ( "erer", 13, 1, "", methodObject ), + new Among ( "s", -1, 2, "", methodObject ), + new Among ( "heds", 16, 1, "", methodObject ), + new Among ( "es", 16, 1, "", methodObject ), + new Among ( "endes", 18, 1, "", methodObject ), + new Among ( "erendes", 19, 1, "", methodObject ), + new Among ( "enes", 18, 1, "", methodObject ), + new Among ( "ernes", 18, 1, "", methodObject ), + new Among ( "eres", 18, 1, "", methodObject ), + new Among ( "ens", 16, 1, "", methodObject ), + new Among ( "hedens", 24, 1, "", methodObject ), + new Among ( "erens", 24, 1, "", methodObject ), + new Among ( "ers", 16, 1, "", methodObject ), + new Among ( "ets", 16, 1, "", methodObject ), + new Among ( "erets", 28, 1, "", methodObject ), + new Among ( "et", -1, 1, "", methodObject ), + new Among ( "eret", 30, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + private final static Among a_1[] = { + new Among ( "gd", -1, -1, "", methodObject ), + new Among ( "dt", -1, -1, "", methodObject ), + new Among ( "gt", -1, -1, "", methodObject ), + new Among ( "kt", -1, -1, "", methodObject ) + }; - private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; + private final static Among a_2[] = { + new Among ( "ig", -1, 1, "", methodObject ), + new Among ( "lig", 0, 1, "", methodObject ), + new Among ( "elig", 1, 1, "", methodObject ), + new Among ( "els", -1, 1, "", methodObject ), + new Among ( "l\u00F8st", -1, 2, "", methodObject ) + }; + private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + + private static final char g_s_ending[] = {239, 254, 42, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16 }; + private int I_x; private int I_p1; - private StringBuffer S_ch = new StringBuffer(); + private java.lang.StringBuilder S_ch = new java.lang.StringBuilder(); - private void copy_from(DanishStemmer other) { - I_x = other.I_x; - I_p1 = other.I_p1; - S_ch = other.S_ch; - super.copy_from(other); - } + private void copy_from(DanishStemmer other) { + I_x = other.I_x; + I_p1 = other.I_p1; + S_ch = other.S_ch; + super.copy_from(other); + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; - // (, line 29 - I_p1 = limit; - // test, line 33 - v_1 = cursor; - // (, line 33 - // hop, line 33 - { - int c = cursor + 3; - if (0 > c || c > limit) - { - return false; - } - cursor = c; - } - // setmark x, line 33 - I_x = cursor; - cursor = v_1; - // goto, line 34 - golab0: while(true) - { - v_2 = cursor; - lab1: do { - if (!(in_grouping(g_v, 97, 248))) + // (, line 29 + I_p1 = limit; + // test, line 33 + v_1 = cursor; + // (, line 33 + // hop, line 33 { - break lab1; + int c = cursor + 3; + if (0 > c || c > limit) + { + return false; + } + cursor = c; } - cursor = v_2; - break golab0; - } while (false); - cursor = v_2; - if (cursor >= limit) - { - return false; - } - cursor++; - } - // gopast, line 34 - golab2: while(true) - { - lab3: do { - if (!(out_grouping(g_v, 97, 248))) + // setmark x, line 33 + I_x = cursor; + cursor = v_1; + // goto, line 34 + golab0: while(true) { - break lab3; + v_2 = cursor; + lab1: do { + if (!(in_grouping(g_v, 97, 248))) + { + break lab1; + } + cursor = v_2; + break golab0; + } while (false); + cursor = v_2; + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab2; - } while (false); - if (cursor >= limit) 
- { - return false; + // gopast, line 34 + golab2: while(true) + { + lab3: do { + if (!(out_grouping(g_v, 97, 248))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; + } + // setmark p1, line 34 + I_p1 = cursor; + // try, line 35 + lab4: do { + // (, line 35 + if (!(I_p1 < I_x)) + { + break lab4; + } + I_p1 = I_x; + } while (false); + return true; } - cursor++; - } - // setmark p1, line 34 - I_p1 = cursor; - // try, line 35 - lab4: do { - // (, line 35 - if (!(I_p1 < I_x)) - { - break lab4; - } - I_p1 = I_x; - } while (false); - return true; - } - private boolean r_main_suffix() { + private boolean r_main_suffix() { int among_var; int v_1; int v_2; - // (, line 40 - // setlimit, line 41 - v_1 = limit - cursor; - // tomark, line 41 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 41 - // [, line 41 - ket = cursor; - // substring, line 41 - among_var = find_among_b(a_0, 32); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 41 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 48 - // delete, line 48 - slice_del(); - break; - case 2: - // (, line 50 - if (!(in_grouping_b(g_s_ending, 97, 229))) + // (, line 40 + // setlimit, line 41 + v_1 = limit - cursor; + // tomark, line 41 + if (cursor < I_p1) { return false; } - // delete, line 50 - slice_del(); - break; - } - return true; - } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 41 + // [, line 41 + ket = cursor; + // substring, line 41 + among_var = find_among_b(a_0, 32); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 41 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 48 + // delete, line 48 + slice_del(); + break; + case 2: + // (, line 50 + if (!(in_grouping_b(g_s_ending, 97, 229))) + { + return false; + } + // delete, line 50 + slice_del(); + break; + } + return true; + } - private boolean r_consonant_pair() { + private boolean r_consonant_pair() { int v_1; int v_2; int v_3; - // (, line 54 - // test, line 55 - v_1 = limit - cursor; - // (, line 55 - // setlimit, line 56 - v_2 = limit - cursor; - // tomark, line 56 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_3 = limit_backward; - limit_backward = cursor; - cursor = limit - v_2; - // (, line 56 - // [, line 56 - ket = cursor; - // substring, line 56 - if (find_among_b(a_1, 4) == 0) - { - limit_backward = v_3; - return false; - } - // ], line 56 - bra = cursor; - limit_backward = v_3; - cursor = limit - v_1; - // next, line 62 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 62 - bra = cursor; - // delete, line 62 - slice_del(); - return true; - } + // (, line 54 + // test, line 55 + v_1 = limit - cursor; + // (, line 55 + // setlimit, line 56 + v_2 = limit - cursor; + // tomark, line 56 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_3 = limit_backward; + limit_backward = cursor; + cursor = limit - v_2; + // (, line 56 + // [, line 56 + ket = cursor; + // substring, line 56 + if (find_among_b(a_1, 4) == 0) + { + limit_backward = v_3; + return false; + } + // ], line 56 + bra = cursor; + limit_backward = v_3; + cursor = limit - v_1; + // next, line 62 + if (cursor <= limit_backward) + { + return false; + } + 
cursor--; + // ], line 62 + bra = cursor; + // delete, line 62 + slice_del(); + return true; + } - private boolean r_other_suffix() { + private boolean r_other_suffix() { int among_var; int v_1; int v_2; int v_3; int v_4; - // (, line 65 - // do, line 66 - v_1 = limit - cursor; - lab0: do { - // (, line 66 - // [, line 66 - ket = cursor; - // literal, line 66 - if (!(eq_s_b(2, "st"))) - { - break lab0; - } - // ], line 66 - bra = cursor; - // literal, line 66 - if (!(eq_s_b(2, "ig"))) - { - break lab0; - } - // delete, line 66 - slice_del(); - } while (false); - cursor = limit - v_1; - // setlimit, line 67 - v_2 = limit - cursor; - // tomark, line 67 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_3 = limit_backward; - limit_backward = cursor; - cursor = limit - v_2; - // (, line 67 - // [, line 67 - ket = cursor; - // substring, line 67 - among_var = find_among_b(a_2, 5); - if (among_var == 0) - { - limit_backward = v_3; - return false; - } - // ], line 67 - bra = cursor; - limit_backward = v_3; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 70 - // delete, line 70 - slice_del(); - // do, line 70 - v_4 = limit - cursor; - lab1: do { - // call consonant_pair, line 70 - if (!r_consonant_pair()) + // (, line 65 + // do, line 66 + v_1 = limit - cursor; + lab0: do { + // (, line 66 + // [, line 66 + ket = cursor; + // literal, line 66 + if (!(eq_s_b(2, "st"))) { - break lab1; + break lab0; } + // ], line 66 + bra = cursor; + // literal, line 66 + if (!(eq_s_b(2, "ig"))) + { + break lab0; + } + // delete, line 66 + slice_del(); } while (false); - cursor = limit - v_4; - break; - case 2: - // (, line 72 - // <-, line 72 - slice_from("l\u00F8s"); - break; - } - return true; - } + cursor = limit - v_1; + // setlimit, line 67 + v_2 = limit - cursor; + // tomark, line 67 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_3 = limit_backward; + limit_backward = cursor; + cursor = limit - v_2; + // (, line 67 + // [, line 67 + ket = cursor; + // substring, line 67 + among_var = find_among_b(a_2, 5); + if (among_var == 0) + { + limit_backward = v_3; + return false; + } + // ], line 67 + bra = cursor; + limit_backward = v_3; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 70 + // delete, line 70 + slice_del(); + // do, line 70 + v_4 = limit - cursor; + lab1: do { + // call consonant_pair, line 70 + if (!r_consonant_pair()) + { + break lab1; + } + } while (false); + cursor = limit - v_4; + break; + case 2: + // (, line 72 + // <-, line 72 + slice_from("l\u00F8s"); + break; + } + return true; + } - private boolean r_undouble() { + private boolean r_undouble() { int v_1; int v_2; - // (, line 75 - // setlimit, line 76 - v_1 = limit - cursor; - // tomark, line 76 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 76 - // [, line 76 - ket = cursor; - if (!(out_grouping_b(g_v, 97, 248))) - { - limit_backward = v_2; - return false; - } - // ], line 76 - bra = cursor; - // -> ch, line 76 - S_ch = slice_to(S_ch); - limit_backward = v_2; - // name ch, line 77 - if (!(eq_v_b(S_ch))) - { - return false; - } - // delete, line 78 - slice_del(); - return true; - } + // (, line 75 + // setlimit, line 76 + v_1 = limit - cursor; + // tomark, line 76 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 76 + // [, line 76 + ket = cursor; + if 
(!(out_grouping_b(g_v, 97, 248))) + { + limit_backward = v_2; + return false; + } + // ], line 76 + bra = cursor; + // -> ch, line 76 + S_ch = slice_to(S_ch); + limit_backward = v_2; + // name ch, line 77 + if (!(eq_v_b(S_ch))) + { + return false; + } + // delete, line 78 + slice_del(); + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 82 - // do, line 84 - v_1 = cursor; - lab0: do { - // call mark_regions, line 84 - if (!r_mark_regions()) - { - break lab0; + // (, line 82 + // do, line 84 + v_1 = cursor; + lab0: do { + // call mark_regions, line 84 + if (!r_mark_regions()) + { + break lab0; + } + } while (false); + cursor = v_1; + // backwards, line 85 + limit_backward = cursor; cursor = limit; + // (, line 85 + // do, line 86 + v_2 = limit - cursor; + lab1: do { + // call main_suffix, line 86 + if (!r_main_suffix()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 87 + v_3 = limit - cursor; + lab2: do { + // call consonant_pair, line 87 + if (!r_consonant_pair()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 88 + v_4 = limit - cursor; + lab3: do { + // call other_suffix, line 88 + if (!r_other_suffix()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // do, line 89 + v_5 = limit - cursor; + lab4: do { + // call undouble, line 89 + if (!r_undouble()) + { + break lab4; + } + } while (false); + cursor = limit - v_5; + cursor = limit_backward; return true; } - } while (false); - cursor = v_1; - // backwards, line 85 - limit_backward = cursor; cursor = limit; - // (, line 85 - // do, line 86 - v_2 = limit - cursor; - lab1: do { - // call main_suffix, line 86 - if (!r_main_suffix()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 87 - v_3 = limit - cursor; - lab2: do { - // call consonant_pair, line 87 - if (!r_consonant_pair()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 88 - v_4 = limit - cursor; - lab3: do { - // call other_suffix, line 88 - if (!r_other_suffix()) - { - break lab3; - } - } while (false); - cursor = limit - v_4; - // do, line 89 - v_5 = limit - cursor; - lab4: do { - // call undouble, line 89 - if (!r_undouble()) - { - break lab4; - } - } while (false); - cursor = limit - v_5; - cursor = limit_backward; return true; + + @Override + public boolean equals( Object o ) { + return o instanceof DanishStemmer; } -} + @Override + public int hashCode() { + return DanishStemmer.class.getName().hashCode(); + } + + +} Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/DutchStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/DutchStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/DutchStemmer.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/DutchStemmer.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,490 +1,497 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. 
- */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class DutchStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "", -1, 6, "", this), - new Among ( "\u00E1", 0, 1, "", this), - new Among ( "\u00E4", 0, 1, "", this), - new Among ( "\u00E9", 0, 2, "", this), - new Among ( "\u00EB", 0, 2, "", this), - new Among ( "\u00ED", 0, 3, "", this), - new Among ( "\u00EF", 0, 3, "", this), - new Among ( "\u00F3", 0, 4, "", this), - new Among ( "\u00F6", 0, 4, "", this), - new Among ( "\u00FA", 0, 5, "", this), - new Among ( "\u00FC", 0, 5, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "", -1, 3, "", this), - new Among ( "I", 0, 2, "", this), - new Among ( "Y", 0, 1, "", this) - }; + private final static DutchStemmer methodObject = new DutchStemmer (); - private Among a_2[] = { - new Among ( "dd", -1, -1, "", this), - new Among ( "kk", -1, -1, "", this), - new Among ( "tt", -1, -1, "", this) - }; + private final static Among a_0[] = { + new Among ( "", -1, 6, "", methodObject ), + new Among ( "\u00E1", 0, 1, "", methodObject ), + new Among ( "\u00E4", 0, 1, "", methodObject ), + new Among ( "\u00E9", 0, 2, "", methodObject ), + new Among ( "\u00EB", 0, 2, "", methodObject ), + new Among ( "\u00ED", 0, 3, "", methodObject ), + new Among ( "\u00EF", 0, 3, "", methodObject ), + new Among ( "\u00F3", 0, 4, "", methodObject ), + new Among ( "\u00F6", 0, 4, "", methodObject ), + new Among ( "\u00FA", 0, 5, "", methodObject ), + new Among ( "\u00FC", 0, 5, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "ene", -1, 2, "", this), - new Among ( "se", -1, 3, "", this), - new Among ( "en", -1, 2, "", this), - new Among ( "heden", 2, 1, "", this), - new Among ( "s", -1, 3, "", this) - }; + private final static Among a_1[] = { + new Among ( "", -1, 3, "", methodObject ), + new Among ( "I", 0, 2, "", methodObject ), + new Among ( "Y", 0, 1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "end", -1, 1, "", this), - new Among ( "ig", -1, 2, "", this), - new Among ( "ing", -1, 1, "", this), - new Among ( "lijk", -1, 3, "", this), - new Among ( "baar", -1, 4, "", this), - new Among ( "bar", -1, 5, "", this) - }; + private final static Among a_2[] = { + new Among ( "dd", -1, -1, "", methodObject ), + new Among ( "kk", -1, -1, "", methodObject ), + new Among ( "tt", -1, -1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "aa", -1, -1, "", this), - new Among ( "ee", -1, -1, "", this), - new Among ( "oo", -1, -1, "", this), - new Among ( "uu", -1, -1, "", this) - }; + private final static Among a_3[] = { + new Among ( "ene", -1, 2, "", methodObject ), + new Among ( "se", -1, 3, "", methodObject ), + new Among ( "en", -1, 2, "", methodObject ), + new Among ( "heden", 2, 1, "", methodObject ), + new Among ( "s", -1, 3, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + private final static Among a_4[] = { + new Among ( "end", -1, 1, "", methodObject ), + new Among ( "ig", -1, 2, "", methodObject ), + new Among ( "ing", -1, 1, "", methodObject ), + new Among ( "lijk", -1, 3, "", methodObject ), + new Among ( "baar", -1, 4, "", methodObject ), + new Among ( "bar", -1, 5, "", methodObject ) + }; - private static final char g_v_I[] = {1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + private final 
static Among a_5[] = { + new Among ( "aa", -1, -1, "", methodObject ), + new Among ( "ee", -1, -1, "", methodObject ), + new Among ( "oo", -1, -1, "", methodObject ), + new Among ( "uu", -1, -1, "", methodObject ) + }; - private static final char g_v_j[] = {17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + private static final char g_v_I[] = {1, 0, 0, 17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + + private static final char g_v_j[] = {17, 67, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + private int I_p2; private int I_p1; private boolean B_e_found; - private void copy_from(DutchStemmer other) { - I_p2 = other.I_p2; - I_p1 = other.I_p1; - B_e_found = other.B_e_found; - super.copy_from(other); - } + private void copy_from(DutchStemmer other) { + I_p2 = other.I_p2; + I_p1 = other.I_p1; + B_e_found = other.B_e_found; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int among_var; int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; - // (, line 41 - // test, line 42 - v_1 = cursor; - // repeat, line 42 - replab0: while(true) - { - v_2 = cursor; - lab1: do { - // (, line 42 - // [, line 43 - bra = cursor; - // substring, line 43 - among_var = find_among(a_0, 11); - if (among_var == 0) + // (, line 41 + // test, line 42 + v_1 = cursor; + // repeat, line 42 + replab0: while(true) { - break lab1; - } - // ], line 43 - ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 45 - // <-, line 45 - slice_from("a"); - break; - case 2: - // (, line 47 - // <-, line 47 - slice_from("e"); - break; - case 3: - // (, line 49 - // <-, line 49 - slice_from("i"); - break; - case 4: - // (, line 51 - // <-, line 51 - slice_from("o"); - break; - case 5: - // (, line 53 - // <-, line 53 - slice_from("u"); - break; - case 6: - // (, line 54 - // next, line 54 - if (cursor >= limit) + v_2 = cursor; + lab1: do { + // (, line 42 + // [, line 43 + bra = cursor; + // substring, line 43 + among_var = find_among(a_0, 11); + if (among_var == 0) { break lab1; } - cursor++; - break; + // ], line 43 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 45 + // <-, line 45 + slice_from("a"); + break; + case 2: + // (, line 47 + // <-, line 47 + slice_from("e"); + break; + case 3: + // (, line 49 + // <-, line 49 + slice_from("i"); + break; + case 4: + // (, line 51 + // <-, line 51 + slice_from("o"); + break; + case 5: + // (, line 53 + // <-, line 53 + slice_from("u"); + break; + case 6: + // (, line 54 + // next, line 54 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_2; + break replab0; } - continue replab0; - } while (false); - cursor = v_2; - break replab0; - } - cursor = v_1; - // try, line 57 - v_3 = cursor; - lab2: do { - // (, line 57 - // [, line 57 - bra = cursor; - // literal, line 57 - if (!(eq_s(1, "y"))) - { - cursor = v_3; - break lab2; - } - // ], line 57 - ket = cursor; - // <-, line 57 - slice_from("Y"); - } while (false); - // repeat, line 58 - replab3: while(true) - { - v_4 = cursor; - lab4: do { - // goto, line 58 - golab5: while(true) + cursor = v_1; + // try, line 57 + v_3 = cursor; + lab2: do { + // (, line 57 + // [, line 57 + bra = cursor; + // literal, line 57 + if (!(eq_s(1, "y"))) + { + cursor = v_3; + break lab2; + } + // ], line 57 + ket = cursor; + // <-, line 57 + slice_from("Y"); + } while 
(false); + // repeat, line 58 + replab3: while(true) { - v_5 = cursor; - lab6: do { - // (, line 58 - if (!(in_grouping(g_v, 97, 232))) + v_4 = cursor; + lab4: do { + // goto, line 58 + golab5: while(true) { - break lab6; - } - // [, line 59 - bra = cursor; - // or, line 59 - lab7: do { - v_6 = cursor; - lab8: do { - // (, line 59 - // literal, line 59 - if (!(eq_s(1, "i"))) - { - break lab8; - } - // ], line 59 - ket = cursor; + v_5 = cursor; + lab6: do { + // (, line 58 if (!(in_grouping(g_v, 97, 232))) { - break lab8; + break lab6; } - // <-, line 59 - slice_from("I"); - break lab7; + // [, line 59 + bra = cursor; + // or, line 59 + lab7: do { + v_6 = cursor; + lab8: do { + // (, line 59 + // literal, line 59 + if (!(eq_s(1, "i"))) + { + break lab8; + } + // ], line 59 + ket = cursor; + if (!(in_grouping(g_v, 97, 232))) + { + break lab8; + } + // <-, line 59 + slice_from("I"); + break lab7; + } while (false); + cursor = v_6; + // (, line 60 + // literal, line 60 + if (!(eq_s(1, "y"))) + { + break lab6; + } + // ], line 60 + ket = cursor; + // <-, line 60 + slice_from("Y"); + } while (false); + cursor = v_5; + break golab5; } while (false); - cursor = v_6; - // (, line 60 - // literal, line 60 - if (!(eq_s(1, "y"))) + cursor = v_5; + if (cursor >= limit) { - break lab6; + break lab4; } - // ], line 60 - ket = cursor; - // <-, line 60 - slice_from("Y"); - } while (false); - cursor = v_5; - break golab5; + cursor++; + } + continue replab3; } while (false); - cursor = v_5; + cursor = v_4; + break replab3; + } + return true; + } + + private boolean r_mark_regions() { + // (, line 64 + I_p1 = limit; + I_p2 = limit; + // gopast, line 69 + golab0: while(true) + { + lab1: do { + if (!(in_grouping(g_v, 97, 232))) + { + break lab1; + } + break golab0; + } while (false); if (cursor >= limit) { + return false; + } + cursor++; + } + // gopast, line 69 + golab2: while(true) + { + lab3: do { + if (!(out_grouping(g_v, 97, 232))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; + } + // setmark p1, line 69 + I_p1 = cursor; + // try, line 70 + lab4: do { + // (, line 70 + if (!(I_p1 < 3)) + { break lab4; } + I_p1 = 3; + } while (false); + // gopast, line 71 + golab5: while(true) + { + lab6: do { + if (!(in_grouping(g_v, 97, 232))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + return false; + } cursor++; } - continue replab3; - } while (false); - cursor = v_4; - break replab3; - } - return true; - } - - private boolean r_mark_regions() { - // (, line 64 - I_p1 = limit; - I_p2 = limit; - // gopast, line 69 - golab0: while(true) - { - lab1: do { - if (!(in_grouping(g_v, 97, 232))) + // gopast, line 71 + golab7: while(true) { - break lab1; + lab8: do { + if (!(out_grouping(g_v, 97, 232))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab0; - } while (false); - if (cursor >= limit) - { - return false; + // setmark p2, line 71 + I_p2 = cursor; + return true; } - cursor++; - } - // gopast, line 69 - golab2: while(true) - { - lab3: do { - if (!(out_grouping(g_v, 97, 232))) + + private boolean r_postlude() { + int among_var; + int v_1; + // repeat, line 75 + replab0: while(true) { - break lab3; + v_1 = cursor; + lab1: do { + // (, line 75 + // [, line 77 + bra = cursor; + // substring, line 77 + among_var = find_among(a_1, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 77 + ket = cursor; + switch(among_var) { + 
case 0: + break lab1; + case 1: + // (, line 78 + // <-, line 78 + slice_from("y"); + break; + case 2: + // (, line 79 + // <-, line 79 + slice_from("i"); + break; + case 3: + // (, line 80 + // next, line 80 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - break golab2; - } while (false); - if (cursor >= limit) - { - return false; + return true; } - cursor++; - } - // setmark p1, line 69 - I_p1 = cursor; - // try, line 70 - lab4: do { - // (, line 70 - if (!(I_p1 < 3)) - { - break lab4; - } - I_p1 = 3; - } while (false); - // gopast, line 71 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 232))) + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { - break lab6; + return false; } - break golab5; - } while (false); - if (cursor >= limit) - { - return false; + return true; } - cursor++; - } - // gopast, line 71 - golab7: while(true) - { - lab8: do { - if (!(out_grouping(g_v, 97, 232))) + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab8; + return false; } - break golab7; - } while (false); - if (cursor >= limit) - { - return false; + return true; } - cursor++; - } - // setmark p2, line 71 - I_p2 = cursor; - return true; - } - private boolean r_postlude() { - int among_var; + private boolean r_undouble() { int v_1; - // repeat, line 75 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 75 - // [, line 77 - bra = cursor; - // substring, line 77 - among_var = find_among(a_1, 3); - if (among_var == 0) + // (, line 90 + // test, line 91 + v_1 = limit - cursor; + // among, line 91 + if (find_among_b(a_2, 3) == 0) { - break lab1; + return false; } - // ], line 77 + cursor = limit - v_1; + // [, line 91 ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 78 - // <-, line 78 - slice_from("y"); - break; - case 2: - // (, line 79 - // <-, line 79 - slice_from("i"); - break; - case 3: - // (, line 80 - // next, line 80 - if (cursor >= limit) - { - break lab1; - } - cursor++; - break; + // next, line 91 + if (cursor <= limit_backward) + { + return false; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + cursor--; + // ], line 91 + bra = cursor; + // delete, line 91 + slice_del(); + return true; + } - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_undouble() { + private boolean r_e_ending() { int v_1; - // (, line 90 - // test, line 91 - v_1 = limit - cursor; - // among, line 91 - if (find_among_b(a_2, 3) == 0) - { - return false; - } - cursor = limit - v_1; - // [, line 91 - ket = cursor; - // next, line 91 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 91 - bra = cursor; - // delete, line 91 - slice_del(); - return true; - } + // (, line 94 + // unset e_found, line 95 + B_e_found = false; + // [, line 96 + ket = cursor; + // literal, line 96 + if (!(eq_s_b(1, "e"))) + { + return false; + } + // ], line 96 + bra = cursor; + // call R1, line 96 + if (!r_R1()) + { + return false; + } + // test, line 96 + v_1 = limit - cursor; + if (!(out_grouping_b(g_v, 97, 232))) + { + return false; + } + cursor = limit - v_1; + // delete, line 96 + slice_del(); + // set e_found, line 97 + B_e_found = true; + // call undouble, line 98 + if (!r_undouble()) + { + return false; + } + return true; + 
} - private boolean r_e_ending() { + private boolean r_en_ending() { int v_1; - // (, line 94 - // unset e_found, line 95 - B_e_found = false; - // [, line 96 - ket = cursor; - // literal, line 96 - if (!(eq_s_b(1, "e"))) - { - return false; - } - // ], line 96 - bra = cursor; - // call R1, line 96 - if (!r_R1()) - { - return false; - } - // test, line 96 - v_1 = limit - cursor; - if (!(out_grouping_b(g_v, 97, 232))) - { - return false; - } - cursor = limit - v_1; - // delete, line 96 - slice_del(); - // set e_found, line 97 - B_e_found = true; - // call undouble, line 98 - if (!r_undouble()) - { - return false; - } - return true; - } - - private boolean r_en_ending() { - int v_1; int v_2; - // (, line 101 - // call R1, line 102 - if (!r_R1()) - { - return false; - } - // and, line 102 - v_1 = limit - cursor; - if (!(out_grouping_b(g_v, 97, 232))) - { - return false; - } - cursor = limit - v_1; - // not, line 102 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 102 - if (!(eq_s_b(3, "gem"))) + // (, line 101 + // call R1, line 102 + if (!r_R1()) { - break lab0; + return false; } - return false; - } while (false); - cursor = limit - v_2; - } - // delete, line 102 - slice_del(); - // call undouble, line 103 - if (!r_undouble()) - { - return false; - } - return true; - } + // and, line 102 + v_1 = limit - cursor; + if (!(out_grouping_b(g_v, 97, 232))) + { + return false; + } + cursor = limit - v_1; + // not, line 102 + { + v_2 = limit - cursor; + lab0: do { + // literal, line 102 + if (!(eq_s_b(3, "gem"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; + } + // delete, line 102 + slice_del(); + // call undouble, line 103 + if (!r_undouble()) + { + return false; + } + return true; + } - private boolean r_standard_suffix() { + private boolean r_standard_suffix() { int among_var; int v_1; int v_2; @@ -496,342 +503,355 @@ int v_8; int v_9; int v_10; - // (, line 106 - // do, line 107 - v_1 = limit - cursor; - lab0: do { - // (, line 107 - // [, line 108 - ket = cursor; - // substring, line 108 - among_var = find_among_b(a_3, 5); - if (among_var == 0) - { - break lab0; - } - // ], line 108 - bra = cursor; - switch(among_var) { - case 0: - break lab0; - case 1: - // (, line 110 - // call R1, line 110 - if (!r_R1()) + // (, line 106 + // do, line 107 + v_1 = limit - cursor; + lab0: do { + // (, line 107 + // [, line 108 + ket = cursor; + // substring, line 108 + among_var = find_among_b(a_3, 5); + if (among_var == 0) { break lab0; } - // <-, line 110 - slice_from("heid"); - break; - case 2: - // (, line 113 - // call en_ending, line 113 - if (!r_en_ending()) + // ], line 108 + bra = cursor; + switch(among_var) { + case 0: + break lab0; + case 1: + // (, line 110 + // call R1, line 110 + if (!r_R1()) + { + break lab0; + } + // <-, line 110 + slice_from("heid"); + break; + case 2: + // (, line 113 + // call en_ending, line 113 + if (!r_en_ending()) + { + break lab0; + } + break; + case 3: + // (, line 116 + // call R1, line 116 + if (!r_R1()) + { + break lab0; + } + if (!(out_grouping_b(g_v_j, 97, 232))) + { + break lab0; + } + // delete, line 116 + slice_del(); + break; + } + } while (false); + cursor = limit - v_1; + // do, line 120 + v_2 = limit - cursor; + lab1: do { + // call e_ending, line 120 + if (!r_e_ending()) { - break lab0; + break lab1; } - break; - case 3: - // (, line 116 - // call R1, line 116 - if (!r_R1()) + } while (false); + cursor = limit - v_2; + // do, line 122 + v_3 = limit - cursor; + lab2: do { + // (, line 122 + // [, line 122 + 
ket = cursor; + // literal, line 122 + if (!(eq_s_b(4, "heid"))) { - break lab0; + break lab2; } - if (!(out_grouping_b(g_v_j, 97, 232))) + // ], line 122 + bra = cursor; + // call R2, line 122 + if (!r_R2()) { - break lab0; + break lab2; } - // delete, line 116 + // not, line 122 + { + v_4 = limit - cursor; + lab3: do { + // literal, line 122 + if (!(eq_s_b(1, "c"))) + { + break lab3; + } + break lab2; + } while (false); + cursor = limit - v_4; + } + // delete, line 122 slice_del(); - break; - } - } while (false); - cursor = limit - v_1; - // do, line 120 - v_2 = limit - cursor; - lab1: do { - // call e_ending, line 120 - if (!r_e_ending()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 122 - v_3 = limit - cursor; - lab2: do { - // (, line 122 - // [, line 122 - ket = cursor; - // literal, line 122 - if (!(eq_s_b(4, "heid"))) - { - break lab2; - } - // ], line 122 - bra = cursor; - // call R2, line 122 - if (!r_R2()) - { - break lab2; - } - // not, line 122 - { - v_4 = limit - cursor; - lab3: do { - // literal, line 122 - if (!(eq_s_b(1, "c"))) + // [, line 123 + ket = cursor; + // literal, line 123 + if (!(eq_s_b(2, "en"))) { - break lab3; + break lab2; } - break lab2; + // ], line 123 + bra = cursor; + // call en_ending, line 123 + if (!r_en_ending()) + { + break lab2; + } } while (false); - cursor = limit - v_4; - } - // delete, line 122 - slice_del(); - // [, line 123 - ket = cursor; - // literal, line 123 - if (!(eq_s_b(2, "en"))) - { - break lab2; - } - // ], line 123 - bra = cursor; - // call en_ending, line 123 - if (!r_en_ending()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 126 - v_5 = limit - cursor; - lab4: do { - // (, line 126 - // [, line 127 - ket = cursor; - // substring, line 127 - among_var = find_among_b(a_4, 6); - if (among_var == 0) - { - break lab4; - } - // ], line 127 - bra = cursor; - switch(among_var) { - case 0: - break lab4; - case 1: - // (, line 129 - // call R2, line 129 - if (!r_R2()) + cursor = limit - v_3; + // do, line 126 + v_5 = limit - cursor; + lab4: do { + // (, line 126 + // [, line 127 + ket = cursor; + // substring, line 127 + among_var = find_among_b(a_4, 6); + if (among_var == 0) { break lab4; } - // delete, line 129 - slice_del(); - // or, line 130 - lab5: do { - v_6 = limit - cursor; - lab6: do { - // (, line 130 - // [, line 130 - ket = cursor; - // literal, line 130 - if (!(eq_s_b(2, "ig"))) + // ], line 127 + bra = cursor; + switch(among_var) { + case 0: + break lab4; + case 1: + // (, line 129 + // call R2, line 129 + if (!r_R2()) { - break lab6; + break lab4; } - // ], line 130 - bra = cursor; - // call R2, line 130 + // delete, line 129 + slice_del(); + // or, line 130 + lab5: do { + v_6 = limit - cursor; + lab6: do { + // (, line 130 + // [, line 130 + ket = cursor; + // literal, line 130 + if (!(eq_s_b(2, "ig"))) + { + break lab6; + } + // ], line 130 + bra = cursor; + // call R2, line 130 + if (!r_R2()) + { + break lab6; + } + // not, line 130 + { + v_7 = limit - cursor; + lab7: do { + // literal, line 130 + if (!(eq_s_b(1, "e"))) + { + break lab7; + } + break lab6; + } while (false); + cursor = limit - v_7; + } + // delete, line 130 + slice_del(); + break lab5; + } while (false); + cursor = limit - v_6; + // call undouble, line 130 + if (!r_undouble()) + { + break lab4; + } + } while (false); + break; + case 2: + // (, line 133 + // call R2, line 133 if (!r_R2()) { - break lab6; + break lab4; } - // not, line 130 + // not, line 133 { - v_7 = limit - cursor; - lab7: 
do { - // literal, line 130 + v_8 = limit - cursor; + lab8: do { + // literal, line 133 if (!(eq_s_b(1, "e"))) { - break lab7; + break lab8; } - break lab6; + break lab4; } while (false); - cursor = limit - v_7; + cursor = limit - v_8; } - // delete, line 130 + // delete, line 133 slice_del(); - break lab5; - } while (false); - cursor = limit - v_6; - // call undouble, line 130 - if (!r_undouble()) - { - break lab4; - } - } while (false); - break; - case 2: - // (, line 133 - // call R2, line 133 - if (!r_R2()) - { - break lab4; - } - // not, line 133 - { - v_8 = limit - cursor; - lab8: do { - // literal, line 133 - if (!(eq_s_b(1, "e"))) + break; + case 3: + // (, line 136 + // call R2, line 136 + if (!r_R2()) { - break lab8; + break lab4; } - break lab4; - } while (false); - cursor = limit - v_8; + // delete, line 136 + slice_del(); + // call e_ending, line 136 + if (!r_e_ending()) + { + break lab4; + } + break; + case 4: + // (, line 139 + // call R2, line 139 + if (!r_R2()) + { + break lab4; + } + // delete, line 139 + slice_del(); + break; + case 5: + // (, line 142 + // call R2, line 142 + if (!r_R2()) + { + break lab4; + } + // Boolean test e_found, line 142 + if (!(B_e_found)) + { + break lab4; + } + // delete, line 142 + slice_del(); + break; } - // delete, line 133 - slice_del(); - break; - case 3: - // (, line 136 - // call R2, line 136 - if (!r_R2()) + } while (false); + cursor = limit - v_5; + // do, line 146 + v_9 = limit - cursor; + lab9: do { + // (, line 146 + if (!(out_grouping_b(g_v_I, 73, 232))) { - break lab4; + break lab9; } - // delete, line 136 - slice_del(); - // call e_ending, line 136 - if (!r_e_ending()) + // test, line 148 + v_10 = limit - cursor; + // (, line 148 + // among, line 149 + if (find_among_b(a_5, 4) == 0) { - break lab4; + break lab9; } - break; - case 4: - // (, line 139 - // call R2, line 139 - if (!r_R2()) + if (!(out_grouping_b(g_v, 97, 232))) { - break lab4; + break lab9; } - // delete, line 139 - slice_del(); - break; - case 5: - // (, line 142 - // call R2, line 142 - if (!r_R2()) + cursor = limit - v_10; + // [, line 152 + ket = cursor; + // next, line 152 + if (cursor <= limit_backward) { - break lab4; + break lab9; } - // Boolean test e_found, line 142 - if (!(B_e_found)) - { - break lab4; - } - // delete, line 142 + cursor--; + // ], line 152 + bra = cursor; + // delete, line 152 slice_del(); - break; + } while (false); + cursor = limit - v_9; + return true; } - } while (false); - cursor = limit - v_5; - // do, line 146 - v_9 = limit - cursor; - lab9: do { - // (, line 146 - if (!(out_grouping_b(g_v_I, 73, 232))) - { - break lab9; - } - // test, line 148 - v_10 = limit - cursor; - // (, line 148 - // among, line 149 - if (find_among_b(a_5, 4) == 0) - { - break lab9; - } - if (!(out_grouping_b(g_v, 97, 232))) - { - break lab9; - } - cursor = limit - v_10; - // [, line 152 - ket = cursor; - // next, line 152 - if (cursor <= limit_backward) - { - break lab9; - } - cursor--; - // ], line 152 - bra = cursor; - // delete, line 152 - slice_del(); - } while (false); - cursor = limit - v_9; - return true; - } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; int v_4; - // (, line 157 - // do, line 159 - v_1 = cursor; - lab0: do { - // call prelude, line 159 - if (!r_prelude()) - { - break lab0; + // (, line 157 + // do, line 159 + v_1 = cursor; + lab0: do { + // call prelude, line 159 + if (!r_prelude()) + { + break lab0; + } + } while (false); + cursor = v_1; + // do, line 160 + v_2 = cursor; + lab1: do { 
+ // call mark_regions, line 160 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 161 + limit_backward = cursor; cursor = limit; + // do, line 162 + v_3 = limit - cursor; + lab2: do { + // call standard_suffix, line 162 + if (!r_standard_suffix()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + cursor = limit_backward; // do, line 163 + v_4 = cursor; + lab3: do { + // call postlude, line 163 + if (!r_postlude()) + { + break lab3; + } + } while (false); + cursor = v_4; + return true; } - } while (false); - cursor = v_1; - // do, line 160 - v_2 = cursor; - lab1: do { - // call mark_regions, line 160 - if (!r_mark_regions()) - { - break lab1; - } - } while (false); - cursor = v_2; - // backwards, line 161 - limit_backward = cursor; cursor = limit; - // do, line 162 - v_3 = limit - cursor; - lab2: do { - // call standard_suffix, line 162 - if (!r_standard_suffix()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - cursor = limit_backward; // do, line 163 - v_4 = cursor; - lab3: do { - // call postlude, line 163 - if (!r_postlude()) - { - break lab3; - } - } while (false); - cursor = v_4; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof DutchStemmer; } + @Override + public int hashCode() { + return DutchStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/EnglishStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/EnglishStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/EnglishStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/EnglishStemmer.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,1146 +1,1154 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class EnglishStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "arsen", -1, -1, "", this), - new Among ( "commun", -1, -1, "", this), - new Among ( "gener", -1, -1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "'", -1, 1, "", this), - new Among ( "'s'", 0, 1, "", this), - new Among ( "'s", -1, 1, "", this) - }; + private final static EnglishStemmer methodObject = new EnglishStemmer (); - private Among a_2[] = { - new Among ( "ied", -1, 2, "", this), - new Among ( "s", -1, 3, "", this), - new Among ( "ies", 1, 2, "", this), - new Among ( "sses", 1, 1, "", this), - new Among ( "ss", 1, -1, "", this), - new Among ( "us", 1, -1, "", this) - }; + private final static Among a_0[] = { + new Among ( "arsen", -1, -1, "", methodObject ), + new Among ( "commun", -1, -1, "", methodObject ), + new Among ( "gener", -1, -1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "", -1, 3, "", this), - new Among ( "bb", 0, 2, "", this), - new Among ( "dd", 0, 2, "", this), - new Among ( "ff", 0, 2, "", this), - new Among ( "gg", 0, 2, "", this), - new Among ( "bl", 0, 1, "", this), - new Among ( "mm", 0, 2, "", this), - new Among ( "nn", 0, 2, "", this), - new Among ( "pp", 0, 2, "", this), - new Among ( "rr", 0, 2, "", this), - new Among ( "at", 0, 1, "", this), - new Among ( "tt", 0, 2, "", this), - new Among ( "iz", 0, 1, "", this) - }; + private final static Among a_1[] = { + new Among ( "'", -1, 1, "", methodObject ), + new Among ( "'s'", 0, 1, "", methodObject ), + new Among ( "'s", -1, 1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "ed", -1, 2, "", this), - new Among ( "eed", 0, 1, "", this), - new Among ( "ing", -1, 2, "", this), - new Among ( "edly", -1, 2, "", this), - new Among ( "eedly", 3, 1, "", this), - new Among ( "ingly", -1, 2, "", this) - }; + private final static Among a_2[] = { + new Among ( "ied", -1, 2, "", methodObject ), + new Among ( "s", -1, 3, "", methodObject ), + new Among ( "ies", 1, 2, "", methodObject ), + new Among ( "sses", 1, 1, "", methodObject ), + new Among ( "ss", 1, -1, "", methodObject ), + new Among ( "us", 1, -1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "anci", -1, 3, "", this), - new Among ( "enci", -1, 2, "", this), - new Among ( "ogi", -1, 13, "", this), - new Among ( "li", -1, 16, "", this), - new Among ( "bli", 3, 12, "", this), - new Among ( "abli", 4, 4, "", this), - new Among ( "alli", 3, 8, "", this), - new Among ( "fulli", 3, 14, "", this), - new Among ( "lessli", 3, 15, "", this), - new Among ( "ousli", 3, 10, "", this), - new Among ( "entli", 3, 5, "", this), - new Among ( "aliti", -1, 8, "", this), - new Among ( "biliti", -1, 12, "", this), - new Among ( "iviti", -1, 11, "", this), - new Among ( "tional", -1, 1, "", this), - new Among ( "ational", 14, 7, "", this), - new Among ( "alism", -1, 8, "", this), - new Among ( "ation", -1, 7, "", this), - new Among ( "ization", 17, 6, "", this), - new Among ( "izer", -1, 6, "", this), - new Among ( "ator", -1, 7, "", this), - new Among ( "iveness", -1, 11, "", this), - new Among ( "fulness", -1, 9, "", this), - new Among ( "ousness", -1, 10, "", this) - }; + private final static Among a_3[] = { + new Among ( "", -1, 3, "", methodObject ), + new Among ( "bb", 0, 2, "", methodObject ), + new Among ( "dd", 0, 2, "", methodObject ), + new Among ( "ff", 0, 2, "", methodObject ), + new Among ( "gg", 0, 2, "", methodObject ), + new Among ( "bl", 0, 1, "", 
methodObject ), + new Among ( "mm", 0, 2, "", methodObject ), + new Among ( "nn", 0, 2, "", methodObject ), + new Among ( "pp", 0, 2, "", methodObject ), + new Among ( "rr", 0, 2, "", methodObject ), + new Among ( "at", 0, 1, "", methodObject ), + new Among ( "tt", 0, 2, "", methodObject ), + new Among ( "iz", 0, 1, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "icate", -1, 4, "", this), - new Among ( "ative", -1, 6, "", this), - new Among ( "alize", -1, 3, "", this), - new Among ( "iciti", -1, 4, "", this), - new Among ( "ical", -1, 4, "", this), - new Among ( "tional", -1, 1, "", this), - new Among ( "ational", 5, 2, "", this), - new Among ( "ful", -1, 5, "", this), - new Among ( "ness", -1, 5, "", this) - }; + private final static Among a_4[] = { + new Among ( "ed", -1, 2, "", methodObject ), + new Among ( "eed", 0, 1, "", methodObject ), + new Among ( "ing", -1, 2, "", methodObject ), + new Among ( "edly", -1, 2, "", methodObject ), + new Among ( "eedly", 3, 1, "", methodObject ), + new Among ( "ingly", -1, 2, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "ic", -1, 1, "", this), - new Among ( "ance", -1, 1, "", this), - new Among ( "ence", -1, 1, "", this), - new Among ( "able", -1, 1, "", this), - new Among ( "ible", -1, 1, "", this), - new Among ( "ate", -1, 1, "", this), - new Among ( "ive", -1, 1, "", this), - new Among ( "ize", -1, 1, "", this), - new Among ( "iti", -1, 1, "", this), - new Among ( "al", -1, 1, "", this), - new Among ( "ism", -1, 1, "", this), - new Among ( "ion", -1, 2, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "ous", -1, 1, "", this), - new Among ( "ant", -1, 1, "", this), - new Among ( "ent", -1, 1, "", this), - new Among ( "ment", 15, 1, "", this), - new Among ( "ement", 16, 1, "", this) - }; + private final static Among a_5[] = { + new Among ( "anci", -1, 3, "", methodObject ), + new Among ( "enci", -1, 2, "", methodObject ), + new Among ( "ogi", -1, 13, "", methodObject ), + new Among ( "li", -1, 16, "", methodObject ), + new Among ( "bli", 3, 12, "", methodObject ), + new Among ( "abli", 4, 4, "", methodObject ), + new Among ( "alli", 3, 8, "", methodObject ), + new Among ( "fulli", 3, 14, "", methodObject ), + new Among ( "lessli", 3, 15, "", methodObject ), + new Among ( "ousli", 3, 10, "", methodObject ), + new Among ( "entli", 3, 5, "", methodObject ), + new Among ( "aliti", -1, 8, "", methodObject ), + new Among ( "biliti", -1, 12, "", methodObject ), + new Among ( "iviti", -1, 11, "", methodObject ), + new Among ( "tional", -1, 1, "", methodObject ), + new Among ( "ational", 14, 7, "", methodObject ), + new Among ( "alism", -1, 8, "", methodObject ), + new Among ( "ation", -1, 7, "", methodObject ), + new Among ( "ization", 17, 6, "", methodObject ), + new Among ( "izer", -1, 6, "", methodObject ), + new Among ( "ator", -1, 7, "", methodObject ), + new Among ( "iveness", -1, 11, "", methodObject ), + new Among ( "fulness", -1, 9, "", methodObject ), + new Among ( "ousness", -1, 10, "", methodObject ) + }; - private Among a_8[] = { - new Among ( "e", -1, 1, "", this), - new Among ( "l", -1, 2, "", this) - }; + private final static Among a_6[] = { + new Among ( "icate", -1, 4, "", methodObject ), + new Among ( "ative", -1, 6, "", methodObject ), + new Among ( "alize", -1, 3, "", methodObject ), + new Among ( "iciti", -1, 4, "", methodObject ), + new Among ( "ical", -1, 4, "", methodObject ), + new Among ( "tional", -1, 1, "", methodObject ), + new Among ( "ational", 5, 2, "", methodObject ), + new 
Among ( "ful", -1, 5, "", methodObject ), + new Among ( "ness", -1, 5, "", methodObject ) + }; - private Among a_9[] = { - new Among ( "succeed", -1, -1, "", this), - new Among ( "proceed", -1, -1, "", this), - new Among ( "exceed", -1, -1, "", this), - new Among ( "canning", -1, -1, "", this), - new Among ( "inning", -1, -1, "", this), - new Among ( "earring", -1, -1, "", this), - new Among ( "herring", -1, -1, "", this), - new Among ( "outing", -1, -1, "", this) - }; + private final static Among a_7[] = { + new Among ( "ic", -1, 1, "", methodObject ), + new Among ( "ance", -1, 1, "", methodObject ), + new Among ( "ence", -1, 1, "", methodObject ), + new Among ( "able", -1, 1, "", methodObject ), + new Among ( "ible", -1, 1, "", methodObject ), + new Among ( "ate", -1, 1, "", methodObject ), + new Among ( "ive", -1, 1, "", methodObject ), + new Among ( "ize", -1, 1, "", methodObject ), + new Among ( "iti", -1, 1, "", methodObject ), + new Among ( "al", -1, 1, "", methodObject ), + new Among ( "ism", -1, 1, "", methodObject ), + new Among ( "ion", -1, 2, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "ous", -1, 1, "", methodObject ), + new Among ( "ant", -1, 1, "", methodObject ), + new Among ( "ent", -1, 1, "", methodObject ), + new Among ( "ment", 15, 1, "", methodObject ), + new Among ( "ement", 16, 1, "", methodObject ) + }; - private Among a_10[] = { - new Among ( "andes", -1, -1, "", this), - new Among ( "atlas", -1, -1, "", this), - new Among ( "bias", -1, -1, "", this), - new Among ( "cosmos", -1, -1, "", this), - new Among ( "dying", -1, 3, "", this), - new Among ( "early", -1, 9, "", this), - new Among ( "gently", -1, 7, "", this), - new Among ( "howe", -1, -1, "", this), - new Among ( "idly", -1, 6, "", this), - new Among ( "lying", -1, 4, "", this), - new Among ( "news", -1, -1, "", this), - new Among ( "only", -1, 10, "", this), - new Among ( "singly", -1, 11, "", this), - new Among ( "skies", -1, 2, "", this), - new Among ( "skis", -1, 1, "", this), - new Among ( "sky", -1, -1, "", this), - new Among ( "tying", -1, 5, "", this), - new Among ( "ugly", -1, 8, "", this) - }; + private final static Among a_8[] = { + new Among ( "e", -1, 1, "", methodObject ), + new Among ( "l", -1, 2, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1 }; + private final static Among a_9[] = { + new Among ( "succeed", -1, -1, "", methodObject ), + new Among ( "proceed", -1, -1, "", methodObject ), + new Among ( "exceed", -1, -1, "", methodObject ), + new Among ( "canning", -1, -1, "", methodObject ), + new Among ( "inning", -1, -1, "", methodObject ), + new Among ( "earring", -1, -1, "", methodObject ), + new Among ( "herring", -1, -1, "", methodObject ), + new Among ( "outing", -1, -1, "", methodObject ) + }; - private static final char g_v_WXY[] = {1, 17, 65, 208, 1 }; + private final static Among a_10[] = { + new Among ( "andes", -1, -1, "", methodObject ), + new Among ( "atlas", -1, -1, "", methodObject ), + new Among ( "bias", -1, -1, "", methodObject ), + new Among ( "cosmos", -1, -1, "", methodObject ), + new Among ( "dying", -1, 3, "", methodObject ), + new Among ( "early", -1, 9, "", methodObject ), + new Among ( "gently", -1, 7, "", methodObject ), + new Among ( "howe", -1, -1, "", methodObject ), + new Among ( "idly", -1, 6, "", methodObject ), + new Among ( "lying", -1, 4, "", methodObject ), + new Among ( "news", -1, -1, "", methodObject ), + new Among ( "only", -1, 10, "", methodObject ), + new Among ( "singly", -1, 11, "", 
methodObject ), + new Among ( "skies", -1, 2, "", methodObject ), + new Among ( "skis", -1, 1, "", methodObject ), + new Among ( "sky", -1, -1, "", methodObject ), + new Among ( "tying", -1, 5, "", methodObject ), + new Among ( "ugly", -1, 8, "", methodObject ) + }; - private static final char g_valid_LI[] = {55, 141, 2 }; + private static final char g_v[] = {17, 65, 16, 1 }; + private static final char g_v_WXY[] = {1, 17, 65, 208, 1 }; + + private static final char g_valid_LI[] = {55, 141, 2 }; + private boolean B_Y_found; private int I_p2; private int I_p1; - private void copy_from(EnglishStemmer other) { - B_Y_found = other.B_Y_found; - I_p2 = other.I_p2; - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(EnglishStemmer other) { + B_Y_found = other.B_Y_found; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 25 - // unset Y_found, line 26 - B_Y_found = false; - // do, line 27 - v_1 = cursor; - lab0: do { - // (, line 27 - // [, line 27 - bra = cursor; - // literal, line 27 - if (!(eq_s(1, "'"))) - { - break lab0; - } - // ], line 27 - ket = cursor; - // delete, line 27 - slice_del(); - } while (false); - cursor = v_1; - // do, line 28 - v_2 = cursor; - lab1: do { - // (, line 28 - // [, line 28 - bra = cursor; - // literal, line 28 - if (!(eq_s(1, "y"))) - { - break lab1; - } - // ], line 28 - ket = cursor; - // <-, line 28 - slice_from("Y"); - // set Y_found, line 28 - B_Y_found = true; - } while (false); - cursor = v_2; - // do, line 29 - v_3 = cursor; - lab2: do { - // repeat, line 29 - replab3: while(true) - { - v_4 = cursor; - lab4: do { - // (, line 29 - // goto, line 29 - golab5: while(true) + // (, line 25 + // unset Y_found, line 26 + B_Y_found = false; + // do, line 27 + v_1 = cursor; + lab0: do { + // (, line 27 + // [, line 27 + bra = cursor; + // literal, line 27 + if (!(eq_s(1, "'"))) { - v_5 = cursor; - lab6: do { + break lab0; + } + // ], line 27 + ket = cursor; + // delete, line 27 + slice_del(); + } while (false); + cursor = v_1; + // do, line 28 + v_2 = cursor; + lab1: do { + // (, line 28 + // [, line 28 + bra = cursor; + // literal, line 28 + if (!(eq_s(1, "y"))) + { + break lab1; + } + // ], line 28 + ket = cursor; + // <-, line 28 + slice_from("Y"); + // set Y_found, line 28 + B_Y_found = true; + } while (false); + cursor = v_2; + // do, line 29 + v_3 = cursor; + lab2: do { + // repeat, line 29 + replab3: while(true) + { + v_4 = cursor; + lab4: do { // (, line 29 + // goto, line 29 + golab5: while(true) + { + v_5 = cursor; + lab6: do { + // (, line 29 + if (!(in_grouping(g_v, 97, 121))) + { + break lab6; + } + // [, line 29 + bra = cursor; + // literal, line 29 + if (!(eq_s(1, "y"))) + { + break lab6; + } + // ], line 29 + ket = cursor; + cursor = v_5; + break golab5; + } while (false); + cursor = v_5; + if (cursor >= limit) + { + break lab4; + } + cursor++; + } + // <-, line 29 + slice_from("Y"); + // set Y_found, line 29 + B_Y_found = true; + continue replab3; + } while (false); + cursor = v_4; + break replab3; + } + } while (false); + cursor = v_3; + return true; + } + + private boolean r_mark_regions() { + int v_1; + int v_2; + // (, line 32 + I_p1 = limit; + I_p2 = limit; + // do, line 35 + v_1 = cursor; + lab0: do { + // (, line 35 + // or, line 41 + lab1: do { + v_2 = cursor; + lab2: do { + // among, line 36 + if (find_among(a_0, 3) == 0) + { + break lab2; + } + break lab1; + } while 
(false); + cursor = v_2; + // (, line 41 + // gopast, line 41 + golab3: while(true) + { + lab4: do { + if (!(in_grouping(g_v, 97, 121))) + { + break lab4; + } + break golab3; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + // gopast, line 41 + golab5: while(true) + { + lab6: do { + if (!(out_grouping(g_v, 97, 121))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + } while (false); + // setmark p1, line 42 + I_p1 = cursor; + // gopast, line 43 + golab7: while(true) + { + lab8: do { if (!(in_grouping(g_v, 97, 121))) { - break lab6; + break lab8; } - // [, line 29 - bra = cursor; - // literal, line 29 - if (!(eq_s(1, "y"))) + break golab7; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } + // gopast, line 43 + golab9: while(true) + { + lab10: do { + if (!(out_grouping(g_v, 97, 121))) { - break lab6; + break lab10; } - // ], line 29 - ket = cursor; - cursor = v_5; - break golab5; + break golab9; } while (false); - cursor = v_5; if (cursor >= limit) { - break lab4; + break lab0; } cursor++; } - // <-, line 29 - slice_from("Y"); - // set Y_found, line 29 - B_Y_found = true; - continue replab3; + // setmark p2, line 43 + I_p2 = cursor; } while (false); - cursor = v_4; - break replab3; + cursor = v_1; + return true; } - } while (false); - cursor = v_3; - return true; - } - private boolean r_mark_regions() { + private boolean r_shortv() { int v_1; - int v_2; - // (, line 32 - I_p1 = limit; - I_p2 = limit; - // do, line 35 - v_1 = cursor; - lab0: do { - // (, line 35 - // or, line 41 - lab1: do { - v_2 = cursor; - lab2: do { - // among, line 36 - if (find_among(a_0, 3) == 0) - { - break lab2; - } - break lab1; - } while (false); - cursor = v_2; - // (, line 41 - // gopast, line 41 - golab3: while(true) - { - lab4: do { - if (!(in_grouping(g_v, 97, 121))) + // (, line 49 + // or, line 51 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 50 + if (!(out_grouping_b(g_v_WXY, 89, 121))) { - break lab4; + break lab1; } - break golab3; - } while (false); - if (cursor >= limit) - { - break lab0; - } - cursor++; - } - // gopast, line 41 - golab5: while(true) - { - lab6: do { - if (!(out_grouping(g_v, 97, 121))) + if (!(in_grouping_b(g_v, 97, 121))) { - break lab6; + break lab1; } - break golab5; + if (!(out_grouping_b(g_v, 97, 121))) + { + break lab1; + } + break lab0; } while (false); - if (cursor >= limit) + cursor = limit - v_1; + // (, line 52 + if (!(out_grouping_b(g_v, 97, 121))) { - break lab0; + return false; } - cursor++; - } - } while (false); - // setmark p1, line 42 - I_p1 = cursor; - // gopast, line 43 - golab7: while(true) - { - lab8: do { - if (!(in_grouping(g_v, 97, 121))) + if (!(in_grouping_b(g_v, 97, 121))) { - break lab8; + return false; } - break golab7; - } while (false); - if (cursor >= limit) - { - break lab0; - } - cursor++; - } - // gopast, line 43 - golab9: while(true) - { - lab10: do { - if (!(out_grouping(g_v, 97, 121))) + // atlimit, line 52 + if (cursor > limit_backward) { - break lab10; + return false; } - break golab9; } while (false); - if (cursor >= limit) + return true; + } + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { - break lab0; + return false; } - cursor++; + return true; } - // setmark p2, line 43 - I_p2 = cursor; - } while (false); - cursor = v_1; - return true; - } - private boolean r_shortv() { - int v_1; - // (, line 49 - // or, line 51 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 50 - if 
(!(out_grouping_b(g_v_WXY, 89, 121))) + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab1; + return false; } - if (!(in_grouping_b(g_v, 97, 121))) - { - break lab1; - } - if (!(out_grouping_b(g_v, 97, 121))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 52 - if (!(out_grouping_b(g_v, 97, 121))) - { - return false; + return true; } - if (!(in_grouping_b(g_v, 97, 121))) - { - return false; - } - // atlimit, line 52 - if (cursor > limit_backward) - { - return false; - } - } while (false); - return true; - } - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_Step_1a() { + private boolean r_Step_1a() { int among_var; int v_1; int v_2; - // (, line 58 - // try, line 59 - v_1 = limit - cursor; - lab0: do { - // (, line 59 - // [, line 60 - ket = cursor; - // substring, line 60 - among_var = find_among_b(a_1, 3); - if (among_var == 0) - { - cursor = limit - v_1; - break lab0; - } - // ], line 60 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_1; - break lab0; - case 1: - // (, line 62 - // delete, line 62 - slice_del(); - break; - } - } while (false); - // [, line 65 - ket = cursor; - // substring, line 65 - among_var = find_among_b(a_2, 6); - if (among_var == 0) - { - return false; - } - // ], line 65 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 66 - // <-, line 66 - slice_from("ss"); - break; - case 2: - // (, line 68 - // or, line 68 - lab1: do { - v_2 = limit - cursor; - lab2: do { - // (, line 68 - // hop, line 68 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - break lab2; - } - cursor = c; - } - // <-, line 68 - slice_from("i"); - break lab1; - } while (false); - cursor = limit - v_2; - // <-, line 68 - slice_from("ie"); + // (, line 58 + // try, line 59 + v_1 = limit - cursor; + lab0: do { + // (, line 59 + // [, line 60 + ket = cursor; + // substring, line 60 + among_var = find_among_b(a_1, 3); + if (among_var == 0) + { + cursor = limit - v_1; + break lab0; + } + // ], line 60 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_1; + break lab0; + case 1: + // (, line 62 + // delete, line 62 + slice_del(); + break; + } } while (false); - break; - case 3: - // (, line 69 - // next, line 69 - if (cursor <= limit_backward) + // [, line 65 + ket = cursor; + // substring, line 65 + among_var = find_among_b(a_2, 6); + if (among_var == 0) { return false; } - cursor--; - // gopast, line 69 - golab3: while(true) - { - lab4: do { - if (!(in_grouping_b(g_v, 97, 121))) + // ], line 65 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 66 + // <-, line 66 + slice_from("ss"); + break; + case 2: + // (, line 68 + // or, line 68 + lab1: do { + v_2 = limit - cursor; + lab2: do { + // (, line 68 + // hop, line 68 + { + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + break lab2; + } + cursor = c; + } + // <-, line 68 + slice_from("i"); + break lab1; + } while (false); + cursor = limit - v_2; + // <-, line 68 + slice_from("ie"); + } while (false); + break; + case 3: + // (, line 69 + // next, line 69 + if (cursor <= limit_backward) { - break lab4; + return false; } - break golab3; - } while (false); - if (cursor <= limit_backward) - { - return false; - } - cursor--; + cursor--; + // gopast, line 69 + golab3: while(true) + { + lab4: do { 
+ if (!(in_grouping_b(g_v, 97, 121))) + { + break lab4; + } + break golab3; + } while (false); + if (cursor <= limit_backward) + { + return false; + } + cursor--; + } + // delete, line 69 + slice_del(); + break; } - // delete, line 69 - slice_del(); - break; - } - return true; - } + return true; + } - private boolean r_Step_1b() { + private boolean r_Step_1b() { int among_var; int v_1; int v_3; int v_4; - // (, line 74 - // [, line 75 - ket = cursor; - // substring, line 75 - among_var = find_among_b(a_4, 6); - if (among_var == 0) - { - return false; - } - // ], line 75 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 77 - // call R1, line 77 - if (!r_R1()) + // (, line 74 + // [, line 75 + ket = cursor; + // substring, line 75 + among_var = find_among_b(a_4, 6); + if (among_var == 0) { return false; } - // <-, line 77 - slice_from("ee"); - break; - case 2: - // (, line 79 - // test, line 80 - v_1 = limit - cursor; - // gopast, line 80 - golab0: while(true) - { + // ], line 75 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 77 + // call R1, line 77 + if (!r_R1()) + { + return false; + } + // <-, line 77 + slice_from("ee"); + break; + case 2: + // (, line 79 + // test, line 80 + v_1 = limit - cursor; + // gopast, line 80 + golab0: while(true) + { + lab1: do { + if (!(in_grouping_b(g_v, 97, 121))) + { + break lab1; + } + break golab0; + } while (false); + if (cursor <= limit_backward) + { + return false; + } + cursor--; + } + cursor = limit - v_1; + // delete, line 80 + slice_del(); + // test, line 81 + v_3 = limit - cursor; + // substring, line 81 + among_var = find_among_b(a_3, 13); + if (among_var == 0) + { + return false; + } + cursor = limit - v_3; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 83 + // <+, line 83 + { + int c = cursor; + insert(cursor, cursor, "e"); + cursor = c; + } + break; + case 2: + // (, line 86 + // [, line 86 + ket = cursor; + // next, line 86 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // ], line 86 + bra = cursor; + // delete, line 86 + slice_del(); + break; + case 3: + // (, line 87 + // atmark, line 87 + if (cursor != I_p1) + { + return false; + } + // test, line 87 + v_4 = limit - cursor; + // call shortv, line 87 + if (!r_shortv()) + { + return false; + } + cursor = limit - v_4; + // <+, line 87 + { + int c = cursor; + insert(cursor, cursor, "e"); + cursor = c; + } + break; + } + break; + } + return true; + } + + private boolean r_Step_1c() { + int v_1; + int v_2; + // (, line 93 + // [, line 94 + ket = cursor; + // or, line 94 + lab0: do { + v_1 = limit - cursor; lab1: do { - if (!(in_grouping_b(g_v, 97, 121))) + // literal, line 94 + if (!(eq_s_b(1, "y"))) { break lab1; } - break golab0; + break lab0; } while (false); - if (cursor <= limit_backward) + cursor = limit - v_1; + // literal, line 94 + if (!(eq_s_b(1, "Y"))) { return false; } - cursor--; + } while (false); + // ], line 94 + bra = cursor; + if (!(out_grouping_b(g_v, 97, 121))) + { + return false; } - cursor = limit - v_1; - // delete, line 80 - slice_del(); - // test, line 81 - v_3 = limit - cursor; - // substring, line 81 - among_var = find_among_b(a_3, 13); + // not, line 95 + { + v_2 = limit - cursor; + lab2: do { + // atlimit, line 95 + if (cursor > limit_backward) + { + break lab2; + } + return false; + } while (false); + cursor = limit - v_2; + } + // <-, line 96 + slice_from("i"); + return true; + } + + private boolean r_Step_2() { + int among_var; + // (, line 
99 + // [, line 100 + ket = cursor; + // substring, line 100 + among_var = find_among_b(a_5, 24); if (among_var == 0) { return false; } - cursor = limit - v_3; + // ], line 100 + bra = cursor; + // call R1, line 100 + if (!r_R1()) + { + return false; + } switch(among_var) { case 0: return false; case 1: - // (, line 83 - // <+, line 83 - { - int c = cursor; - insert(cursor, cursor, "e"); - cursor = c; - } + // (, line 101 + // <-, line 101 + slice_from("tion"); break; case 2: - // (, line 86 - // [, line 86 - ket = cursor; - // next, line 86 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 86 - bra = cursor; - // delete, line 86 - slice_del(); + // (, line 102 + // <-, line 102 + slice_from("ence"); break; case 3: - // (, line 87 - // atmark, line 87 - if (cursor != I_p1) + // (, line 103 + // <-, line 103 + slice_from("ance"); + break; + case 4: + // (, line 104 + // <-, line 104 + slice_from("able"); + break; + case 5: + // (, line 105 + // <-, line 105 + slice_from("ent"); + break; + case 6: + // (, line 107 + // <-, line 107 + slice_from("ize"); + break; + case 7: + // (, line 109 + // <-, line 109 + slice_from("ate"); + break; + case 8: + // (, line 111 + // <-, line 111 + slice_from("al"); + break; + case 9: + // (, line 112 + // <-, line 112 + slice_from("ful"); + break; + case 10: + // (, line 114 + // <-, line 114 + slice_from("ous"); + break; + case 11: + // (, line 116 + // <-, line 116 + slice_from("ive"); + break; + case 12: + // (, line 118 + // <-, line 118 + slice_from("ble"); + break; + case 13: + // (, line 119 + // literal, line 119 + if (!(eq_s_b(1, "l"))) { return false; } - // test, line 87 - v_4 = limit - cursor; - // call shortv, line 87 - if (!r_shortv()) + // <-, line 119 + slice_from("og"); + break; + case 14: + // (, line 120 + // <-, line 120 + slice_from("ful"); + break; + case 15: + // (, line 121 + // <-, line 121 + slice_from("less"); + break; + case 16: + // (, line 122 + if (!(in_grouping_b(g_valid_LI, 99, 116))) { return false; } - cursor = limit - v_4; - // <+, line 87 - { - int c = cursor; - insert(cursor, cursor, "e"); - cursor = c; - } + // delete, line 122 + slice_del(); break; } - break; - } - return true; - } - - private boolean r_Step_1c() { - int v_1; - int v_2; - // (, line 93 - // [, line 94 - ket = cursor; - // or, line 94 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // literal, line 94 - if (!(eq_s_b(1, "y"))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_1; - // literal, line 94 - if (!(eq_s_b(1, "Y"))) - { - return false; + return true; } - } while (false); - // ], line 94 - bra = cursor; - if (!(out_grouping_b(g_v, 97, 121))) - { - return false; - } - // not, line 95 - { - v_2 = limit - cursor; - lab2: do { - // atlimit, line 95 - if (cursor > limit_backward) - { - break lab2; - } - return false; - } while (false); - cursor = limit - v_2; - } - // <-, line 96 - slice_from("i"); - return true; - } - private boolean r_Step_2() { + private boolean r_Step_3() { int among_var; - // (, line 99 - // [, line 100 - ket = cursor; - // substring, line 100 - among_var = find_among_b(a_5, 24); - if (among_var == 0) - { - return false; - } - // ], line 100 - bra = cursor; - // call R1, line 100 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 101 - // <-, line 101 - slice_from("tion"); - break; - case 2: - // (, line 102 - // <-, line 102 - slice_from("ence"); - break; - case 3: - // (, line 103 - // <-, line 103 - 
slice_from("ance"); - break; - case 4: - // (, line 104 - // <-, line 104 - slice_from("able"); - break; - case 5: - // (, line 105 - // <-, line 105 - slice_from("ent"); - break; - case 6: - // (, line 107 - // <-, line 107 - slice_from("ize"); - break; - case 7: - // (, line 109 - // <-, line 109 - slice_from("ate"); - break; - case 8: - // (, line 111 - // <-, line 111 - slice_from("al"); - break; - case 9: - // (, line 112 - // <-, line 112 - slice_from("ful"); - break; - case 10: - // (, line 114 - // <-, line 114 - slice_from("ous"); - break; - case 11: - // (, line 116 - // <-, line 116 - slice_from("ive"); - break; - case 12: - // (, line 118 - // <-, line 118 - slice_from("ble"); - break; - case 13: - // (, line 119 - // literal, line 119 - if (!(eq_s_b(1, "l"))) + // (, line 126 + // [, line 127 + ket = cursor; + // substring, line 127 + among_var = find_among_b(a_6, 9); + if (among_var == 0) { return false; } - // <-, line 119 - slice_from("og"); - break; - case 14: - // (, line 120 - // <-, line 120 - slice_from("ful"); - break; - case 15: - // (, line 121 - // <-, line 121 - slice_from("less"); - break; - case 16: - // (, line 122 - if (!(in_grouping_b(g_valid_LI, 99, 116))) + // ], line 127 + bra = cursor; + // call R1, line 127 + if (!r_R1()) { return false; } - // delete, line 122 - slice_del(); - break; - } - return true; - } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 128 + // <-, line 128 + slice_from("tion"); + break; + case 2: + // (, line 129 + // <-, line 129 + slice_from("ate"); + break; + case 3: + // (, line 130 + // <-, line 130 + slice_from("al"); + break; + case 4: + // (, line 132 + // <-, line 132 + slice_from("ic"); + break; + case 5: + // (, line 134 + // delete, line 134 + slice_del(); + break; + case 6: + // (, line 136 + // call R2, line 136 + if (!r_R2()) + { + return false; + } + // delete, line 136 + slice_del(); + break; + } + return true; + } - private boolean r_Step_3() { + private boolean r_Step_4() { int among_var; - // (, line 126 - // [, line 127 - ket = cursor; - // substring, line 127 - among_var = find_among_b(a_6, 9); - if (among_var == 0) - { - return false; - } - // ], line 127 - bra = cursor; - // call R1, line 127 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 128 - // <-, line 128 - slice_from("tion"); - break; - case 2: - // (, line 129 - // <-, line 129 - slice_from("ate"); - break; - case 3: - // (, line 130 - // <-, line 130 - slice_from("al"); - break; - case 4: - // (, line 132 - // <-, line 132 - slice_from("ic"); - break; - case 5: - // (, line 134 - // delete, line 134 - slice_del(); - break; - case 6: - // (, line 136 - // call R2, line 136 + int v_1; + // (, line 140 + // [, line 141 + ket = cursor; + // substring, line 141 + among_var = find_among_b(a_7, 18); + if (among_var == 0) + { + return false; + } + // ], line 141 + bra = cursor; + // call R2, line 141 if (!r_R2()) { return false; } - // delete, line 136 - slice_del(); - break; - } - return true; - } - - private boolean r_Step_4() { - int among_var; - int v_1; - // (, line 140 - // [, line 141 - ket = cursor; - // substring, line 141 - among_var = find_among_b(a_7, 18); - if (among_var == 0) - { - return false; - } - // ], line 141 - bra = cursor; - // call R2, line 141 - if (!r_R2()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 144 - // delete, line 144 - slice_del(); - break; - case 2: - // (, line 145 - // or, line 145 - lab0: 
do { - v_1 = limit - cursor; - lab1: do { - // literal, line 145 - if (!(eq_s_b(1, "s"))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_1; - // literal, line 145 - if (!(eq_s_b(1, "t"))) - { + switch(among_var) { + case 0: return false; - } - } while (false); - // delete, line 145 - slice_del(); - break; - } - return true; - } + case 1: + // (, line 144 + // delete, line 144 + slice_del(); + break; + case 2: + // (, line 145 + // or, line 145 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // literal, line 145 + if (!(eq_s_b(1, "s"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_1; + // literal, line 145 + if (!(eq_s_b(1, "t"))) + { + return false; + } + } while (false); + // delete, line 145 + slice_del(); + break; + } + return true; + } - private boolean r_Step_5() { + private boolean r_Step_5() { int among_var; int v_1; int v_2; - // (, line 149 - // [, line 150 - ket = cursor; - // substring, line 150 - among_var = find_among_b(a_8, 2); - if (among_var == 0) - { - return false; - } - // ], line 150 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 151 - // or, line 151 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // call R2, line 151 - if (!r_R2()) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 151 - // call R1, line 151 - if (!r_R1()) - { + // (, line 149 + // [, line 150 + ket = cursor; + // substring, line 150 + among_var = find_among_b(a_8, 2); + if (among_var == 0) + { + return false; + } + // ], line 150 + bra = cursor; + switch(among_var) { + case 0: return false; - } - // not, line 151 - { - v_2 = limit - cursor; - lab2: do { - // call shortv, line 151 - if (!r_shortv()) + case 1: + // (, line 151 + // or, line 151 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // call R2, line 151 + if (!r_R2()) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_1; + // (, line 151 + // call R1, line 151 + if (!r_R1()) { - break lab2; + return false; } - return false; + // not, line 151 + { + v_2 = limit - cursor; + lab2: do { + // call shortv, line 151 + if (!r_shortv()) + { + break lab2; + } + return false; + } while (false); + cursor = limit - v_2; + } } while (false); - cursor = limit - v_2; - } - } while (false); - // delete, line 151 - slice_del(); - break; - case 2: - // (, line 152 - // call R2, line 152 - if (!r_R2()) + // delete, line 151 + slice_del(); + break; + case 2: + // (, line 152 + // call R2, line 152 + if (!r_R2()) + { + return false; + } + // literal, line 152 + if (!(eq_s_b(1, "l"))) + { + return false; + } + // delete, line 152 + slice_del(); + break; + } + return true; + } + + private boolean r_exception2() { + // (, line 156 + // [, line 158 + ket = cursor; + // substring, line 158 + if (find_among_b(a_9, 8) == 0) { return false; } - // literal, line 152 - if (!(eq_s_b(1, "l"))) + // ], line 158 + bra = cursor; + // atlimit, line 158 + if (cursor > limit_backward) { return false; } - // delete, line 152 - slice_del(); - break; - } - return true; - } + return true; + } - private boolean r_exception2() { - // (, line 156 - // [, line 158 - ket = cursor; - // substring, line 158 - if (find_among_b(a_9, 8) == 0) - { - return false; - } - // ], line 158 - bra = cursor; - // atlimit, line 158 - if (cursor > limit_backward) - { - return false; - } - return true; - } - - private boolean r_exception1() { + private boolean r_exception1() { int among_var; - // (, line 168 - // [, line 170 - bra = 
cursor; - // substring, line 170 - among_var = find_among(a_10, 18); - if (among_var == 0) - { - return false; - } - // ], line 170 - ket = cursor; - // atlimit, line 170 - if (cursor < limit) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 174 - // <-, line 174 - slice_from("ski"); - break; - case 2: - // (, line 175 - // <-, line 175 - slice_from("sky"); - break; - case 3: - // (, line 176 - // <-, line 176 - slice_from("die"); - break; - case 4: - // (, line 177 - // <-, line 177 - slice_from("lie"); - break; - case 5: - // (, line 178 - // <-, line 178 - slice_from("tie"); - break; - case 6: - // (, line 182 - // <-, line 182 - slice_from("idl"); - break; - case 7: - // (, line 183 - // <-, line 183 - slice_from("gentl"); - break; - case 8: - // (, line 184 - // <-, line 184 - slice_from("ugli"); - break; - case 9: - // (, line 185 - // <-, line 185 - slice_from("earli"); - break; - case 10: - // (, line 186 - // <-, line 186 - slice_from("onli"); - break; - case 11: - // (, line 187 - // <-, line 187 - slice_from("singl"); - break; - } - return true; - } + // (, line 168 + // [, line 170 + bra = cursor; + // substring, line 170 + among_var = find_among(a_10, 18); + if (among_var == 0) + { + return false; + } + // ], line 170 + ket = cursor; + // atlimit, line 170 + if (cursor < limit) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 174 + // <-, line 174 + slice_from("ski"); + break; + case 2: + // (, line 175 + // <-, line 175 + slice_from("sky"); + break; + case 3: + // (, line 176 + // <-, line 176 + slice_from("die"); + break; + case 4: + // (, line 177 + // <-, line 177 + slice_from("lie"); + break; + case 5: + // (, line 178 + // <-, line 178 + slice_from("tie"); + break; + case 6: + // (, line 182 + // <-, line 182 + slice_from("idl"); + break; + case 7: + // (, line 183 + // <-, line 183 + slice_from("gentl"); + break; + case 8: + // (, line 184 + // <-, line 184 + slice_from("ugli"); + break; + case 9: + // (, line 185 + // <-, line 185 + slice_from("earli"); + break; + case 10: + // (, line 186 + // <-, line 186 + slice_from("onli"); + break; + case 11: + // (, line 187 + // <-, line 187 + slice_from("singl"); + break; + } + return true; + } - private boolean r_postlude() { + private boolean r_postlude() { int v_1; int v_2; - // (, line 203 - // Boolean test Y_found, line 203 - if (!(B_Y_found)) - { - return false; - } - // repeat, line 203 - replab0: while(true) - { - v_1 = cursor; - lab1: do { // (, line 203 - // goto, line 203 - golab2: while(true) + // Boolean test Y_found, line 203 + if (!(B_Y_found)) { - v_2 = cursor; - lab3: do { + return false; + } + // repeat, line 203 + replab0: while(true) + { + v_1 = cursor; + lab1: do { // (, line 203 - // [, line 203 - bra = cursor; - // literal, line 203 - if (!(eq_s(1, "Y"))) + // goto, line 203 + golab2: while(true) { - break lab3; + v_2 = cursor; + lab3: do { + // (, line 203 + // [, line 203 + bra = cursor; + // literal, line 203 + if (!(eq_s(1, "Y"))) + { + break lab3; + } + // ], line 203 + ket = cursor; + cursor = v_2; + break golab2; + } while (false); + cursor = v_2; + if (cursor >= limit) + { + break lab1; + } + cursor++; } - // ], line 203 - ket = cursor; - cursor = v_2; - break golab2; + // <-, line 203 + slice_from("y"); + continue replab0; } while (false); - cursor = v_2; - if (cursor >= limit) - { - break lab1; - } - cursor++; + cursor = v_1; + break replab0; } - // <-, line 203 - slice_from("y"); - continue replab0; - } 
while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -1154,161 +1162,173 @@ int v_11; int v_12; int v_13; - // (, line 205 - // or, line 207 - lab0: do { - v_1 = cursor; - lab1: do { - // call exception1, line 207 - if (!r_exception1()) - { - break lab1; - } - break lab0; - } while (false); - cursor = v_1; - lab2: do { - // not, line 208 - { - v_2 = cursor; - lab3: do { - // hop, line 208 + // (, line 205 + // or, line 207 + lab0: do { + v_1 = cursor; + lab1: do { + // call exception1, line 207 + if (!r_exception1()) { - int c = cursor + 3; - if (0 > c || c > limit) + break lab1; + } + break lab0; + } while (false); + cursor = v_1; + lab2: do { + // not, line 208 + { + v_2 = cursor; + lab3: do { + // hop, line 208 + { + int c = cursor + 3; + if (0 > c || c > limit) + { + break lab3; + } + cursor = c; + } + break lab2; + } while (false); + cursor = v_2; + } + break lab0; + } while (false); + cursor = v_1; + // (, line 208 + // do, line 209 + v_3 = cursor; + lab4: do { + // call prelude, line 209 + if (!r_prelude()) + { + break lab4; + } + } while (false); + cursor = v_3; + // do, line 210 + v_4 = cursor; + lab5: do { + // call mark_regions, line 210 + if (!r_mark_regions()) + { + break lab5; + } + } while (false); + cursor = v_4; + // backwards, line 211 + limit_backward = cursor; cursor = limit; + // (, line 211 + // do, line 213 + v_5 = limit - cursor; + lab6: do { + // call Step_1a, line 213 + if (!r_Step_1a()) + { + break lab6; + } + } while (false); + cursor = limit - v_5; + // or, line 215 + lab7: do { + v_6 = limit - cursor; + lab8: do { + // call exception2, line 215 + if (!r_exception2()) { - break lab3; + break lab8; } - cursor = c; + break lab7; + } while (false); + cursor = limit - v_6; + // (, line 215 + // do, line 217 + v_7 = limit - cursor; + lab9: do { + // call Step_1b, line 217 + if (!r_Step_1b()) + { + break lab9; + } + } while (false); + cursor = limit - v_7; + // do, line 218 + v_8 = limit - cursor; + lab10: do { + // call Step_1c, line 218 + if (!r_Step_1c()) + { + break lab10; + } + } while (false); + cursor = limit - v_8; + // do, line 220 + v_9 = limit - cursor; + lab11: do { + // call Step_2, line 220 + if (!r_Step_2()) + { + break lab11; + } + } while (false); + cursor = limit - v_9; + // do, line 221 + v_10 = limit - cursor; + lab12: do { + // call Step_3, line 221 + if (!r_Step_3()) + { + break lab12; + } + } while (false); + cursor = limit - v_10; + // do, line 222 + v_11 = limit - cursor; + lab13: do { + // call Step_4, line 222 + if (!r_Step_4()) + { + break lab13; + } + } while (false); + cursor = limit - v_11; + // do, line 224 + v_12 = limit - cursor; + lab14: do { + // call Step_5, line 224 + if (!r_Step_5()) + { + break lab14; + } + } while (false); + cursor = limit - v_12; + } while (false); + cursor = limit_backward; // do, line 227 + v_13 = cursor; + lab15: do { + // call postlude, line 227 + if (!r_postlude()) + { + break lab15; } - break lab2; } while (false); - cursor = v_2; - } - break lab0; - } while (false); - cursor = v_1; - // (, line 208 - // do, line 209 - v_3 = cursor; - lab4: do { - // call prelude, line 209 - if (!r_prelude()) - { - break lab4; - } - } while (false); - cursor = v_3; - // do, line 210 - v_4 = cursor; - lab5: do { - // call mark_regions, line 210 - if (!r_mark_regions()) - { - break lab5; - } - } while (false); - cursor = v_4; - // backwards, line 211 - limit_backward = cursor; cursor = limit; 
- // (, line 211 - // do, line 213 - v_5 = limit - cursor; - lab6: do { - // call Step_1a, line 213 - if (!r_Step_1a()) - { - break lab6; - } - } while (false); - cursor = limit - v_5; - // or, line 215 - lab7: do { - v_6 = limit - cursor; - lab8: do { - // call exception2, line 215 - if (!r_exception2()) - { - break lab8; - } - break lab7; + cursor = v_13; } while (false); - cursor = limit - v_6; - // (, line 215 - // do, line 217 - v_7 = limit - cursor; - lab9: do { - // call Step_1b, line 217 - if (!r_Step_1b()) - { - break lab9; - } - } while (false); - cursor = limit - v_7; - // do, line 218 - v_8 = limit - cursor; - lab10: do { - // call Step_1c, line 218 - if (!r_Step_1c()) - { - break lab10; - } - } while (false); - cursor = limit - v_8; - // do, line 220 - v_9 = limit - cursor; - lab11: do { - // call Step_2, line 220 - if (!r_Step_2()) - { - break lab11; - } - } while (false); - cursor = limit - v_9; - // do, line 221 - v_10 = limit - cursor; - lab12: do { - // call Step_3, line 221 - if (!r_Step_3()) - { - break lab12; - } - } while (false); - cursor = limit - v_10; - // do, line 222 - v_11 = limit - cursor; - lab13: do { - // call Step_4, line 222 - if (!r_Step_4()) - { - break lab13; - } - } while (false); - cursor = limit - v_11; - // do, line 224 - v_12 = limit - cursor; - lab14: do { - // call Step_5, line 224 - if (!r_Step_5()) - { - break lab14; - } - } while (false); - cursor = limit - v_12; - } while (false); - cursor = limit_backward; // do, line 227 - v_13 = cursor; - lab15: do { - // call postlude, line 227 - if (!r_postlude()) - { - break lab15; - } - } while (false); - cursor = v_13; - } while (false); - return true; + return true; + } + + @Override + public boolean equals( Object o ) { + return o instanceof EnglishStemmer; } + @Override + public int hashCode() { + return EnglishStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/FinnishStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/FinnishStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/FinnishStemmer.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/FinnishStemmer.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,754 +1,761 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
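+  * <p>
+  * A minimal usage sketch (editorial illustration, not part of the generated
+  * source; it assumes the setCurrent/stem/getCurrent helpers inherited from
+  * {@link org.tartarus.snowball.SnowballProgram}):
+  * <pre>
+  * FinnishStemmer stemmer = new FinnishStemmer();
+  * stemmer.setCurrent("kaloissa");          // word to stem (example input)
+  * if (stemmer.stem()) {                    // true when a stem was produced
+  *   String stem = stemmer.getCurrent();    // read back the stemmed form
+  * }
+  * </pre>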
+ */ + public class FinnishStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "pa", -1, 1, "", this), - new Among ( "sti", -1, 2, "", this), - new Among ( "kaan", -1, 1, "", this), - new Among ( "han", -1, 1, "", this), - new Among ( "kin", -1, 1, "", this), - new Among ( "h\u00E4n", -1, 1, "", this), - new Among ( "k\u00E4\u00E4n", -1, 1, "", this), - new Among ( "ko", -1, 1, "", this), - new Among ( "p\u00E4", -1, 1, "", this), - new Among ( "k\u00F6", -1, 1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "lla", -1, -1, "", this), - new Among ( "na", -1, -1, "", this), - new Among ( "ssa", -1, -1, "", this), - new Among ( "ta", -1, -1, "", this), - new Among ( "lta", 3, -1, "", this), - new Among ( "sta", 3, -1, "", this) - }; + private final static FinnishStemmer methodObject = new FinnishStemmer (); - private Among a_2[] = { - new Among ( "ll\u00E4", -1, -1, "", this), - new Among ( "n\u00E4", -1, -1, "", this), - new Among ( "ss\u00E4", -1, -1, "", this), - new Among ( "t\u00E4", -1, -1, "", this), - new Among ( "lt\u00E4", 3, -1, "", this), - new Among ( "st\u00E4", 3, -1, "", this) - }; + private final static Among a_0[] = { + new Among ( "pa", -1, 1, "", methodObject ), + new Among ( "sti", -1, 2, "", methodObject ), + new Among ( "kaan", -1, 1, "", methodObject ), + new Among ( "han", -1, 1, "", methodObject ), + new Among ( "kin", -1, 1, "", methodObject ), + new Among ( "h\u00E4n", -1, 1, "", methodObject ), + new Among ( "k\u00E4\u00E4n", -1, 1, "", methodObject ), + new Among ( "ko", -1, 1, "", methodObject ), + new Among ( "p\u00E4", -1, 1, "", methodObject ), + new Among ( "k\u00F6", -1, 1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "lle", -1, -1, "", this), - new Among ( "ine", -1, -1, "", this) - }; + private final static Among a_1[] = { + new Among ( "lla", -1, -1, "", methodObject ), + new Among ( "na", -1, -1, "", methodObject ), + new Among ( "ssa", -1, -1, "", methodObject ), + new Among ( "ta", -1, -1, "", methodObject ), + new Among ( "lta", 3, -1, "", methodObject ), + new Among ( "sta", 3, -1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "nsa", -1, 3, "", this), - new Among ( "mme", -1, 3, "", this), - new Among ( "nne", -1, 3, "", this), - new Among ( "ni", -1, 2, "", this), - new Among ( "si", -1, 1, "", this), - new Among ( "an", -1, 4, "", this), - new Among ( "en", -1, 6, "", this), - new Among ( "\u00E4n", -1, 5, "", this), - new Among ( "ns\u00E4", -1, 3, "", this) - }; + private final static Among a_2[] = { + new Among ( "ll\u00E4", -1, -1, "", methodObject ), + new Among ( "n\u00E4", -1, -1, "", methodObject ), + new Among ( "ss\u00E4", -1, -1, "", methodObject ), + new Among ( "t\u00E4", -1, -1, "", methodObject ), + new Among ( "lt\u00E4", 3, -1, "", methodObject ), + new Among ( "st\u00E4", 3, -1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "aa", -1, -1, "", this), - new Among ( "ee", -1, -1, "", this), - new Among ( "ii", -1, -1, "", this), - new Among ( "oo", -1, -1, "", this), - new Among ( "uu", -1, -1, "", this), - new Among ( "\u00E4\u00E4", -1, -1, "", this), - new Among ( "\u00F6\u00F6", -1, -1, "", this) - }; + private final static Among a_3[] = { + new Among ( "lle", -1, -1, "", methodObject ), + new Among ( "ine", -1, -1, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "a", -1, 8, "", this), - new Among ( "lla", 0, -1, "", this), - new Among ( "na", 0, -1, "", this), - new Among ( "ssa", 
0, -1, "", this), - new Among ( "ta", 0, -1, "", this), - new Among ( "lta", 4, -1, "", this), - new Among ( "sta", 4, -1, "", this), - new Among ( "tta", 4, 9, "", this), - new Among ( "lle", -1, -1, "", this), - new Among ( "ine", -1, -1, "", this), - new Among ( "ksi", -1, -1, "", this), - new Among ( "n", -1, 7, "", this), - new Among ( "han", 11, 1, "", this), - new Among ( "den", 11, -1, "r_VI", this), - new Among ( "seen", 11, -1, "r_LONG", this), - new Among ( "hen", 11, 2, "", this), - new Among ( "tten", 11, -1, "r_VI", this), - new Among ( "hin", 11, 3, "", this), - new Among ( "siin", 11, -1, "r_VI", this), - new Among ( "hon", 11, 4, "", this), - new Among ( "h\u00E4n", 11, 5, "", this), - new Among ( "h\u00F6n", 11, 6, "", this), - new Among ( "\u00E4", -1, 8, "", this), - new Among ( "ll\u00E4", 22, -1, "", this), - new Among ( "n\u00E4", 22, -1, "", this), - new Among ( "ss\u00E4", 22, -1, "", this), - new Among ( "t\u00E4", 22, -1, "", this), - new Among ( "lt\u00E4", 26, -1, "", this), - new Among ( "st\u00E4", 26, -1, "", this), - new Among ( "tt\u00E4", 26, 9, "", this) - }; + private final static Among a_4[] = { + new Among ( "nsa", -1, 3, "", methodObject ), + new Among ( "mme", -1, 3, "", methodObject ), + new Among ( "nne", -1, 3, "", methodObject ), + new Among ( "ni", -1, 2, "", methodObject ), + new Among ( "si", -1, 1, "", methodObject ), + new Among ( "an", -1, 4, "", methodObject ), + new Among ( "en", -1, 6, "", methodObject ), + new Among ( "\u00E4n", -1, 5, "", methodObject ), + new Among ( "ns\u00E4", -1, 3, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "eja", -1, -1, "", this), - new Among ( "mma", -1, 1, "", this), - new Among ( "imma", 1, -1, "", this), - new Among ( "mpa", -1, 1, "", this), - new Among ( "impa", 3, -1, "", this), - new Among ( "mmi", -1, 1, "", this), - new Among ( "immi", 5, -1, "", this), - new Among ( "mpi", -1, 1, "", this), - new Among ( "impi", 7, -1, "", this), - new Among ( "ej\u00E4", -1, -1, "", this), - new Among ( "mm\u00E4", -1, 1, "", this), - new Among ( "imm\u00E4", 10, -1, "", this), - new Among ( "mp\u00E4", -1, 1, "", this), - new Among ( "imp\u00E4", 12, -1, "", this) - }; + private final static Among a_5[] = { + new Among ( "aa", -1, -1, "", methodObject ), + new Among ( "ee", -1, -1, "", methodObject ), + new Among ( "ii", -1, -1, "", methodObject ), + new Among ( "oo", -1, -1, "", methodObject ), + new Among ( "uu", -1, -1, "", methodObject ), + new Among ( "\u00E4\u00E4", -1, -1, "", methodObject ), + new Among ( "\u00F6\u00F6", -1, -1, "", methodObject ) + }; - private Among a_8[] = { - new Among ( "i", -1, -1, "", this), - new Among ( "j", -1, -1, "", this) - }; + private final static Among a_6[] = { + new Among ( "a", -1, 8, "", methodObject ), + new Among ( "lla", 0, -1, "", methodObject ), + new Among ( "na", 0, -1, "", methodObject ), + new Among ( "ssa", 0, -1, "", methodObject ), + new Among ( "ta", 0, -1, "", methodObject ), + new Among ( "lta", 4, -1, "", methodObject ), + new Among ( "sta", 4, -1, "", methodObject ), + new Among ( "tta", 4, 9, "", methodObject ), + new Among ( "lle", -1, -1, "", methodObject ), + new Among ( "ine", -1, -1, "", methodObject ), + new Among ( "ksi", -1, -1, "", methodObject ), + new Among ( "n", -1, 7, "", methodObject ), + new Among ( "han", 11, 1, "", methodObject ), + new Among ( "den", 11, -1, "r_VI", methodObject ), + new Among ( "seen", 11, -1, "r_LONG", methodObject ), + new Among ( "hen", 11, 2, "", methodObject ), + new Among ( "tten", 11, -1, 
"r_VI", methodObject ), + new Among ( "hin", 11, 3, "", methodObject ), + new Among ( "siin", 11, -1, "r_VI", methodObject ), + new Among ( "hon", 11, 4, "", methodObject ), + new Among ( "h\u00E4n", 11, 5, "", methodObject ), + new Among ( "h\u00F6n", 11, 6, "", methodObject ), + new Among ( "\u00E4", -1, 8, "", methodObject ), + new Among ( "ll\u00E4", 22, -1, "", methodObject ), + new Among ( "n\u00E4", 22, -1, "", methodObject ), + new Among ( "ss\u00E4", 22, -1, "", methodObject ), + new Among ( "t\u00E4", 22, -1, "", methodObject ), + new Among ( "lt\u00E4", 26, -1, "", methodObject ), + new Among ( "st\u00E4", 26, -1, "", methodObject ), + new Among ( "tt\u00E4", 26, 9, "", methodObject ) + }; - private Among a_9[] = { - new Among ( "mma", -1, 1, "", this), - new Among ( "imma", 0, -1, "", this) - }; + private final static Among a_7[] = { + new Among ( "eja", -1, -1, "", methodObject ), + new Among ( "mma", -1, 1, "", methodObject ), + new Among ( "imma", 1, -1, "", methodObject ), + new Among ( "mpa", -1, 1, "", methodObject ), + new Among ( "impa", 3, -1, "", methodObject ), + new Among ( "mmi", -1, 1, "", methodObject ), + new Among ( "immi", 5, -1, "", methodObject ), + new Among ( "mpi", -1, 1, "", methodObject ), + new Among ( "impi", 7, -1, "", methodObject ), + new Among ( "ej\u00E4", -1, -1, "", methodObject ), + new Among ( "mm\u00E4", -1, 1, "", methodObject ), + new Among ( "imm\u00E4", 10, -1, "", methodObject ), + new Among ( "mp\u00E4", -1, 1, "", methodObject ), + new Among ( "imp\u00E4", 12, -1, "", methodObject ) + }; - private static final char g_AEI[] = {17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; + private final static Among a_8[] = { + new Among ( "i", -1, -1, "", methodObject ), + new Among ( "j", -1, -1, "", methodObject ) + }; - private static final char g_V1[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + private final static Among a_9[] = { + new Among ( "mma", -1, 1, "", methodObject ), + new Among ( "imma", 0, -1, "", methodObject ) + }; - private static final char g_V2[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + private static final char g_AEI[] = {17, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8 }; - private static final char g_particle_end[] = {17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + private static final char g_V1[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + private static final char g_V2[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + + private static final char g_particle_end[] = {17, 97, 24, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32 }; + private boolean B_ending_removed; - private StringBuffer S_x = new StringBuffer(); + private java.lang.StringBuilder S_x = new java.lang.StringBuilder(); private int I_p2; private int I_p1; - private void copy_from(FinnishStemmer other) { - B_ending_removed = other.B_ending_removed; - S_x = other.S_x; - I_p2 = other.I_p2; - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(FinnishStemmer other) { + B_ending_removed = other.B_ending_removed; + S_x = other.S_x; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_3; - // (, line 41 - I_p1 = limit; - I_p2 = limit; - // goto, line 46 - golab0: while(true) - { - v_1 = cursor; - lab1: do { - if (!(in_grouping(g_V1, 97, 246))) + // (, line 41 + I_p1 = limit; + I_p2 = limit; + // goto, line 46 + golab0: 
while(true) { - break lab1; + v_1 = cursor; + lab1: do { + if (!(in_grouping(g_V1, 97, 246))) + { + break lab1; + } + cursor = v_1; + break golab0; + } while (false); + cursor = v_1; + if (cursor >= limit) + { + return false; + } + cursor++; } - cursor = v_1; - break golab0; - } while (false); - cursor = v_1; - if (cursor >= limit) - { - return false; - } - cursor++; - } - // gopast, line 46 - golab2: while(true) - { - lab3: do { - if (!(out_grouping(g_V1, 97, 246))) + // gopast, line 46 + golab2: while(true) { - break lab3; + lab3: do { + if (!(out_grouping(g_V1, 97, 246))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab2; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // setmark p1, line 46 - I_p1 = cursor; - // goto, line 47 - golab4: while(true) - { - v_3 = cursor; - lab5: do { - if (!(in_grouping(g_V1, 97, 246))) + // setmark p1, line 46 + I_p1 = cursor; + // goto, line 47 + golab4: while(true) { - break lab5; + v_3 = cursor; + lab5: do { + if (!(in_grouping(g_V1, 97, 246))) + { + break lab5; + } + cursor = v_3; + break golab4; + } while (false); + cursor = v_3; + if (cursor >= limit) + { + return false; + } + cursor++; } - cursor = v_3; - break golab4; - } while (false); - cursor = v_3; - if (cursor >= limit) - { - return false; + // gopast, line 47 + golab6: while(true) + { + lab7: do { + if (!(out_grouping(g_V1, 97, 246))) + { + break lab7; + } + break golab6; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; + } + // setmark p2, line 47 + I_p2 = cursor; + return true; } - cursor++; - } - // gopast, line 47 - golab6: while(true) - { - lab7: do { - if (!(out_grouping(g_V1, 97, 246))) + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab7; + return false; } - break golab6; - } while (false); - if (cursor >= limit) - { - return false; + return true; } - cursor++; - } - // setmark p2, line 47 - I_p2 = cursor; - return true; - } - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_particle_etc() { + private boolean r_particle_etc() { int among_var; int v_1; int v_2; - // (, line 54 - // setlimit, line 55 - v_1 = limit - cursor; - // tomark, line 55 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 55 - // [, line 55 - ket = cursor; - // substring, line 55 - among_var = find_among_b(a_0, 10); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 55 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 62 - if (!(in_grouping_b(g_particle_end, 97, 246))) + // (, line 54 + // setlimit, line 55 + v_1 = limit - cursor; + // tomark, line 55 + if (cursor < I_p1) { return false; } - break; - case 2: - // (, line 64 - // call R2, line 64 - if (!r_R2()) + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 55 + // [, line 55 + ket = cursor; + // substring, line 55 + among_var = find_among_b(a_0, 10); + if (among_var == 0) { + limit_backward = v_2; return false; } - break; - } - // delete, line 66 - slice_del(); - return true; - } + // ], line 55 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 62 + if (!(in_grouping_b(g_particle_end, 97, 246))) + { + return false; + } + break; + 
case 2: + // (, line 64 + // call R2, line 64 + if (!r_R2()) + { + return false; + } + break; + } + // delete, line 66 + slice_del(); + return true; + } - private boolean r_possessive() { + private boolean r_possessive() { int among_var; int v_1; int v_2; int v_3; - // (, line 68 - // setlimit, line 69 - v_1 = limit - cursor; - // tomark, line 69 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 69 - // [, line 69 - ket = cursor; - // substring, line 69 - among_var = find_among_b(a_4, 9); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 69 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 72 - // not, line 72 + // (, line 68 + // setlimit, line 69 + v_1 = limit - cursor; + // tomark, line 69 + if (cursor < I_p1) { - v_3 = limit - cursor; - lab0: do { - // literal, line 72 - if (!(eq_s_b(1, "k"))) - { - break lab0; - } - return false; - } while (false); - cursor = limit - v_3; + return false; } - // delete, line 72 - slice_del(); - break; - case 2: - // (, line 74 - // delete, line 74 - slice_del(); - // [, line 74 + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 69 + // [, line 69 ket = cursor; - // literal, line 74 - if (!(eq_s_b(3, "kse"))) + // substring, line 69 + among_var = find_among_b(a_4, 9); + if (among_var == 0) { + limit_backward = v_2; return false; } - // ], line 74 + // ], line 69 bra = cursor; - // <-, line 74 - slice_from("ksi"); - break; - case 3: - // (, line 78 - // delete, line 78 - slice_del(); - break; - case 4: - // (, line 81 - // among, line 81 - if (find_among_b(a_1, 6) == 0) + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 72 + // not, line 72 + { + v_3 = limit - cursor; + lab0: do { + // literal, line 72 + if (!(eq_s_b(1, "k"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_3; + } + // delete, line 72 + slice_del(); + break; + case 2: + // (, line 74 + // delete, line 74 + slice_del(); + // [, line 74 + ket = cursor; + // literal, line 74 + if (!(eq_s_b(3, "kse"))) + { + return false; + } + // ], line 74 + bra = cursor; + // <-, line 74 + slice_from("ksi"); + break; + case 3: + // (, line 78 + // delete, line 78 + slice_del(); + break; + case 4: + // (, line 81 + // among, line 81 + if (find_among_b(a_1, 6) == 0) + { + return false; + } + // delete, line 81 + slice_del(); + break; + case 5: + // (, line 83 + // among, line 83 + if (find_among_b(a_2, 6) == 0) + { + return false; + } + // delete, line 84 + slice_del(); + break; + case 6: + // (, line 86 + // among, line 86 + if (find_among_b(a_3, 2) == 0) + { + return false; + } + // delete, line 86 + slice_del(); + break; + } + return true; + } + + private boolean r_LONG() { + // among, line 91 + if (find_among_b(a_5, 7) == 0) { return false; } - // delete, line 81 - slice_del(); - break; - case 5: - // (, line 83 - // among, line 83 - if (find_among_b(a_2, 6) == 0) + return true; + } + + private boolean r_VI() { + // (, line 93 + // literal, line 93 + if (!(eq_s_b(1, "i"))) { return false; } - // delete, line 84 - slice_del(); - break; - case 6: - // (, line 86 - // among, line 86 - if (find_among_b(a_3, 2) == 0) + if (!(in_grouping_b(g_V2, 97, 246))) { return false; } - // delete, line 86 - slice_del(); - break; - } - return true; - } + return true; + } - private boolean r_LONG() { - // 
among, line 91 - if (find_among_b(a_5, 7) == 0) - { - return false; - } - return true; - } - - private boolean r_VI() { - // (, line 93 - // literal, line 93 - if (!(eq_s_b(1, "i"))) - { - return false; - } - if (!(in_grouping_b(g_V2, 97, 246))) - { - return false; - } - return true; - } - - private boolean r_case_ending() { + private boolean r_case_ending() { int among_var; int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 95 - // setlimit, line 96 - v_1 = limit - cursor; - // tomark, line 96 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 96 - // [, line 96 - ket = cursor; - // substring, line 96 - among_var = find_among_b(a_6, 30); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 96 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 98 - // literal, line 98 - if (!(eq_s_b(1, "a"))) + // (, line 95 + // setlimit, line 96 + v_1 = limit - cursor; + // tomark, line 96 + if (cursor < I_p1) { return false; } - break; - case 2: - // (, line 99 - // literal, line 99 - if (!(eq_s_b(1, "e"))) + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 96 + // [, line 96 + ket = cursor; + // substring, line 96 + among_var = find_among_b(a_6, 30); + if (among_var == 0) { + limit_backward = v_2; return false; } - break; - case 3: - // (, line 100 - // literal, line 100 - if (!(eq_s_b(1, "i"))) - { - return false; - } - break; - case 4: - // (, line 101 - // literal, line 101 - if (!(eq_s_b(1, "o"))) - { - return false; - } - break; - case 5: - // (, line 102 - // literal, line 102 - if (!(eq_s_b(1, "\u00E4"))) - { - return false; - } - break; - case 6: - // (, line 103 - // literal, line 103 - if (!(eq_s_b(1, "\u00F6"))) - { - return false; - } - break; - case 7: - // (, line 111 - // try, line 111 - v_3 = limit - cursor; - lab0: do { - // (, line 111 - // and, line 113 - v_4 = limit - cursor; - // or, line 112 - lab1: do { - v_5 = limit - cursor; - lab2: do { - // call LONG, line 111 - if (!r_LONG()) + // ], line 96 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 98 + // literal, line 98 + if (!(eq_s_b(1, "a"))) + { + return false; + } + break; + case 2: + // (, line 99 + // literal, line 99 + if (!(eq_s_b(1, "e"))) + { + return false; + } + break; + case 3: + // (, line 100 + // literal, line 100 + if (!(eq_s_b(1, "i"))) + { + return false; + } + break; + case 4: + // (, line 101 + // literal, line 101 + if (!(eq_s_b(1, "o"))) + { + return false; + } + break; + case 5: + // (, line 102 + // literal, line 102 + if (!(eq_s_b(1, "\u00E4"))) + { + return false; + } + break; + case 6: + // (, line 103 + // literal, line 103 + if (!(eq_s_b(1, "\u00F6"))) + { + return false; + } + break; + case 7: + // (, line 111 + // try, line 111 + v_3 = limit - cursor; + lab0: do { + // (, line 111 + // and, line 113 + v_4 = limit - cursor; + // or, line 112 + lab1: do { + v_5 = limit - cursor; + lab2: do { + // call LONG, line 111 + if (!r_LONG()) + { + break lab2; + } + break lab1; + } while (false); + cursor = limit - v_5; + // literal, line 112 + if (!(eq_s_b(2, "ie"))) + { + cursor = limit - v_3; + break lab0; + } + } while (false); + cursor = limit - v_4; + // next, line 113 + if (cursor <= limit_backward) { - break lab2; + cursor = limit - v_3; + break lab0; } - break lab1; + cursor--; + // ], line 113 + 
bra = cursor; } while (false); - cursor = limit - v_5; - // literal, line 112 - if (!(eq_s_b(2, "ie"))) + break; + case 8: + // (, line 119 + if (!(in_grouping_b(g_V1, 97, 246))) { - cursor = limit - v_3; - break lab0; + return false; } - } while (false); - cursor = limit - v_4; - // next, line 113 - if (cursor <= limit_backward) - { - cursor = limit - v_3; - break lab0; - } - cursor--; - // ], line 113 - bra = cursor; - } while (false); - break; - case 8: - // (, line 119 - if (!(in_grouping_b(g_V1, 97, 246))) - { - return false; + if (!(out_grouping_b(g_V1, 97, 246))) + { + return false; + } + break; + case 9: + // (, line 121 + // literal, line 121 + if (!(eq_s_b(1, "e"))) + { + return false; + } + break; } - if (!(out_grouping_b(g_V1, 97, 246))) - { - return false; - } - break; - case 9: - // (, line 121 - // literal, line 121 - if (!(eq_s_b(1, "e"))) - { - return false; - } - break; - } - // delete, line 138 - slice_del(); - // set ending_removed, line 139 - B_ending_removed = true; - return true; - } + // delete, line 138 + slice_del(); + // set ending_removed, line 139 + B_ending_removed = true; + return true; + } - private boolean r_other_endings() { + private boolean r_other_endings() { int among_var; int v_1; int v_2; int v_3; - // (, line 141 - // setlimit, line 142 - v_1 = limit - cursor; - // tomark, line 142 - if (cursor < I_p2) - { - return false; - } - cursor = I_p2; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 142 - // [, line 142 - ket = cursor; - // substring, line 142 - among_var = find_among_b(a_7, 14); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 142 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 146 - // not, line 146 + // (, line 141 + // setlimit, line 142 + v_1 = limit - cursor; + // tomark, line 142 + if (cursor < I_p2) { - v_3 = limit - cursor; - lab0: do { - // literal, line 146 - if (!(eq_s_b(2, "po"))) + return false; + } + cursor = I_p2; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 142 + // [, line 142 + ket = cursor; + // substring, line 142 + among_var = find_among_b(a_7, 14); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 142 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 146 + // not, line 146 { - break lab0; + v_3 = limit - cursor; + lab0: do { + // literal, line 146 + if (!(eq_s_b(2, "po"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_3; } - return false; - } while (false); - cursor = limit - v_3; + break; } - break; - } - // delete, line 151 - slice_del(); - return true; - } + // delete, line 151 + slice_del(); + return true; + } - private boolean r_i_plural() { + private boolean r_i_plural() { int v_1; int v_2; - // (, line 153 - // setlimit, line 154 - v_1 = limit - cursor; - // tomark, line 154 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 154 - // [, line 154 - ket = cursor; - // substring, line 154 - if (find_among_b(a_8, 2) == 0) - { - limit_backward = v_2; - return false; - } - // ], line 154 - bra = cursor; - limit_backward = v_2; - // delete, line 158 - slice_del(); - return true; - } + // (, line 153 + // setlimit, line 154 + v_1 = limit - cursor; + // tomark, line 154 + if (cursor < I_p1) + { + return false; + } + cursor = 
I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 154 + // [, line 154 + ket = cursor; + // substring, line 154 + if (find_among_b(a_8, 2) == 0) + { + limit_backward = v_2; + return false; + } + // ], line 154 + bra = cursor; + limit_backward = v_2; + // delete, line 158 + slice_del(); + return true; + } - private boolean r_t_plural() { + private boolean r_t_plural() { int among_var; int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; - // (, line 160 - // setlimit, line 161 - v_1 = limit - cursor; - // tomark, line 161 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 161 - // [, line 162 - ket = cursor; - // literal, line 162 - if (!(eq_s_b(1, "t"))) - { - limit_backward = v_2; - return false; - } - // ], line 162 - bra = cursor; - // test, line 162 - v_3 = limit - cursor; - if (!(in_grouping_b(g_V1, 97, 246))) - { - limit_backward = v_2; - return false; - } - cursor = limit - v_3; - // delete, line 163 - slice_del(); - limit_backward = v_2; - // setlimit, line 165 - v_4 = limit - cursor; - // tomark, line 165 - if (cursor < I_p2) - { - return false; - } - cursor = I_p2; - v_5 = limit_backward; - limit_backward = cursor; - cursor = limit - v_4; - // (, line 165 - // [, line 165 - ket = cursor; - // substring, line 165 - among_var = find_among_b(a_9, 2); - if (among_var == 0) - { - limit_backward = v_5; - return false; - } - // ], line 165 - bra = cursor; - limit_backward = v_5; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 167 - // not, line 167 + // (, line 160 + // setlimit, line 161 + v_1 = limit - cursor; + // tomark, line 161 + if (cursor < I_p1) { - v_6 = limit - cursor; - lab0: do { - // literal, line 167 - if (!(eq_s_b(2, "po"))) + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 161 + // [, line 162 + ket = cursor; + // literal, line 162 + if (!(eq_s_b(1, "t"))) + { + limit_backward = v_2; + return false; + } + // ], line 162 + bra = cursor; + // test, line 162 + v_3 = limit - cursor; + if (!(in_grouping_b(g_V1, 97, 246))) + { + limit_backward = v_2; + return false; + } + cursor = limit - v_3; + // delete, line 163 + slice_del(); + limit_backward = v_2; + // setlimit, line 165 + v_4 = limit - cursor; + // tomark, line 165 + if (cursor < I_p2) + { + return false; + } + cursor = I_p2; + v_5 = limit_backward; + limit_backward = cursor; + cursor = limit - v_4; + // (, line 165 + // [, line 165 + ket = cursor; + // substring, line 165 + among_var = find_among_b(a_9, 2); + if (among_var == 0) + { + limit_backward = v_5; + return false; + } + // ], line 165 + bra = cursor; + limit_backward = v_5; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 167 + // not, line 167 { - break lab0; + v_6 = limit - cursor; + lab0: do { + // literal, line 167 + if (!(eq_s_b(2, "po"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_6; } - return false; - } while (false); - cursor = limit - v_6; + break; } - break; - } - // delete, line 170 - slice_del(); - return true; - } + // delete, line 170 + slice_del(); + return true; + } - private boolean r_tidy() { + private boolean r_tidy() { int v_1; int v_2; int v_3; @@ -758,166 +765,167 @@ int v_7; int v_8; int v_9; - // (, line 172 - // setlimit, line 173 - v_1 = limit - cursor; - // tomark, line 173 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - 
v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 173 - // do, line 174 - v_3 = limit - cursor; - lab0: do { - // (, line 174 - // and, line 174 - v_4 = limit - cursor; - // call LONG, line 174 - if (!r_LONG()) - { - break lab0; - } - cursor = limit - v_4; - // (, line 174 - // [, line 174 - ket = cursor; - // next, line 174 - if (cursor <= limit_backward) - { - break lab0; - } - cursor--; - // ], line 174 - bra = cursor; - // delete, line 174 - slice_del(); - } while (false); - cursor = limit - v_3; - // do, line 175 - v_5 = limit - cursor; - lab1: do { - // (, line 175 - // [, line 175 - ket = cursor; - if (!(in_grouping_b(g_AEI, 97, 228))) - { - break lab1; - } - // ], line 175 - bra = cursor; - if (!(out_grouping_b(g_V1, 97, 246))) - { - break lab1; - } - // delete, line 175 - slice_del(); - } while (false); - cursor = limit - v_5; - // do, line 176 - v_6 = limit - cursor; - lab2: do { - // (, line 176 - // [, line 176 - ket = cursor; - // literal, line 176 - if (!(eq_s_b(1, "j"))) - { - break lab2; - } - // ], line 176 - bra = cursor; - // or, line 176 - lab3: do { - v_7 = limit - cursor; - lab4: do { + // (, line 172 + // setlimit, line 173 + v_1 = limit - cursor; + // tomark, line 173 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 173 + // do, line 174 + v_3 = limit - cursor; + lab0: do { + // (, line 174 + // and, line 174 + v_4 = limit - cursor; + // call LONG, line 174 + if (!r_LONG()) + { + break lab0; + } + cursor = limit - v_4; + // (, line 174 + // [, line 174 + ket = cursor; + // next, line 174 + if (cursor <= limit_backward) + { + break lab0; + } + cursor--; + // ], line 174 + bra = cursor; + // delete, line 174 + slice_del(); + } while (false); + cursor = limit - v_3; + // do, line 175 + v_5 = limit - cursor; + lab1: do { + // (, line 175 + // [, line 175 + ket = cursor; + if (!(in_grouping_b(g_AEI, 97, 228))) + { + break lab1; + } + // ], line 175 + bra = cursor; + if (!(out_grouping_b(g_V1, 97, 246))) + { + break lab1; + } + // delete, line 175 + slice_del(); + } while (false); + cursor = limit - v_5; + // do, line 176 + v_6 = limit - cursor; + lab2: do { + // (, line 176 + // [, line 176 + ket = cursor; // literal, line 176 + if (!(eq_s_b(1, "j"))) + { + break lab2; + } + // ], line 176 + bra = cursor; + // or, line 176 + lab3: do { + v_7 = limit - cursor; + lab4: do { + // literal, line 176 + if (!(eq_s_b(1, "o"))) + { + break lab4; + } + break lab3; + } while (false); + cursor = limit - v_7; + // literal, line 176 + if (!(eq_s_b(1, "u"))) + { + break lab2; + } + } while (false); + // delete, line 176 + slice_del(); + } while (false); + cursor = limit - v_6; + // do, line 177 + v_8 = limit - cursor; + lab5: do { + // (, line 177 + // [, line 177 + ket = cursor; + // literal, line 177 if (!(eq_s_b(1, "o"))) { - break lab4; + break lab5; } - break lab3; + // ], line 177 + bra = cursor; + // literal, line 177 + if (!(eq_s_b(1, "j"))) + { + break lab5; + } + // delete, line 177 + slice_del(); } while (false); - cursor = limit - v_7; - // literal, line 176 - if (!(eq_s_b(1, "u"))) + cursor = limit - v_8; + limit_backward = v_2; + // goto, line 179 + golab6: while(true) { - break lab2; + v_9 = limit - cursor; + lab7: do { + if (!(out_grouping_b(g_V1, 97, 246))) + { + break lab7; + } + cursor = limit - v_9; + break golab6; + } while (false); + cursor = limit - v_9; + if (cursor <= limit_backward) + { + return false; + } + cursor--; } - } 
while (false); - // delete, line 176 - slice_del(); - } while (false); - cursor = limit - v_6; - // do, line 177 - v_8 = limit - cursor; - lab5: do { - // (, line 177 - // [, line 177 - ket = cursor; - // literal, line 177 - if (!(eq_s_b(1, "o"))) - { - break lab5; - } - // ], line 177 - bra = cursor; - // literal, line 177 - if (!(eq_s_b(1, "j"))) - { - break lab5; - } - // delete, line 177 - slice_del(); - } while (false); - cursor = limit - v_8; - limit_backward = v_2; - // goto, line 179 - golab6: while(true) - { - v_9 = limit - cursor; - lab7: do { - if (!(out_grouping_b(g_V1, 97, 246))) + // [, line 179 + ket = cursor; + // next, line 179 + if (cursor <= limit_backward) { - break lab7; + return false; } - cursor = limit - v_9; - break golab6; - } while (false); - cursor = limit - v_9; - if (cursor <= limit_backward) - { - return false; + cursor--; + // ], line 179 + bra = cursor; + // -> x, line 179 + S_x = slice_to(S_x); + // name x, line 179 + if (!(eq_v_b(S_x))) + { + return false; + } + // delete, line 179 + slice_del(); + return true; } - cursor--; - } - // [, line 179 - ket = cursor; - // next, line 179 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 179 - bra = cursor; - // -> x, line 179 - S_x = slice_to(S_x); - // name x, line 179 - if (!(eq_v_b(S_x))) - { - return false; - } - // delete, line 179 - slice_del(); - return true; - } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -927,108 +935,120 @@ int v_7; int v_8; int v_9; - // (, line 183 - // do, line 185 - v_1 = cursor; - lab0: do { - // call mark_regions, line 185 - if (!r_mark_regions()) - { - break lab0; - } - } while (false); - cursor = v_1; - // unset ending_removed, line 186 - B_ending_removed = false; - // backwards, line 187 - limit_backward = cursor; cursor = limit; - // (, line 187 - // do, line 188 - v_2 = limit - cursor; - lab1: do { - // call particle_etc, line 188 - if (!r_particle_etc()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 189 - v_3 = limit - cursor; - lab2: do { - // call possessive, line 189 - if (!r_possessive()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 190 - v_4 = limit - cursor; - lab3: do { - // call case_ending, line 190 - if (!r_case_ending()) - { - break lab3; - } - } while (false); - cursor = limit - v_4; - // do, line 191 - v_5 = limit - cursor; - lab4: do { - // call other_endings, line 191 - if (!r_other_endings()) - { - break lab4; - } - } while (false); - cursor = limit - v_5; - // or, line 192 - lab5: do { - v_6 = limit - cursor; - lab6: do { - // (, line 192 - // Boolean test ending_removed, line 192 - if (!(B_ending_removed)) - { - break lab6; - } - // do, line 192 - v_7 = limit - cursor; - lab7: do { - // call i_plural, line 192 - if (!r_i_plural()) + // (, line 183 + // do, line 185 + v_1 = cursor; + lab0: do { + // call mark_regions, line 185 + if (!r_mark_regions()) { - break lab7; + break lab0; } } while (false); - cursor = limit - v_7; - break lab5; - } while (false); - cursor = limit - v_6; - // do, line 192 - v_8 = limit - cursor; - lab8: do { - // call t_plural, line 192 - if (!r_t_plural()) - { - break lab8; - } - } while (false); - cursor = limit - v_8; - } while (false); - // do, line 193 - v_9 = limit - cursor; - lab9: do { - // call tidy, line 193 - if (!r_tidy()) - { - break lab9; + cursor = v_1; + // unset ending_removed, line 186 + B_ending_removed = false; + // backwards, line 187 + limit_backward = cursor; 
cursor = limit; + // (, line 187 + // do, line 188 + v_2 = limit - cursor; + lab1: do { + // call particle_etc, line 188 + if (!r_particle_etc()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 189 + v_3 = limit - cursor; + lab2: do { + // call possessive, line 189 + if (!r_possessive()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 190 + v_4 = limit - cursor; + lab3: do { + // call case_ending, line 190 + if (!r_case_ending()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // do, line 191 + v_5 = limit - cursor; + lab4: do { + // call other_endings, line 191 + if (!r_other_endings()) + { + break lab4; + } + } while (false); + cursor = limit - v_5; + // or, line 192 + lab5: do { + v_6 = limit - cursor; + lab6: do { + // (, line 192 + // Boolean test ending_removed, line 192 + if (!(B_ending_removed)) + { + break lab6; + } + // do, line 192 + v_7 = limit - cursor; + lab7: do { + // call i_plural, line 192 + if (!r_i_plural()) + { + break lab7; + } + } while (false); + cursor = limit - v_7; + break lab5; + } while (false); + cursor = limit - v_6; + // do, line 192 + v_8 = limit - cursor; + lab8: do { + // call t_plural, line 192 + if (!r_t_plural()) + { + break lab8; + } + } while (false); + cursor = limit - v_8; + } while (false); + // do, line 193 + v_9 = limit - cursor; + lab9: do { + // call tidy, line 193 + if (!r_tidy()) + { + break lab9; + } + } while (false); + cursor = limit - v_9; + cursor = limit_backward; return true; } - } while (false); - cursor = limit - v_9; - cursor = limit_backward; return true; + + @Override + public boolean equals( Object o ) { + return o instanceof FinnishStemmer; } + @Override + public int hashCode() { + return FinnishStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/FrenchStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/FrenchStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/FrenchStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/FrenchStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,566 +1,573 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
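+  * <p>
+  * As an editorial illustration (not part of the generated source), such a
+  * stemmer is normally wrapped in a token filter rather than invoked directly;
+  * assuming the SnowballFilter API from Lucene's analysis module:
+  * <pre>
+  * // tokenizer: any upstream TokenStream producing the terms to be stemmed
+  * TokenStream ts = new SnowballFilter(tokenizer, new FrenchStemmer());
+  * </pre>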
+ */ + public class FrenchStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "col", -1, -1, "", this), - new Among ( "par", -1, -1, "", this), - new Among ( "tap", -1, -1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "", -1, 4, "", this), - new Among ( "I", 0, 1, "", this), - new Among ( "U", 0, 2, "", this), - new Among ( "Y", 0, 3, "", this) - }; + private final static FrenchStemmer methodObject = new FrenchStemmer (); - private Among a_2[] = { - new Among ( "iqU", -1, 3, "", this), - new Among ( "abl", -1, 3, "", this), - new Among ( "I\u00E8r", -1, 4, "", this), - new Among ( "i\u00E8r", -1, 4, "", this), - new Among ( "eus", -1, 2, "", this), - new Among ( "iv", -1, 1, "", this) - }; + private final static Among a_0[] = { + new Among ( "col", -1, -1, "", methodObject ), + new Among ( "par", -1, -1, "", methodObject ), + new Among ( "tap", -1, -1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "ic", -1, 2, "", this), - new Among ( "abil", -1, 1, "", this), - new Among ( "iv", -1, 3, "", this) - }; + private final static Among a_1[] = { + new Among ( "", -1, 4, "", methodObject ), + new Among ( "I", 0, 1, "", methodObject ), + new Among ( "U", 0, 2, "", methodObject ), + new Among ( "Y", 0, 3, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "iqUe", -1, 1, "", this), - new Among ( "atrice", -1, 2, "", this), - new Among ( "ance", -1, 1, "", this), - new Among ( "ence", -1, 5, "", this), - new Among ( "logie", -1, 3, "", this), - new Among ( "able", -1, 1, "", this), - new Among ( "isme", -1, 1, "", this), - new Among ( "euse", -1, 11, "", this), - new Among ( "iste", -1, 1, "", this), - new Among ( "ive", -1, 8, "", this), - new Among ( "if", -1, 8, "", this), - new Among ( "usion", -1, 4, "", this), - new Among ( "ation", -1, 2, "", this), - new Among ( "ution", -1, 4, "", this), - new Among ( "ateur", -1, 2, "", this), - new Among ( "iqUes", -1, 1, "", this), - new Among ( "atrices", -1, 2, "", this), - new Among ( "ances", -1, 1, "", this), - new Among ( "ences", -1, 5, "", this), - new Among ( "logies", -1, 3, "", this), - new Among ( "ables", -1, 1, "", this), - new Among ( "ismes", -1, 1, "", this), - new Among ( "euses", -1, 11, "", this), - new Among ( "istes", -1, 1, "", this), - new Among ( "ives", -1, 8, "", this), - new Among ( "ifs", -1, 8, "", this), - new Among ( "usions", -1, 4, "", this), - new Among ( "ations", -1, 2, "", this), - new Among ( "utions", -1, 4, "", this), - new Among ( "ateurs", -1, 2, "", this), - new Among ( "ments", -1, 15, "", this), - new Among ( "ements", 30, 6, "", this), - new Among ( "issements", 31, 12, "", this), - new Among ( "it\u00E9s", -1, 7, "", this), - new Among ( "ment", -1, 15, "", this), - new Among ( "ement", 34, 6, "", this), - new Among ( "issement", 35, 12, "", this), - new Among ( "amment", 34, 13, "", this), - new Among ( "emment", 34, 14, "", this), - new Among ( "aux", -1, 10, "", this), - new Among ( "eaux", 39, 9, "", this), - new Among ( "eux", -1, 1, "", this), - new Among ( "it\u00E9", -1, 7, "", this) - }; + private final static Among a_2[] = { + new Among ( "iqU", -1, 3, "", methodObject ), + new Among ( "abl", -1, 3, "", methodObject ), + new Among ( "I\u00E8r", -1, 4, "", methodObject ), + new Among ( "i\u00E8r", -1, 4, "", methodObject ), + new Among ( "eus", -1, 2, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "ira", -1, 1, "", this), - 
new Among ( "ie", -1, 1, "", this), - new Among ( "isse", -1, 1, "", this), - new Among ( "issante", -1, 1, "", this), - new Among ( "i", -1, 1, "", this), - new Among ( "irai", 4, 1, "", this), - new Among ( "ir", -1, 1, "", this), - new Among ( "iras", -1, 1, "", this), - new Among ( "ies", -1, 1, "", this), - new Among ( "\u00EEmes", -1, 1, "", this), - new Among ( "isses", -1, 1, "", this), - new Among ( "issantes", -1, 1, "", this), - new Among ( "\u00EEtes", -1, 1, "", this), - new Among ( "is", -1, 1, "", this), - new Among ( "irais", 13, 1, "", this), - new Among ( "issais", 13, 1, "", this), - new Among ( "irions", -1, 1, "", this), - new Among ( "issions", -1, 1, "", this), - new Among ( "irons", -1, 1, "", this), - new Among ( "issons", -1, 1, "", this), - new Among ( "issants", -1, 1, "", this), - new Among ( "it", -1, 1, "", this), - new Among ( "irait", 21, 1, "", this), - new Among ( "issait", 21, 1, "", this), - new Among ( "issant", -1, 1, "", this), - new Among ( "iraIent", -1, 1, "", this), - new Among ( "issaIent", -1, 1, "", this), - new Among ( "irent", -1, 1, "", this), - new Among ( "issent", -1, 1, "", this), - new Among ( "iront", -1, 1, "", this), - new Among ( "\u00EEt", -1, 1, "", this), - new Among ( "iriez", -1, 1, "", this), - new Among ( "issiez", -1, 1, "", this), - new Among ( "irez", -1, 1, "", this), - new Among ( "issez", -1, 1, "", this) - }; + private final static Among a_3[] = { + new Among ( "ic", -1, 2, "", methodObject ), + new Among ( "abil", -1, 1, "", methodObject ), + new Among ( "iv", -1, 3, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "a", -1, 3, "", this), - new Among ( "era", 0, 2, "", this), - new Among ( "asse", -1, 3, "", this), - new Among ( "ante", -1, 3, "", this), - new Among ( "\u00E9e", -1, 2, "", this), - new Among ( "ai", -1, 3, "", this), - new Among ( "erai", 5, 2, "", this), - new Among ( "er", -1, 2, "", this), - new Among ( "as", -1, 3, "", this), - new Among ( "eras", 8, 2, "", this), - new Among ( "\u00E2mes", -1, 3, "", this), - new Among ( "asses", -1, 3, "", this), - new Among ( "antes", -1, 3, "", this), - new Among ( "\u00E2tes", -1, 3, "", this), - new Among ( "\u00E9es", -1, 2, "", this), - new Among ( "ais", -1, 3, "", this), - new Among ( "erais", 15, 2, "", this), - new Among ( "ions", -1, 1, "", this), - new Among ( "erions", 17, 2, "", this), - new Among ( "assions", 17, 3, "", this), - new Among ( "erons", -1, 2, "", this), - new Among ( "ants", -1, 3, "", this), - new Among ( "\u00E9s", -1, 2, "", this), - new Among ( "ait", -1, 3, "", this), - new Among ( "erait", 23, 2, "", this), - new Among ( "ant", -1, 3, "", this), - new Among ( "aIent", -1, 3, "", this), - new Among ( "eraIent", 26, 2, "", this), - new Among ( "\u00E8rent", -1, 2, "", this), - new Among ( "assent", -1, 3, "", this), - new Among ( "eront", -1, 2, "", this), - new Among ( "\u00E2t", -1, 3, "", this), - new Among ( "ez", -1, 2, "", this), - new Among ( "iez", 32, 2, "", this), - new Among ( "eriez", 33, 2, "", this), - new Among ( "assiez", 33, 3, "", this), - new Among ( "erez", 32, 2, "", this), - new Among ( "\u00E9", -1, 2, "", this) - }; + private final static Among a_4[] = { + new Among ( "iqUe", -1, 1, "", methodObject ), + new Among ( "atrice", -1, 2, "", methodObject ), + new Among ( "ance", -1, 1, "", methodObject ), + new Among ( "ence", -1, 5, "", methodObject ), + new Among ( "logie", -1, 3, "", methodObject ), + new Among ( "able", -1, 1, "", methodObject ), + new Among ( "isme", -1, 1, "", methodObject 
), + new Among ( "euse", -1, 11, "", methodObject ), + new Among ( "iste", -1, 1, "", methodObject ), + new Among ( "ive", -1, 8, "", methodObject ), + new Among ( "if", -1, 8, "", methodObject ), + new Among ( "usion", -1, 4, "", methodObject ), + new Among ( "ation", -1, 2, "", methodObject ), + new Among ( "ution", -1, 4, "", methodObject ), + new Among ( "ateur", -1, 2, "", methodObject ), + new Among ( "iqUes", -1, 1, "", methodObject ), + new Among ( "atrices", -1, 2, "", methodObject ), + new Among ( "ances", -1, 1, "", methodObject ), + new Among ( "ences", -1, 5, "", methodObject ), + new Among ( "logies", -1, 3, "", methodObject ), + new Among ( "ables", -1, 1, "", methodObject ), + new Among ( "ismes", -1, 1, "", methodObject ), + new Among ( "euses", -1, 11, "", methodObject ), + new Among ( "istes", -1, 1, "", methodObject ), + new Among ( "ives", -1, 8, "", methodObject ), + new Among ( "ifs", -1, 8, "", methodObject ), + new Among ( "usions", -1, 4, "", methodObject ), + new Among ( "ations", -1, 2, "", methodObject ), + new Among ( "utions", -1, 4, "", methodObject ), + new Among ( "ateurs", -1, 2, "", methodObject ), + new Among ( "ments", -1, 15, "", methodObject ), + new Among ( "ements", 30, 6, "", methodObject ), + new Among ( "issements", 31, 12, "", methodObject ), + new Among ( "it\u00E9s", -1, 7, "", methodObject ), + new Among ( "ment", -1, 15, "", methodObject ), + new Among ( "ement", 34, 6, "", methodObject ), + new Among ( "issement", 35, 12, "", methodObject ), + new Among ( "amment", 34, 13, "", methodObject ), + new Among ( "emment", 34, 14, "", methodObject ), + new Among ( "aux", -1, 10, "", methodObject ), + new Among ( "eaux", 39, 9, "", methodObject ), + new Among ( "eux", -1, 1, "", methodObject ), + new Among ( "it\u00E9", -1, 7, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "e", -1, 3, "", this), - new Among ( "I\u00E8re", 0, 2, "", this), - new Among ( "i\u00E8re", 0, 2, "", this), - new Among ( "ion", -1, 1, "", this), - new Among ( "Ier", -1, 2, "", this), - new Among ( "ier", -1, 2, "", this), - new Among ( "\u00EB", -1, 4, "", this) - }; + private final static Among a_5[] = { + new Among ( "ira", -1, 1, "", methodObject ), + new Among ( "ie", -1, 1, "", methodObject ), + new Among ( "isse", -1, 1, "", methodObject ), + new Among ( "issante", -1, 1, "", methodObject ), + new Among ( "i", -1, 1, "", methodObject ), + new Among ( "irai", 4, 1, "", methodObject ), + new Among ( "ir", -1, 1, "", methodObject ), + new Among ( "iras", -1, 1, "", methodObject ), + new Among ( "ies", -1, 1, "", methodObject ), + new Among ( "\u00EEmes", -1, 1, "", methodObject ), + new Among ( "isses", -1, 1, "", methodObject ), + new Among ( "issantes", -1, 1, "", methodObject ), + new Among ( "\u00EEtes", -1, 1, "", methodObject ), + new Among ( "is", -1, 1, "", methodObject ), + new Among ( "irais", 13, 1, "", methodObject ), + new Among ( "issais", 13, 1, "", methodObject ), + new Among ( "irions", -1, 1, "", methodObject ), + new Among ( "issions", -1, 1, "", methodObject ), + new Among ( "irons", -1, 1, "", methodObject ), + new Among ( "issons", -1, 1, "", methodObject ), + new Among ( "issants", -1, 1, "", methodObject ), + new Among ( "it", -1, 1, "", methodObject ), + new Among ( "irait", 21, 1, "", methodObject ), + new Among ( "issait", 21, 1, "", methodObject ), + new Among ( "issant", -1, 1, "", methodObject ), + new Among ( "iraIent", -1, 1, "", methodObject ), + new Among ( "issaIent", -1, 1, "", methodObject ), + new Among ( "irent", 
-1, 1, "", methodObject ), + new Among ( "issent", -1, 1, "", methodObject ), + new Among ( "iront", -1, 1, "", methodObject ), + new Among ( "\u00EEt", -1, 1, "", methodObject ), + new Among ( "iriez", -1, 1, "", methodObject ), + new Among ( "issiez", -1, 1, "", methodObject ), + new Among ( "irez", -1, 1, "", methodObject ), + new Among ( "issez", -1, 1, "", methodObject ) + }; - private Among a_8[] = { - new Among ( "ell", -1, -1, "", this), - new Among ( "eill", -1, -1, "", this), - new Among ( "enn", -1, -1, "", this), - new Among ( "onn", -1, -1, "", this), - new Among ( "ett", -1, -1, "", this) - }; + private final static Among a_6[] = { + new Among ( "a", -1, 3, "", methodObject ), + new Among ( "era", 0, 2, "", methodObject ), + new Among ( "asse", -1, 3, "", methodObject ), + new Among ( "ante", -1, 3, "", methodObject ), + new Among ( "\u00E9e", -1, 2, "", methodObject ), + new Among ( "ai", -1, 3, "", methodObject ), + new Among ( "erai", 5, 2, "", methodObject ), + new Among ( "er", -1, 2, "", methodObject ), + new Among ( "as", -1, 3, "", methodObject ), + new Among ( "eras", 8, 2, "", methodObject ), + new Among ( "\u00E2mes", -1, 3, "", methodObject ), + new Among ( "asses", -1, 3, "", methodObject ), + new Among ( "antes", -1, 3, "", methodObject ), + new Among ( "\u00E2tes", -1, 3, "", methodObject ), + new Among ( "\u00E9es", -1, 2, "", methodObject ), + new Among ( "ais", -1, 3, "", methodObject ), + new Among ( "erais", 15, 2, "", methodObject ), + new Among ( "ions", -1, 1, "", methodObject ), + new Among ( "erions", 17, 2, "", methodObject ), + new Among ( "assions", 17, 3, "", methodObject ), + new Among ( "erons", -1, 2, "", methodObject ), + new Among ( "ants", -1, 3, "", methodObject ), + new Among ( "\u00E9s", -1, 2, "", methodObject ), + new Among ( "ait", -1, 3, "", methodObject ), + new Among ( "erait", 23, 2, "", methodObject ), + new Among ( "ant", -1, 3, "", methodObject ), + new Among ( "aIent", -1, 3, "", methodObject ), + new Among ( "eraIent", 26, 2, "", methodObject ), + new Among ( "\u00E8rent", -1, 2, "", methodObject ), + new Among ( "assent", -1, 3, "", methodObject ), + new Among ( "eront", -1, 2, "", methodObject ), + new Among ( "\u00E2t", -1, 3, "", methodObject ), + new Among ( "ez", -1, 2, "", methodObject ), + new Among ( "iez", 32, 2, "", methodObject ), + new Among ( "eriez", 33, 2, "", methodObject ), + new Among ( "assiez", 33, 3, "", methodObject ), + new Among ( "erez", 32, 2, "", methodObject ), + new Among ( "\u00E9", -1, 2, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; + private final static Among a_7[] = { + new Among ( "e", -1, 3, "", methodObject ), + new Among ( "I\u00E8re", 0, 2, "", methodObject ), + new Among ( "i\u00E8re", 0, 2, "", methodObject ), + new Among ( "ion", -1, 1, "", methodObject ), + new Among ( "Ier", -1, 2, "", methodObject ), + new Among ( "ier", -1, 2, "", methodObject ), + new Among ( "\u00EB", -1, 4, "", methodObject ) + }; - private static final char g_keep_with_s[] = {1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + private final static Among a_8[] = { + new Among ( "ell", -1, -1, "", methodObject ), + new Among ( "eill", -1, -1, "", methodObject ), + new Among ( "enn", -1, -1, "", methodObject ), + new Among ( "onn", -1, -1, "", methodObject ), + new Among ( "ett", -1, -1, "", methodObject ) + }; + private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 130, 103, 8, 5 }; + + 
private static final char g_keep_with_s[] = {1, 65, 20, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 }; + private int I_p2; private int I_p1; private int I_pV; - private void copy_from(FrenchStemmer other) { - I_p2 = other.I_p2; - I_p1 = other.I_p1; - I_pV = other.I_pV; - super.copy_from(other); - } + private void copy_from(FrenchStemmer other) { + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_pV = other.I_pV; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int v_1; int v_2; int v_3; int v_4; - // repeat, line 38 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // goto, line 38 - golab2: while(true) + // repeat, line 38 + replab0: while(true) { - v_2 = cursor; - lab3: do { - // (, line 38 - // or, line 44 - lab4: do { - v_3 = cursor; - lab5: do { - // (, line 40 - if (!(in_grouping(g_v, 97, 251))) - { - break lab5; - } - // [, line 40 - bra = cursor; - // or, line 40 - lab6: do { - v_4 = cursor; - lab7: do { + v_1 = cursor; + lab1: do { + // goto, line 38 + golab2: while(true) + { + v_2 = cursor; + lab3: do { + // (, line 38 + // or, line 44 + lab4: do { + v_3 = cursor; + lab5: do { // (, line 40 - // literal, line 40 - if (!(eq_s(1, "u"))) - { - break lab7; - } - // ], line 40 - ket = cursor; if (!(in_grouping(g_v, 97, 251))) { - break lab7; + break lab5; } - // <-, line 40 - slice_from("U"); - break lab6; + // [, line 40 + bra = cursor; + // or, line 40 + lab6: do { + v_4 = cursor; + lab7: do { + // (, line 40 + // literal, line 40 + if (!(eq_s(1, "u"))) + { + break lab7; + } + // ], line 40 + ket = cursor; + if (!(in_grouping(g_v, 97, 251))) + { + break lab7; + } + // <-, line 40 + slice_from("U"); + break lab6; + } while (false); + cursor = v_4; + lab8: do { + // (, line 41 + // literal, line 41 + if (!(eq_s(1, "i"))) + { + break lab8; + } + // ], line 41 + ket = cursor; + if (!(in_grouping(g_v, 97, 251))) + { + break lab8; + } + // <-, line 41 + slice_from("I"); + break lab6; + } while (false); + cursor = v_4; + // (, line 42 + // literal, line 42 + if (!(eq_s(1, "y"))) + { + break lab5; + } + // ], line 42 + ket = cursor; + // <-, line 42 + slice_from("Y"); + } while (false); + break lab4; } while (false); - cursor = v_4; - lab8: do { - // (, line 41 - // literal, line 41 - if (!(eq_s(1, "i"))) + cursor = v_3; + lab9: do { + // (, line 45 + // [, line 45 + bra = cursor; + // literal, line 45 + if (!(eq_s(1, "y"))) { - break lab8; + break lab9; } - // ], line 41 + // ], line 45 ket = cursor; if (!(in_grouping(g_v, 97, 251))) { - break lab8; + break lab9; } - // <-, line 41 - slice_from("I"); - break lab6; + // <-, line 45 + slice_from("Y"); + break lab4; } while (false); - cursor = v_4; - // (, line 42 - // literal, line 42 - if (!(eq_s(1, "y"))) + cursor = v_3; + // (, line 47 + // literal, line 47 + if (!(eq_s(1, "q"))) { - break lab5; + break lab3; } - // ], line 42 + // [, line 47 + bra = cursor; + // literal, line 47 + if (!(eq_s(1, "u"))) + { + break lab3; + } + // ], line 47 ket = cursor; - // <-, line 42 - slice_from("Y"); + // <-, line 47 + slice_from("U"); } while (false); - break lab4; + cursor = v_2; + break golab2; } while (false); - cursor = v_3; - lab9: do { - // (, line 45 - // [, line 45 - bra = cursor; - // literal, line 45 - if (!(eq_s(1, "y"))) - { - break lab9; - } - // ], line 45 - ket = cursor; - if (!(in_grouping(g_v, 97, 251))) - { - break lab9; - } - // <-, line 45 - slice_from("Y"); - break lab4; - } while (false); - cursor = v_3; - // (, line 47 - // literal, line 47 - if (!(eq_s(1, "q"))) + cursor = v_2; + 
if (cursor >= limit) { - break lab3; + break lab1; } - // [, line 47 - bra = cursor; - // literal, line 47 - if (!(eq_s(1, "u"))) + cursor++; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; + } + return true; + } + + private boolean r_mark_regions() { + int v_1; + int v_2; + int v_4; + // (, line 50 + I_pV = limit; + I_p1 = limit; + I_p2 = limit; + // do, line 56 + v_1 = cursor; + lab0: do { + // (, line 56 + // or, line 58 + lab1: do { + v_2 = cursor; + lab2: do { + // (, line 57 + if (!(in_grouping(g_v, 97, 251))) { + break lab2; + } + if (!(in_grouping(g_v, 97, 251))) + { + break lab2; + } + // next, line 57 + if (cursor >= limit) + { + break lab2; + } + cursor++; + break lab1; + } while (false); + cursor = v_2; + lab3: do { + // among, line 59 + if (find_among(a_0, 3) == 0) + { break lab3; } - // ], line 47 - ket = cursor; - // <-, line 47 - slice_from("U"); + break lab1; } while (false); cursor = v_2; - break golab2; + // (, line 66 + // next, line 66 + if (cursor >= limit) + { + break lab0; + } + cursor++; + // gopast, line 66 + golab4: while(true) + { + lab5: do { + if (!(in_grouping(g_v, 97, 251))) + { + break lab5; + } + break golab4; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; + } } while (false); - cursor = v_2; - if (cursor >= limit) + // setmark pV, line 67 + I_pV = cursor; + } while (false); + cursor = v_1; + // do, line 69 + v_4 = cursor; + lab6: do { + // (, line 69 + // gopast, line 70 + golab7: while(true) { - break lab1; + lab8: do { + if (!(in_grouping(g_v, 97, 251))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + break lab6; + } + cursor++; } - cursor++; - } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } - - private boolean r_mark_regions() { - int v_1; - int v_2; - int v_4; - // (, line 50 - I_pV = limit; - I_p1 = limit; - I_p2 = limit; - // do, line 56 - v_1 = cursor; - lab0: do { - // (, line 56 - // or, line 58 - lab1: do { - v_2 = cursor; - lab2: do { - // (, line 57 - if (!(in_grouping(g_v, 97, 251))) + // gopast, line 70 + golab9: while(true) { - break lab2; + lab10: do { + if (!(out_grouping(g_v, 97, 251))) + { + break lab10; + } + break golab9; + } while (false); + if (cursor >= limit) + { + break lab6; + } + cursor++; } - if (!(in_grouping(g_v, 97, 251))) + // setmark p1, line 70 + I_p1 = cursor; + // gopast, line 71 + golab11: while(true) { - break lab2; + lab12: do { + if (!(in_grouping(g_v, 97, 251))) + { + break lab12; + } + break golab11; + } while (false); + if (cursor >= limit) + { + break lab6; + } + cursor++; } - // next, line 57 - if (cursor >= limit) + // gopast, line 71 + golab13: while(true) { - break lab2; + lab14: do { + if (!(out_grouping(g_v, 97, 251))) + { + break lab14; + } + break golab13; + } while (false); + if (cursor >= limit) + { + break lab6; + } + cursor++; } - cursor++; - break lab1; + // setmark p2, line 71 + I_p2 = cursor; } while (false); - cursor = v_2; - lab3: do { - // among, line 59 - if (find_among(a_0, 3) == 0) - { - break lab3; - } - break lab1; - } while (false); - cursor = v_2; - // (, line 66 - // next, line 66 - if (cursor >= limit) + cursor = v_4; + return true; + } + + private boolean r_postlude() { + int among_var; + int v_1; + // repeat, line 75 + replab0: while(true) { - break lab0; - } - cursor++; - // gopast, line 66 - golab4: while(true) - { - lab5: do { - if (!(in_grouping(g_v, 97, 251))) + v_1 = cursor; + lab1: do { + // (, line 75 + // [, line 77 + bra = cursor; 
+ // substring, line 77 + among_var = find_among(a_1, 4); + if (among_var == 0) { - break lab5; + break lab1; } - break golab4; + // ], line 77 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 78 + // <-, line 78 + slice_from("i"); + break; + case 2: + // (, line 79 + // <-, line 79 + slice_from("u"); + break; + case 3: + // (, line 80 + // <-, line 80 + slice_from("y"); + break; + case 4: + // (, line 81 + // next, line 81 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; } while (false); - if (cursor >= limit) - { - break lab0; - } - cursor++; + cursor = v_1; + break replab0; } - } while (false); - // setmark pV, line 67 - I_pV = cursor; - } while (false); - cursor = v_1; - // do, line 69 - v_4 = cursor; - lab6: do { - // (, line 69 - // gopast, line 70 - golab7: while(true) - { - lab8: do { - if (!(in_grouping(g_v, 97, 251))) - { - break lab8; - } - break golab7; - } while (false); - if (cursor >= limit) - { - break lab6; - } - cursor++; + return true; } - // gopast, line 70 - golab9: while(true) - { - lab10: do { - if (!(out_grouping(g_v, 97, 251))) - { - break lab10; - } - break golab9; - } while (false); - if (cursor >= limit) + + private boolean r_RV() { + if (!(I_pV <= cursor)) { - break lab6; + return false; } - cursor++; + return true; } - // setmark p1, line 70 - I_p1 = cursor; - // gopast, line 71 - golab11: while(true) - { - lab12: do { - if (!(in_grouping(g_v, 97, 251))) - { - break lab12; - } - break golab11; - } while (false); - if (cursor >= limit) + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { - break lab6; + return false; } - cursor++; + return true; } - // gopast, line 71 - golab13: while(true) - { - lab14: do { - if (!(out_grouping(g_v, 97, 251))) - { - break lab14; - } - break golab13; - } while (false); - if (cursor >= limit) + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab6; + return false; } - cursor++; + return true; } - // setmark p2, line 71 - I_p2 = cursor; - } while (false); - cursor = v_4; - return true; - } - private boolean r_postlude() { + private boolean r_standard_suffix() { int among_var; int v_1; - // repeat, line 75 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 75 - // [, line 77 - bra = cursor; - // substring, line 77 - among_var = find_among(a_1, 4); - if (among_var == 0) - { - break lab1; - } - // ], line 77 - ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 78 - // <-, line 78 - slice_from("i"); - break; - case 2: - // (, line 79 - // <-, line 79 - slice_from("u"); - break; - case 3: - // (, line 80 - // <-, line 80 - slice_from("y"); - break; - case 4: - // (, line 81 - // next, line 81 - if (cursor >= limit) - { - break lab1; - } - cursor++; - break; - } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } - - private boolean r_RV() { - if (!(I_pV <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_standard_suffix() { - int among_var; - int v_1; int v_2; int v_3; int v_4; @@ -571,777 +578,778 @@ int v_9; int v_10; int v_11; - // (, line 91 - // [, line 92 - ket = cursor; - // substring, line 92 - among_var = find_among_b(a_4, 43); - if (among_var == 0) - { - return false; - } - // ], line 92 - bra = cursor; - switch(among_var) { - 
case 0: - return false; - case 1: - // (, line 96 - // call R2, line 96 - if (!r_R2()) + // (, line 91 + // [, line 92 + ket = cursor; + // substring, line 92 + among_var = find_among_b(a_4, 43); + if (among_var == 0) { return false; } - // delete, line 96 - slice_del(); - break; - case 2: - // (, line 99 - // call R2, line 99 - if (!r_R2()) - { - return false; - } - // delete, line 99 - slice_del(); - // try, line 100 - v_1 = limit - cursor; - lab0: do { - // (, line 100 - // [, line 100 - ket = cursor; - // literal, line 100 - if (!(eq_s_b(2, "ic"))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 100 - bra = cursor; - // or, line 100 - lab1: do { - v_2 = limit - cursor; - lab2: do { + // ], line 92 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 96 + // call R2, line 96 + if (!r_R2()) + { + return false; + } + // delete, line 96 + slice_del(); + break; + case 2: + // (, line 99 + // call R2, line 99 + if (!r_R2()) + { + return false; + } + // delete, line 99 + slice_del(); + // try, line 100 + v_1 = limit - cursor; + lab0: do { // (, line 100 - // call R2, line 100 - if (!r_R2()) - { - break lab2; - } - // delete, line 100 - slice_del(); - break lab1; - } while (false); - cursor = limit - v_2; - // <-, line 100 - slice_from("iqU"); - } while (false); - } while (false); - break; - case 3: - // (, line 104 - // call R2, line 104 - if (!r_R2()) - { - return false; - } - // <-, line 104 - slice_from("log"); - break; - case 4: - // (, line 107 - // call R2, line 107 - if (!r_R2()) - { - return false; - } - // <-, line 107 - slice_from("u"); - break; - case 5: - // (, line 110 - // call R2, line 110 - if (!r_R2()) - { - return false; - } - // <-, line 110 - slice_from("ent"); - break; - case 6: - // (, line 113 - // call RV, line 114 - if (!r_RV()) - { - return false; - } - // delete, line 114 - slice_del(); - // try, line 115 - v_3 = limit - cursor; - lab3: do { - // (, line 115 - // [, line 116 - ket = cursor; - // substring, line 116 - among_var = find_among_b(a_2, 6); - if (among_var == 0) - { - cursor = limit - v_3; - break lab3; - } - // ], line 116 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_3; - break lab3; - case 1: - // (, line 117 - // call R2, line 117 - if (!r_R2()) - { - cursor = limit - v_3; - break lab3; - } - // delete, line 117 - slice_del(); - // [, line 117 + // [, line 100 ket = cursor; - // literal, line 117 - if (!(eq_s_b(2, "at"))) + // literal, line 100 + if (!(eq_s_b(2, "ic"))) { - cursor = limit - v_3; - break lab3; + cursor = limit - v_1; + break lab0; } - // ], line 117 + // ], line 100 bra = cursor; - // call R2, line 117 - if (!r_R2()) - { - cursor = limit - v_3; - break lab3; - } - // delete, line 117 - slice_del(); - break; - case 2: - // (, line 118 - // or, line 118 - lab4: do { - v_4 = limit - cursor; - lab5: do { - // (, line 118 - // call R2, line 118 + // or, line 100 + lab1: do { + v_2 = limit - cursor; + lab2: do { + // (, line 100 + // call R2, line 100 if (!r_R2()) { - break lab5; + break lab2; } - // delete, line 118 + // delete, line 100 slice_del(); - break lab4; + break lab1; } while (false); - cursor = limit - v_4; - // (, line 118 - // call R1, line 118 - if (!r_R1()) - { - cursor = limit - v_3; - break lab3; - } - // <-, line 118 - slice_from("eux"); + cursor = limit - v_2; + // <-, line 100 + slice_from("iqU"); } while (false); - break; - case 3: - // (, line 120 - // call R2, line 120 - if (!r_R2()) + } while (false); + break; + case 3: + // (, line 104 + // call R2, 
line 104 + if (!r_R2()) + { + return false; + } + // <-, line 104 + slice_from("log"); + break; + case 4: + // (, line 107 + // call R2, line 107 + if (!r_R2()) + { + return false; + } + // <-, line 107 + slice_from("u"); + break; + case 5: + // (, line 110 + // call R2, line 110 + if (!r_R2()) + { + return false; + } + // <-, line 110 + slice_from("ent"); + break; + case 6: + // (, line 113 + // call RV, line 114 + if (!r_RV()) + { + return false; + } + // delete, line 114 + slice_del(); + // try, line 115 + v_3 = limit - cursor; + lab3: do { + // (, line 115 + // [, line 116 + ket = cursor; + // substring, line 116 + among_var = find_among_b(a_2, 6); + if (among_var == 0) { cursor = limit - v_3; break lab3; } - // delete, line 120 - slice_del(); - break; - case 4: - // (, line 122 - // call RV, line 122 - if (!r_RV()) - { - cursor = limit - v_3; - break lab3; - } - // <-, line 122 - slice_from("i"); - break; - } - } while (false); - break; - case 7: - // (, line 128 - // call R2, line 129 - if (!r_R2()) - { - return false; - } - // delete, line 129 - slice_del(); - // try, line 130 - v_5 = limit - cursor; - lab6: do { - // (, line 130 - // [, line 131 - ket = cursor; - // substring, line 131 - among_var = find_among_b(a_3, 3); - if (among_var == 0) - { - cursor = limit - v_5; - break lab6; - } - // ], line 131 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_5; - break lab6; - case 1: - // (, line 132 - // or, line 132 - lab7: do { - v_6 = limit - cursor; - lab8: do { - // (, line 132 - // call R2, line 132 + // ], line 116 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_3; + break lab3; + case 1: + // (, line 117 + // call R2, line 117 if (!r_R2()) { - break lab8; + cursor = limit - v_3; + break lab3; } - // delete, line 132 + // delete, line 117 slice_del(); - break lab7; - } while (false); - cursor = limit - v_6; - // <-, line 132 - slice_from("abl"); - } while (false); - break; - case 2: - // (, line 133 - // or, line 133 - lab9: do { - v_7 = limit - cursor; - lab10: do { - // (, line 133 - // call R2, line 133 + // [, line 117 + ket = cursor; + // literal, line 117 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_3; + break lab3; + } + // ], line 117 + bra = cursor; + // call R2, line 117 if (!r_R2()) { - break lab10; + cursor = limit - v_3; + break lab3; } - // delete, line 133 + // delete, line 117 slice_del(); - break lab9; - } while (false); - cursor = limit - v_7; - // <-, line 133 - slice_from("iqU"); - } while (false); - break; - case 3: - // (, line 134 - // call R2, line 134 - if (!r_R2()) + break; + case 2: + // (, line 118 + // or, line 118 + lab4: do { + v_4 = limit - cursor; + lab5: do { + // (, line 118 + // call R2, line 118 + if (!r_R2()) + { + break lab5; + } + // delete, line 118 + slice_del(); + break lab4; + } while (false); + cursor = limit - v_4; + // (, line 118 + // call R1, line 118 + if (!r_R1()) + { + cursor = limit - v_3; + break lab3; + } + // <-, line 118 + slice_from("eux"); + } while (false); + break; + case 3: + // (, line 120 + // call R2, line 120 + if (!r_R2()) + { + cursor = limit - v_3; + break lab3; + } + // delete, line 120 + slice_del(); + break; + case 4: + // (, line 122 + // call RV, line 122 + if (!r_RV()) + { + cursor = limit - v_3; + break lab3; + } + // <-, line 122 + slice_from("i"); + break; + } + } while (false); + break; + case 7: + // (, line 128 + // call R2, line 129 + if (!r_R2()) + { + return false; + } + // delete, line 129 + slice_del(); + // try, line 130 + v_5 = limit - cursor; + 
lab6: do { + // (, line 130 + // [, line 131 + ket = cursor; + // substring, line 131 + among_var = find_among_b(a_3, 3); + if (among_var == 0) { cursor = limit - v_5; break lab6; } - // delete, line 134 - slice_del(); - break; - } - } while (false); - break; - case 8: - // (, line 140 - // call R2, line 141 - if (!r_R2()) - { - return false; - } - // delete, line 141 - slice_del(); - // try, line 142 - v_8 = limit - cursor; - lab11: do { - // (, line 142 - // [, line 142 - ket = cursor; - // literal, line 142 - if (!(eq_s_b(2, "at"))) - { - cursor = limit - v_8; - break lab11; - } - // ], line 142 - bra = cursor; - // call R2, line 142 - if (!r_R2()) - { - cursor = limit - v_8; - break lab11; - } - // delete, line 142 - slice_del(); - // [, line 142 - ket = cursor; - // literal, line 142 - if (!(eq_s_b(2, "ic"))) - { - cursor = limit - v_8; - break lab11; - } - // ], line 142 - bra = cursor; - // or, line 142 - lab12: do { - v_9 = limit - cursor; - lab13: do { + // ], line 131 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_5; + break lab6; + case 1: + // (, line 132 + // or, line 132 + lab7: do { + v_6 = limit - cursor; + lab8: do { + // (, line 132 + // call R2, line 132 + if (!r_R2()) + { + break lab8; + } + // delete, line 132 + slice_del(); + break lab7; + } while (false); + cursor = limit - v_6; + // <-, line 132 + slice_from("abl"); + } while (false); + break; + case 2: + // (, line 133 + // or, line 133 + lab9: do { + v_7 = limit - cursor; + lab10: do { + // (, line 133 + // call R2, line 133 + if (!r_R2()) + { + break lab10; + } + // delete, line 133 + slice_del(); + break lab9; + } while (false); + cursor = limit - v_7; + // <-, line 133 + slice_from("iqU"); + } while (false); + break; + case 3: + // (, line 134 + // call R2, line 134 + if (!r_R2()) + { + cursor = limit - v_5; + break lab6; + } + // delete, line 134 + slice_del(); + break; + } + } while (false); + break; + case 8: + // (, line 140 + // call R2, line 141 + if (!r_R2()) + { + return false; + } + // delete, line 141 + slice_del(); + // try, line 142 + v_8 = limit - cursor; + lab11: do { // (, line 142 + // [, line 142 + ket = cursor; + // literal, line 142 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_8; + break lab11; + } + // ], line 142 + bra = cursor; // call R2, line 142 if (!r_R2()) { - break lab13; + cursor = limit - v_8; + break lab11; } // delete, line 142 slice_del(); - break lab12; + // [, line 142 + ket = cursor; + // literal, line 142 + if (!(eq_s_b(2, "ic"))) + { + cursor = limit - v_8; + break lab11; + } + // ], line 142 + bra = cursor; + // or, line 142 + lab12: do { + v_9 = limit - cursor; + lab13: do { + // (, line 142 + // call R2, line 142 + if (!r_R2()) + { + break lab13; + } + // delete, line 142 + slice_del(); + break lab12; + } while (false); + cursor = limit - v_9; + // <-, line 142 + slice_from("iqU"); + } while (false); } while (false); - cursor = limit - v_9; - // <-, line 142 - slice_from("iqU"); - } while (false); - } while (false); - break; - case 9: - // (, line 144 - // <-, line 144 - slice_from("eau"); - break; - case 10: - // (, line 145 - // call R1, line 145 - if (!r_R1()) - { - return false; - } - // <-, line 145 - slice_from("al"); - break; - case 11: - // (, line 147 - // or, line 147 - lab14: do { - v_10 = limit - cursor; - lab15: do { + break; + case 9: + // (, line 144 + // <-, line 144 + slice_from("eau"); + break; + case 10: + // (, line 145 + // call R1, line 145 + if (!r_R1()) + { + return false; + } + // <-, line 145 + slice_from("al"); + 
break; + case 11: // (, line 147 - // call R2, line 147 - if (!r_R2()) + // or, line 147 + lab14: do { + v_10 = limit - cursor; + lab15: do { + // (, line 147 + // call R2, line 147 + if (!r_R2()) + { + break lab15; + } + // delete, line 147 + slice_del(); + break lab14; + } while (false); + cursor = limit - v_10; + // (, line 147 + // call R1, line 147 + if (!r_R1()) + { + return false; + } + // <-, line 147 + slice_from("eux"); + } while (false); + break; + case 12: + // (, line 150 + // call R1, line 150 + if (!r_R1()) { - break lab15; + return false; } - // delete, line 147 + if (!(out_grouping_b(g_v, 97, 251))) + { + return false; + } + // delete, line 150 slice_del(); - break lab14; - } while (false); - cursor = limit - v_10; - // (, line 147 - // call R1, line 147 - if (!r_R1()) - { + break; + case 13: + // (, line 155 + // call RV, line 155 + if (!r_RV()) + { + return false; + } + // fail, line 155 + // (, line 155 + // <-, line 155 + slice_from("ant"); return false; - } - // <-, line 147 - slice_from("eux"); - } while (false); - break; - case 12: - // (, line 150 - // call R1, line 150 - if (!r_R1()) - { - return false; + case 14: + // (, line 156 + // call RV, line 156 + if (!r_RV()) + { + return false; + } + // fail, line 156 + // (, line 156 + // <-, line 156 + slice_from("ent"); + return false; + case 15: + // (, line 158 + // test, line 158 + v_11 = limit - cursor; + // (, line 158 + if (!(in_grouping_b(g_v, 97, 251))) + { + return false; + } + // call RV, line 158 + if (!r_RV()) + { + return false; + } + cursor = limit - v_11; + // fail, line 158 + // (, line 158 + // delete, line 158 + slice_del(); + return false; } - if (!(out_grouping_b(g_v, 97, 251))) + return true; + } + + private boolean r_i_verb_suffix() { + int among_var; + int v_1; + int v_2; + // setlimit, line 163 + v_1 = limit - cursor; + // tomark, line 163 + if (cursor < I_pV) { return false; } - // delete, line 150 - slice_del(); - break; - case 13: - // (, line 155 - // call RV, line 155 - if (!r_RV()) + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 163 + // [, line 164 + ket = cursor; + // substring, line 164 + among_var = find_among_b(a_5, 35); + if (among_var == 0) { + limit_backward = v_2; return false; } - // fail, line 155 - // (, line 155 - // <-, line 155 - slice_from("ant"); - return false; - case 14: - // (, line 156 - // call RV, line 156 - if (!r_RV()) - { - return false; + // ], line 164 + bra = cursor; + switch(among_var) { + case 0: + limit_backward = v_2; + return false; + case 1: + // (, line 170 + if (!(out_grouping_b(g_v, 97, 251))) + { + limit_backward = v_2; + return false; + } + // delete, line 170 + slice_del(); + break; } - // fail, line 156 - // (, line 156 - // <-, line 156 - slice_from("ent"); - return false; - case 15: - // (, line 158 - // test, line 158 - v_11 = limit - cursor; - // (, line 158 - if (!(in_grouping_b(g_v, 97, 251))) - { - return false; - } - // call RV, line 158 - if (!r_RV()) - { - return false; - } - cursor = limit - v_11; - // fail, line 158 - // (, line 158 - // delete, line 158 - slice_del(); - return false; - } - return true; - } + limit_backward = v_2; + return true; + } - private boolean r_i_verb_suffix() { + private boolean r_verb_suffix() { int among_var; int v_1; int v_2; - // setlimit, line 163 - v_1 = limit - cursor; - // tomark, line 163 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 163 - // 
[, line 164 - ket = cursor; - // substring, line 164 - among_var = find_among_b(a_5, 35); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 164 - bra = cursor; - switch(among_var) { - case 0: - limit_backward = v_2; - return false; - case 1: - // (, line 170 - if (!(out_grouping_b(g_v, 97, 251))) + int v_3; + // setlimit, line 174 + v_1 = limit - cursor; + // tomark, line 174 + if (cursor < I_pV) { + return false; + } + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 174 + // [, line 175 + ket = cursor; + // substring, line 175 + among_var = find_among_b(a_6, 38); + if (among_var == 0) + { limit_backward = v_2; return false; } - // delete, line 170 - slice_del(); - break; - } - limit_backward = v_2; - return true; - } + // ], line 175 + bra = cursor; + switch(among_var) { + case 0: + limit_backward = v_2; + return false; + case 1: + // (, line 177 + // call R2, line 177 + if (!r_R2()) + { + limit_backward = v_2; + return false; + } + // delete, line 177 + slice_del(); + break; + case 2: + // (, line 185 + // delete, line 185 + slice_del(); + break; + case 3: + // (, line 190 + // delete, line 190 + slice_del(); + // try, line 191 + v_3 = limit - cursor; + lab0: do { + // (, line 191 + // [, line 191 + ket = cursor; + // literal, line 191 + if (!(eq_s_b(1, "e"))) + { + cursor = limit - v_3; + break lab0; + } + // ], line 191 + bra = cursor; + // delete, line 191 + slice_del(); + } while (false); + break; + } + limit_backward = v_2; + return true; + } - private boolean r_verb_suffix() { + private boolean r_residual_suffix() { int among_var; int v_1; int v_2; int v_3; - // setlimit, line 174 - v_1 = limit - cursor; - // tomark, line 174 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 174 - // [, line 175 - ket = cursor; - // substring, line 175 - among_var = find_among_b(a_6, 38); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 175 - bra = cursor; - switch(among_var) { - case 0: - limit_backward = v_2; - return false; - case 1: - // (, line 177 - // call R2, line 177 - if (!r_R2()) - { - limit_backward = v_2; - return false; - } - // delete, line 177 - slice_del(); - break; - case 2: - // (, line 185 - // delete, line 185 - slice_del(); - break; - case 3: - // (, line 190 - // delete, line 190 - slice_del(); - // try, line 191 - v_3 = limit - cursor; + int v_4; + int v_5; + // (, line 198 + // try, line 199 + v_1 = limit - cursor; lab0: do { - // (, line 191 - // [, line 191 + // (, line 199 + // [, line 199 ket = cursor; - // literal, line 191 - if (!(eq_s_b(1, "e"))) + // literal, line 199 + if (!(eq_s_b(1, "s"))) { - cursor = limit - v_3; + cursor = limit - v_1; break lab0; } - // ], line 191 + // ], line 199 bra = cursor; - // delete, line 191 + // test, line 199 + v_2 = limit - cursor; + if (!(out_grouping_b(g_keep_with_s, 97, 232))) + { + cursor = limit - v_1; + break lab0; + } + cursor = limit - v_2; + // delete, line 199 slice_del(); } while (false); - break; - } - limit_backward = v_2; - return true; - } - - private boolean r_residual_suffix() { - int among_var; - int v_1; - int v_2; - int v_3; - int v_4; - int v_5; - // (, line 198 - // try, line 199 - v_1 = limit - cursor; - lab0: do { - // (, line 199 - // [, line 199 - ket = cursor; - // literal, line 199 - if (!(eq_s_b(1, "s"))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 199 - bra = cursor; - 
// test, line 199 - v_2 = limit - cursor; - if (!(out_grouping_b(g_keep_with_s, 97, 232))) - { - cursor = limit - v_1; - break lab0; - } - cursor = limit - v_2; - // delete, line 199 - slice_del(); - } while (false); - // setlimit, line 200 - v_3 = limit - cursor; - // tomark, line 200 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_4 = limit_backward; - limit_backward = cursor; - cursor = limit - v_3; - // (, line 200 - // [, line 201 - ket = cursor; - // substring, line 201 - among_var = find_among_b(a_7, 7); - if (among_var == 0) - { - limit_backward = v_4; - return false; - } - // ], line 201 - bra = cursor; - switch(among_var) { - case 0: - limit_backward = v_4; - return false; - case 1: - // (, line 202 - // call R2, line 202 - if (!r_R2()) + // setlimit, line 200 + v_3 = limit - cursor; + // tomark, line 200 + if (cursor < I_pV) { + return false; + } + cursor = I_pV; + v_4 = limit_backward; + limit_backward = cursor; + cursor = limit - v_3; + // (, line 200 + // [, line 201 + ket = cursor; + // substring, line 201 + among_var = find_among_b(a_7, 7); + if (among_var == 0) + { limit_backward = v_4; return false; } - // or, line 202 - lab1: do { - v_5 = limit - cursor; - lab2: do { - // literal, line 202 - if (!(eq_s_b(1, "s"))) - { - break lab2; - } - break lab1; - } while (false); - cursor = limit - v_5; - // literal, line 202 - if (!(eq_s_b(1, "t"))) - { + // ], line 201 + bra = cursor; + switch(among_var) { + case 0: limit_backward = v_4; return false; - } - } while (false); - // delete, line 202 - slice_del(); - break; - case 2: - // (, line 204 - // <-, line 204 - slice_from("i"); - break; - case 3: - // (, line 205 - // delete, line 205 - slice_del(); - break; - case 4: - // (, line 206 - // literal, line 206 - if (!(eq_s_b(2, "gu"))) + case 1: + // (, line 202 + // call R2, line 202 + if (!r_R2()) + { + limit_backward = v_4; + return false; + } + // or, line 202 + lab1: do { + v_5 = limit - cursor; + lab2: do { + // literal, line 202 + if (!(eq_s_b(1, "s"))) + { + break lab2; + } + break lab1; + } while (false); + cursor = limit - v_5; + // literal, line 202 + if (!(eq_s_b(1, "t"))) + { + limit_backward = v_4; + return false; + } + } while (false); + // delete, line 202 + slice_del(); + break; + case 2: + // (, line 204 + // <-, line 204 + slice_from("i"); + break; + case 3: + // (, line 205 + // delete, line 205 + slice_del(); + break; + case 4: + // (, line 206 + // literal, line 206 + if (!(eq_s_b(2, "gu"))) + { + limit_backward = v_4; + return false; + } + // delete, line 206 + slice_del(); + break; + } + limit_backward = v_4; + return true; + } + + private boolean r_un_double() { + int v_1; + // (, line 211 + // test, line 212 + v_1 = limit - cursor; + // among, line 212 + if (find_among_b(a_8, 5) == 0) { - limit_backward = v_4; return false; } - // delete, line 206 + cursor = limit - v_1; + // [, line 212 + ket = cursor; + // next, line 212 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // ], line 212 + bra = cursor; + // delete, line 212 slice_del(); - break; - } - limit_backward = v_4; - return true; - } + return true; + } - private boolean r_un_double() { - int v_1; - // (, line 211 - // test, line 212 - v_1 = limit - cursor; - // among, line 212 - if (find_among_b(a_8, 5) == 0) - { - return false; - } - cursor = limit - v_1; - // [, line 212 - ket = cursor; - // next, line 212 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 212 - bra = cursor; - // delete, line 212 - slice_del(); - return true; 
- } - - private boolean r_un_accent() { + private boolean r_un_accent() { int v_3; - // (, line 215 - // atleast, line 216 - { - int v_1 = 1; - // atleast, line 216 - replab0: while(true) - { - lab1: do { - if (!(out_grouping_b(g_v, 97, 251))) + // (, line 215 + // atleast, line 216 + { + int v_1 = 1; + // atleast, line 216 + replab0: while(true) { - break lab1; + lab1: do { + if (!(out_grouping_b(g_v, 97, 251))) + { + break lab1; + } + v_1--; + continue replab0; + } while (false); + break replab0; } - v_1--; - continue replab0; + if (v_1 > 0) + { + return false; + } + } + // [, line 217 + ket = cursor; + // or, line 217 + lab2: do { + v_3 = limit - cursor; + lab3: do { + // literal, line 217 + if (!(eq_s_b(1, "\u00E9"))) + { + break lab3; + } + break lab2; + } while (false); + cursor = limit - v_3; + // literal, line 217 + if (!(eq_s_b(1, "\u00E8"))) + { + return false; + } } while (false); - break replab0; + // ], line 217 + bra = cursor; + // <-, line 217 + slice_from("e"); + return true; } - if (v_1 > 0) - { - return false; - } - } - // [, line 217 - ket = cursor; - // or, line 217 - lab2: do { - v_3 = limit - cursor; - lab3: do { - // literal, line 217 - if (!(eq_s_b(1, "\u00E9"))) - { - break lab3; - } - break lab2; - } while (false); - cursor = limit - v_3; - // literal, line 217 - if (!(eq_s_b(1, "\u00E8"))) - { - return false; - } - } while (false); - // ], line 217 - bra = cursor; - // <-, line 217 - slice_from("e"); - return true; - } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -1353,149 +1361,161 @@ int v_9; int v_10; int v_11; - // (, line 221 - // do, line 223 - v_1 = cursor; - lab0: do { - // call prelude, line 223 - if (!r_prelude()) - { - break lab0; - } - } while (false); - cursor = v_1; - // do, line 224 - v_2 = cursor; - lab1: do { - // call mark_regions, line 224 - if (!r_mark_regions()) - { - break lab1; - } - } while (false); - cursor = v_2; - // backwards, line 225 - limit_backward = cursor; cursor = limit; - // (, line 225 - // do, line 227 - v_3 = limit - cursor; - lab2: do { - // (, line 227 - // or, line 237 - lab3: do { - v_4 = limit - cursor; - lab4: do { - // (, line 228 - // and, line 233 - v_5 = limit - cursor; - // (, line 229 - // or, line 229 - lab5: do { - v_6 = limit - cursor; - lab6: do { - // call standard_suffix, line 229 - if (!r_standard_suffix()) - { - break lab6; - } - break lab5; - } while (false); - cursor = limit - v_6; - lab7: do { - // call i_verb_suffix, line 230 - if (!r_i_verb_suffix()) - { - break lab7; - } - break lab5; - } while (false); - cursor = limit - v_6; - // call verb_suffix, line 231 - if (!r_verb_suffix()) - { - break lab4; - } - } while (false); - cursor = limit - v_5; - // try, line 234 - v_7 = limit - cursor; - lab8: do { - // (, line 234 - // [, line 234 - ket = cursor; - // or, line 234 - lab9: do { - v_8 = limit - cursor; - lab10: do { - // (, line 234 - // literal, line 234 - if (!(eq_s_b(1, "Y"))) + // (, line 221 + // do, line 223 + v_1 = cursor; + lab0: do { + // call prelude, line 223 + if (!r_prelude()) + { + break lab0; + } + } while (false); + cursor = v_1; + // do, line 224 + v_2 = cursor; + lab1: do { + // call mark_regions, line 224 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 225 + limit_backward = cursor; cursor = limit; + // (, line 225 + // do, line 227 + v_3 = limit - cursor; + lab2: do { + // (, line 227 + // or, line 237 + lab3: do { + v_4 = limit - cursor; + lab4: do { + // (, line 228 + // 
and, line 233 + v_5 = limit - cursor; + // (, line 229 + // or, line 229 + lab5: do { + v_6 = limit - cursor; + lab6: do { + // call standard_suffix, line 229 + if (!r_standard_suffix()) + { + break lab6; + } + break lab5; + } while (false); + cursor = limit - v_6; + lab7: do { + // call i_verb_suffix, line 230 + if (!r_i_verb_suffix()) + { + break lab7; + } + break lab5; + } while (false); + cursor = limit - v_6; + // call verb_suffix, line 231 + if (!r_verb_suffix()) { - break lab10; + break lab4; } - // ], line 234 - bra = cursor; - // <-, line 234 - slice_from("i"); - break lab9; } while (false); - cursor = limit - v_8; - // (, line 235 - // literal, line 235 - if (!(eq_s_b(1, "\u00E7"))) - { - cursor = limit - v_7; - break lab8; - } - // ], line 235 - bra = cursor; - // <-, line 235 - slice_from("c"); + cursor = limit - v_5; + // try, line 234 + v_7 = limit - cursor; + lab8: do { + // (, line 234 + // [, line 234 + ket = cursor; + // or, line 234 + lab9: do { + v_8 = limit - cursor; + lab10: do { + // (, line 234 + // literal, line 234 + if (!(eq_s_b(1, "Y"))) + { + break lab10; + } + // ], line 234 + bra = cursor; + // <-, line 234 + slice_from("i"); + break lab9; + } while (false); + cursor = limit - v_8; + // (, line 235 + // literal, line 235 + if (!(eq_s_b(1, "\u00E7"))) + { + cursor = limit - v_7; + break lab8; + } + // ], line 235 + bra = cursor; + // <-, line 235 + slice_from("c"); + } while (false); + } while (false); + break lab3; } while (false); + cursor = limit - v_4; + // call residual_suffix, line 238 + if (!r_residual_suffix()) + { + break lab2; + } } while (false); - break lab3; } while (false); - cursor = limit - v_4; - // call residual_suffix, line 238 - if (!r_residual_suffix()) - { - break lab2; - } - } while (false); - } while (false); - cursor = limit - v_3; - // do, line 243 - v_9 = limit - cursor; - lab11: do { - // call un_double, line 243 - if (!r_un_double()) - { - break lab11; + cursor = limit - v_3; + // do, line 243 + v_9 = limit - cursor; + lab11: do { + // call un_double, line 243 + if (!r_un_double()) + { + break lab11; + } + } while (false); + cursor = limit - v_9; + // do, line 244 + v_10 = limit - cursor; + lab12: do { + // call un_accent, line 244 + if (!r_un_accent()) + { + break lab12; + } + } while (false); + cursor = limit - v_10; + cursor = limit_backward; // do, line 246 + v_11 = cursor; + lab13: do { + // call postlude, line 246 + if (!r_postlude()) + { + break lab13; + } + } while (false); + cursor = v_11; + return true; } - } while (false); - cursor = limit - v_9; - // do, line 244 - v_10 = limit - cursor; - lab12: do { - // call un_accent, line 244 - if (!r_un_accent()) - { - break lab12; - } - } while (false); - cursor = limit - v_10; - cursor = limit_backward; // do, line 246 - v_11 = cursor; - lab13: do { - // call postlude, line 246 - if (!r_postlude()) - { - break lab13; - } - } while (false); - cursor = v_11; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof FrenchStemmer; } + @Override + public int hashCode() { + return FrenchStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/German2Stemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/German2Stemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/German2Stemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 
3rdParty_sources/lucene/org/tartarus/snowball/ext/German2Stemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,412 +1,419 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class German2Stemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "", -1, 6, "", this), - new Among ( "ae", 0, 2, "", this), - new Among ( "oe", 0, 3, "", this), - new Among ( "qu", 0, 5, "", this), - new Among ( "ue", 0, 4, "", this), - new Among ( "\u00DF", 0, 1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "", -1, 6, "", this), - new Among ( "U", 0, 2, "", this), - new Among ( "Y", 0, 1, "", this), - new Among ( "\u00E4", 0, 3, "", this), - new Among ( "\u00F6", 0, 4, "", this), - new Among ( "\u00FC", 0, 5, "", this) - }; + private final static German2Stemmer methodObject = new German2Stemmer (); - private Among a_2[] = { - new Among ( "e", -1, 1, "", this), - new Among ( "em", -1, 1, "", this), - new Among ( "en", -1, 1, "", this), - new Among ( "ern", -1, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "s", -1, 2, "", this), - new Among ( "es", 5, 1, "", this) - }; + private final static Among a_0[] = { + new Among ( "", -1, 6, "", methodObject ), + new Among ( "ae", 0, 2, "", methodObject ), + new Among ( "oe", 0, 3, "", methodObject ), + new Among ( "qu", 0, 5, "", methodObject ), + new Among ( "ue", 0, 4, "", methodObject ), + new Among ( "\u00DF", 0, 1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "en", -1, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "st", -1, 2, "", this), - new Among ( "est", 2, 1, "", this) - }; + private final static Among a_1[] = { + new Among ( "", -1, 6, "", methodObject ), + new Among ( "U", 0, 2, "", methodObject ), + new Among ( "Y", 0, 1, "", methodObject ), + new Among ( "\u00E4", 0, 3, "", methodObject ), + new Among ( "\u00F6", 0, 4, "", methodObject ), + new Among ( "\u00FC", 0, 5, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "ig", -1, 1, "", this), - new Among ( "lich", -1, 1, "", this) - }; + private final static Among a_2[] = { + new Among ( "e", -1, 1, "", methodObject ), + new Among ( "em", -1, 1, "", methodObject ), + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "ern", -1, 1, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "s", -1, 2, "", methodObject ), + new Among ( "es", 5, 1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "end", -1, 1, "", this), - new Among ( "ig", -1, 2, "", this), - new Among ( "ung", -1, 1, "", this), - new Among ( "lich", -1, 3, "", this), - new Among ( "isch", -1, 2, "", this), - new Among ( "ik", -1, 2, "", this), - new Among ( "heit", -1, 3, "", this), - new Among ( "keit", -1, 4, "", this) - }; + private final static Among a_3[] = { + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "st", -1, 2, "", methodObject ), + new Among ( "est", 2, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 8, 0, 32, 8 }; + private final static Among a_4[] = { + new Among ( "ig", -1, 1, "", methodObject ), + new Among ( "lich", -1, 1, "", methodObject ) + }; - private static final char g_s_ending[] = {117, 30, 5 }; + private final static Among a_5[] = { + new Among ( "end", -1, 1, "", methodObject ), + new Among ( "ig", -1, 2, "", methodObject ), + new Among ( "ung", -1, 1, "", methodObject ), + new Among ( "lich", -1, 3, "", methodObject ), + new Among ( "isch", -1, 2, "", methodObject ), + new Among ( "ik", -1, 2, "", methodObject ), + new Among ( "heit", -1, 3, "", methodObject ), + new Among ( "keit", -1, 4, "", methodObject ) + }; - private static final char g_st_ending[] = {117, 30, 4 }; + private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; + private static final char g_s_ending[] = {117, 30, 5 }; + + private static final char g_st_ending[] = {117, 30, 4 }; + private int I_x; private int I_p2; private int I_p1; - private void copy_from(German2Stemmer other) { - I_x = other.I_x; - I_p2 = other.I_p2; - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(German2Stemmer other) { + I_x = other.I_x; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int among_var; int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 28 - // test, line 30 - v_1 = cursor; - // repeat, line 30 - replab0: while(true) - { - v_2 = cursor; - lab1: do { - // goto, line 30 - golab2: while(true) + // (, line 28 + // test, line 30 + v_1 = cursor; + // repeat, line 30 + replab0: while(true) { - v_3 = cursor; - lab3: do { - // (, line 30 - if (!(in_grouping(g_v, 97, 252))) + v_2 = cursor; + lab1: do { + // goto, line 30 + golab2: while(true) { - break lab3; - } - // [, line 31 - bra = cursor; - // or, line 31 - lab4: do { - v_4 = cursor; - lab5: do { - // (, line 31 - // literal, line 31 - if (!(eq_s(1, "u"))) - { - break lab5; - } - // ], line 31 - ket = cursor; + v_3 = cursor; + lab3: do { + // (, line 30 if (!(in_grouping(g_v, 97, 252))) { - break lab5; + break lab3; } - // <-, line 31 - slice_from("U"); - break lab4; + // [, line 31 + bra = cursor; + // or, line 31 + lab4: do { + v_4 = cursor; + lab5: do { + // (, line 31 + // literal, line 31 + if (!(eq_s(1, "u"))) + { + break lab5; + } + // ], line 31 + ket = cursor; + if (!(in_grouping(g_v, 97, 252))) + { + break lab5; + } + // <-, line 31 + slice_from("U"); + break lab4; + } while (false); + cursor = v_4; + // (, line 32 + // literal, line 32 + if (!(eq_s(1, "y"))) + { + break lab3; + } + // ], line 32 + ket = cursor; + if (!(in_grouping(g_v, 97, 252))) + { + break lab3; + } + // <-, line 32 + slice_from("Y"); + } while (false); + cursor = v_3; + break golab2; } while (false); - cursor = v_4; - // (, line 32 - // literal, line 32 - if (!(eq_s(1, "y"))) + cursor = v_3; + if (cursor >= limit) { - break lab3; + break lab1; } - // ], line 32 - ket = cursor; - if (!(in_grouping(g_v, 97, 252))) - { - break lab3; - } - // <-, line 32 - slice_from("Y"); - } while (false); - cursor = v_3; - break golab2; + cursor++; + } + continue replab0; } while (false); - cursor = v_3; - if (cursor >= limit) - { - break lab1; - } - cursor++; + cursor = v_2; + break replab0; } - continue replab0; - } while (false); - cursor = v_2; - break replab0; - } - cursor = v_1; - // repeat, line 35 - replab6: while(true) - { - v_5 = cursor; - lab7: do { - // (, line 35 - // [, line 36 - bra = cursor; - // substring, line 36 - 
among_var = find_among(a_0, 6); - if (among_var == 0) + cursor = v_1; + // repeat, line 35 + replab6: while(true) { - break lab7; - } - // ], line 36 - ket = cursor; - switch(among_var) { - case 0: - break lab7; - case 1: - // (, line 37 - // <-, line 37 - slice_from("ss"); - break; - case 2: - // (, line 38 - // <-, line 38 - slice_from("\u00E4"); - break; - case 3: - // (, line 39 - // <-, line 39 - slice_from("\u00F6"); - break; - case 4: - // (, line 40 - // <-, line 40 - slice_from("\u00FC"); - break; - case 5: - // (, line 41 - // hop, line 41 + v_5 = cursor; + lab7: do { + // (, line 35 + // [, line 36 + bra = cursor; + // substring, line 36 + among_var = find_among(a_0, 6); + if (among_var == 0) { - int c = cursor + 2; - if (0 > c || c > limit) - { - break lab7; - } - cursor = c; - } - break; - case 6: - // (, line 42 - // next, line 42 - if (cursor >= limit) - { break lab7; } - cursor++; - break; + // ], line 36 + ket = cursor; + switch(among_var) { + case 0: + break lab7; + case 1: + // (, line 37 + // <-, line 37 + slice_from("ss"); + break; + case 2: + // (, line 38 + // <-, line 38 + slice_from("\u00E4"); + break; + case 3: + // (, line 39 + // <-, line 39 + slice_from("\u00F6"); + break; + case 4: + // (, line 40 + // <-, line 40 + slice_from("\u00FC"); + break; + case 5: + // (, line 41 + // hop, line 41 + { + int c = cursor + 2; + if (0 > c || c > limit) + { + break lab7; + } + cursor = c; + } + break; + case 6: + // (, line 42 + // next, line 42 + if (cursor >= limit) + { + break lab7; + } + cursor++; + break; + } + continue replab6; + } while (false); + cursor = v_5; + break replab6; } - continue replab6; - } while (false); - cursor = v_5; - break replab6; - } - return true; - } + return true; + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; - // (, line 48 - I_p1 = limit; - I_p2 = limit; - // test, line 53 - v_1 = cursor; - // (, line 53 - // hop, line 53 - { - int c = cursor + 3; - if (0 > c || c > limit) - { - return false; - } - cursor = c; - } - // setmark x, line 53 - I_x = cursor; - cursor = v_1; - // gopast, line 55 - golab0: while(true) - { - lab1: do { - if (!(in_grouping(g_v, 97, 252))) + // (, line 48 + I_p1 = limit; + I_p2 = limit; + // test, line 53 + v_1 = cursor; + // (, line 53 + // hop, line 53 { - break lab1; + int c = cursor + 3; + if (0 > c || c > limit) + { + return false; + } + cursor = c; } - break golab0; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // gopast, line 55 - golab2: while(true) - { - lab3: do { - if (!(out_grouping(g_v, 97, 252))) + // setmark x, line 53 + I_x = cursor; + cursor = v_1; + // gopast, line 55 + golab0: while(true) { - break lab3; + lab1: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab1; + } + break golab0; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab2; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // setmark p1, line 55 - I_p1 = cursor; - // try, line 56 - lab4: do { - // (, line 56 - if (!(I_p1 < I_x)) - { - break lab4; - } - I_p1 = I_x; - } while (false); - // gopast, line 57 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 252))) + // gopast, line 55 + golab2: while(true) { - break lab6; + lab3: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab5; - } while (false); - if (cursor >= limit) - { - return false; - 
} - cursor++; - } - // gopast, line 57 - golab7: while(true) - { - lab8: do { - if (!(out_grouping(g_v, 97, 252))) + // setmark p1, line 55 + I_p1 = cursor; + // try, line 56 + lab4: do { + // (, line 56 + if (!(I_p1 < I_x)) + { + break lab4; + } + I_p1 = I_x; + } while (false); + // gopast, line 57 + golab5: while(true) { - break lab8; + lab6: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab7; - } while (false); - if (cursor >= limit) - { - return false; + // gopast, line 57 + golab7: while(true) + { + lab8: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; + } + // setmark p2, line 57 + I_p2 = cursor; + return true; } - cursor++; - } - // setmark p2, line 57 - I_p2 = cursor; - return true; - } - private boolean r_postlude() { + private boolean r_postlude() { int among_var; int v_1; - // repeat, line 61 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 61 - // [, line 63 - bra = cursor; - // substring, line 63 - among_var = find_among(a_1, 6); - if (among_var == 0) + // repeat, line 61 + replab0: while(true) { - break lab1; - } - // ], line 63 - ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 64 - // <-, line 64 - slice_from("y"); - break; - case 2: - // (, line 65 - // <-, line 65 - slice_from("u"); - break; - case 3: - // (, line 66 - // <-, line 66 - slice_from("a"); - break; - case 4: - // (, line 67 - // <-, line 67 - slice_from("o"); - break; - case 5: - // (, line 68 - // <-, line 68 - slice_from("u"); - break; - case 6: - // (, line 69 - // next, line 69 - if (cursor >= limit) + v_1 = cursor; + lab1: do { + // (, line 61 + // [, line 63 + bra = cursor; + // substring, line 63 + among_var = find_among(a_1, 6); + if (among_var == 0) { break lab1; } - cursor++; - break; + // ], line 63 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 64 + // <-, line 64 + slice_from("y"); + break; + case 2: + // (, line 65 + // <-, line 65 + slice_from("u"); + break; + case 3: + // (, line 66 + // <-, line 66 + slice_from("a"); + break; + case 4: + // (, line 67 + // <-, line 67 + slice_from("o"); + break; + case 5: + // (, line 68 + // <-, line 68 + slice_from("u"); + break; + case 6: + // (, line 69 + // next, line 69 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } + private boolean r_R1() { + if (!(I_p1 <= cursor)) + { + return false; + } + return true; + } - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } + private boolean r_R2() { + if (!(I_p2 <= cursor)) + { + return false; + } + return true; + } - private boolean r_standard_suffix() { + private boolean r_standard_suffix() { int among_var; int v_1; int v_2; @@ -417,310 +424,323 @@ int v_7; int v_8; int v_9; - // (, line 79 - // do, line 80 - v_1 = limit - cursor; - lab0: do { - // (, line 80 - // [, line 81 - ket = cursor; - // substring, line 81 - among_var = find_among_b(a_2, 7); - if (among_var == 0) - { - break lab0; - } - // ], line 81 - bra = cursor; - // call R1, line 81 - if 
(!r_R1()) - { - break lab0; - } - switch(among_var) { - case 0: - break lab0; - case 1: - // (, line 83 - // delete, line 83 - slice_del(); - break; - case 2: - // (, line 86 - if (!(in_grouping_b(g_s_ending, 98, 116))) + // (, line 79 + // do, line 80 + v_1 = limit - cursor; + lab0: do { + // (, line 80 + // [, line 81 + ket = cursor; + // substring, line 81 + among_var = find_among_b(a_2, 7); + if (among_var == 0) { break lab0; } - // delete, line 86 - slice_del(); - break; - } - } while (false); - cursor = limit - v_1; - // do, line 90 - v_2 = limit - cursor; - lab1: do { - // (, line 90 - // [, line 91 - ket = cursor; - // substring, line 91 - among_var = find_among_b(a_3, 4); - if (among_var == 0) - { - break lab1; - } - // ], line 91 - bra = cursor; - // call R1, line 91 - if (!r_R1()) - { - break lab1; - } - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 93 - // delete, line 93 - slice_del(); - break; - case 2: - // (, line 96 - if (!(in_grouping_b(g_st_ending, 98, 116))) + // ], line 81 + bra = cursor; + // call R1, line 81 + if (!r_R1()) { + break lab0; + } + switch(among_var) { + case 0: + break lab0; + case 1: + // (, line 83 + // delete, line 83 + slice_del(); + break; + case 2: + // (, line 86 + if (!(in_grouping_b(g_s_ending, 98, 116))) + { + break lab0; + } + // delete, line 86 + slice_del(); + break; + } + } while (false); + cursor = limit - v_1; + // do, line 90 + v_2 = limit - cursor; + lab1: do { + // (, line 90 + // [, line 91 + ket = cursor; + // substring, line 91 + among_var = find_among_b(a_3, 4); + if (among_var == 0) + { break lab1; } - // hop, line 96 + // ], line 91 + bra = cursor; + // call R1, line 91 + if (!r_R1()) { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - break lab1; - } - cursor = c; + break lab1; } - // delete, line 96 - slice_del(); - break; - } - } while (false); - cursor = limit - v_2; - // do, line 100 - v_3 = limit - cursor; - lab2: do { - // (, line 100 - // [, line 101 - ket = cursor; - // substring, line 101 - among_var = find_among_b(a_5, 8); - if (among_var == 0) - { - break lab2; - } - // ], line 101 - bra = cursor; - // call R2, line 101 - if (!r_R2()) - { - break lab2; - } - switch(among_var) { - case 0: - break lab2; - case 1: - // (, line 103 - // delete, line 103 - slice_del(); - // try, line 104 - v_4 = limit - cursor; - lab3: do { - // (, line 104 - // [, line 104 - ket = cursor; - // literal, line 104 - if (!(eq_s_b(2, "ig"))) - { - cursor = limit - v_4; - break lab3; - } - // ], line 104 - bra = cursor; - // not, line 104 - { - v_5 = limit - cursor; - lab4: do { - // literal, line 104 - if (!(eq_s_b(1, "e"))) + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 93 + // delete, line 93 + slice_del(); + break; + case 2: + // (, line 96 + if (!(in_grouping_b(g_st_ending, 98, 116))) + { + break lab1; + } + // hop, line 96 + { + int c = cursor - 3; + if (limit_backward > c || c > limit) { - break lab4; + break lab1; } - cursor = limit - v_4; - break lab3; - } while (false); - cursor = limit - v_5; - } - // call R2, line 104 - if (!r_R2()) - { - cursor = limit - v_4; - break lab3; - } - // delete, line 104 - slice_del(); - } while (false); - break; - case 2: - // (, line 107 - // not, line 107 - { - v_6 = limit - cursor; - lab5: do { - // literal, line 107 - if (!(eq_s_b(1, "e"))) - { - break lab5; + cursor = c; } - break lab2; - } while (false); - cursor = limit - v_6; + // delete, line 96 + slice_del(); + break; } - // delete, line 107 - slice_del(); - break; - case 3: - // (, 
line 110 - // delete, line 110 - slice_del(); - // try, line 111 - v_7 = limit - cursor; - lab6: do { - // (, line 111 - // [, line 112 - ket = cursor; - // or, line 112 - lab7: do { - v_8 = limit - cursor; - lab8: do { - // literal, line 112 - if (!(eq_s_b(2, "er"))) + } while (false); + cursor = limit - v_2; + // do, line 100 + v_3 = limit - cursor; + lab2: do { + // (, line 100 + // [, line 101 + ket = cursor; + // substring, line 101 + among_var = find_among_b(a_5, 8); + if (among_var == 0) + { + break lab2; + } + // ], line 101 + bra = cursor; + // call R2, line 101 + if (!r_R2()) + { + break lab2; + } + switch(among_var) { + case 0: + break lab2; + case 1: + // (, line 103 + // delete, line 103 + slice_del(); + // try, line 104 + v_4 = limit - cursor; + lab3: do { + // (, line 104 + // [, line 104 + ket = cursor; + // literal, line 104 + if (!(eq_s_b(2, "ig"))) { - break lab8; + cursor = limit - v_4; + break lab3; } - break lab7; + // ], line 104 + bra = cursor; + // not, line 104 + { + v_5 = limit - cursor; + lab4: do { + // literal, line 104 + if (!(eq_s_b(1, "e"))) + { + break lab4; + } + cursor = limit - v_4; + break lab3; + } while (false); + cursor = limit - v_5; + } + // call R2, line 104 + if (!r_R2()) + { + cursor = limit - v_4; + break lab3; + } + // delete, line 104 + slice_del(); } while (false); - cursor = limit - v_8; - // literal, line 112 - if (!(eq_s_b(2, "en"))) + break; + case 2: + // (, line 107 + // not, line 107 { - cursor = limit - v_7; - break lab6; + v_6 = limit - cursor; + lab5: do { + // literal, line 107 + if (!(eq_s_b(1, "e"))) + { + break lab5; + } + break lab2; + } while (false); + cursor = limit - v_6; } - } while (false); - // ], line 112 - bra = cursor; - // call R1, line 112 - if (!r_R1()) - { - cursor = limit - v_7; - break lab6; - } - // delete, line 112 - slice_del(); - } while (false); - break; - case 4: - // (, line 116 - // delete, line 116 - slice_del(); - // try, line 117 - v_9 = limit - cursor; - lab9: do { - // (, line 117 - // [, line 118 - ket = cursor; - // substring, line 118 - among_var = find_among_b(a_4, 2); - if (among_var == 0) - { - cursor = limit - v_9; - break lab9; - } - // ], line 118 - bra = cursor; - // call R2, line 118 - if (!r_R2()) - { - cursor = limit - v_9; - break lab9; - } - switch(among_var) { - case 0: - cursor = limit - v_9; - break lab9; - case 1: - // (, line 120 - // delete, line 120 + // delete, line 107 + slice_del(); + break; + case 3: + // (, line 110 + // delete, line 110 + slice_del(); + // try, line 111 + v_7 = limit - cursor; + lab6: do { + // (, line 111 + // [, line 112 + ket = cursor; + // or, line 112 + lab7: do { + v_8 = limit - cursor; + lab8: do { + // literal, line 112 + if (!(eq_s_b(2, "er"))) + { + break lab8; + } + break lab7; + } while (false); + cursor = limit - v_8; + // literal, line 112 + if (!(eq_s_b(2, "en"))) + { + cursor = limit - v_7; + break lab6; + } + } while (false); + // ], line 112 + bra = cursor; + // call R1, line 112 + if (!r_R1()) + { + cursor = limit - v_7; + break lab6; + } + // delete, line 112 slice_del(); - break; - } - } while (false); - break; + } while (false); + break; + case 4: + // (, line 116 + // delete, line 116 + slice_del(); + // try, line 117 + v_9 = limit - cursor; + lab9: do { + // (, line 117 + // [, line 118 + ket = cursor; + // substring, line 118 + among_var = find_among_b(a_4, 2); + if (among_var == 0) + { + cursor = limit - v_9; + break lab9; + } + // ], line 118 + bra = cursor; + // call R2, line 118 + if (!r_R2()) + { + cursor = limit - v_9; + 
break lab9; + } + switch(among_var) { + case 0: + cursor = limit - v_9; + break lab9; + case 1: + // (, line 120 + // delete, line 120 + slice_del(); + break; + } + } while (false); + break; + } + } while (false); + cursor = limit - v_3; + return true; } - } while (false); - cursor = limit - v_3; - return true; - } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; int v_4; - // (, line 130 - // do, line 131 - v_1 = cursor; - lab0: do { - // call prelude, line 131 - if (!r_prelude()) - { - break lab0; + // (, line 130 + // do, line 131 + v_1 = cursor; + lab0: do { + // call prelude, line 131 + if (!r_prelude()) + { + break lab0; + } + } while (false); + cursor = v_1; + // do, line 132 + v_2 = cursor; + lab1: do { + // call mark_regions, line 132 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 133 + limit_backward = cursor; cursor = limit; + // do, line 134 + v_3 = limit - cursor; + lab2: do { + // call standard_suffix, line 134 + if (!r_standard_suffix()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + cursor = limit_backward; // do, line 135 + v_4 = cursor; + lab3: do { + // call postlude, line 135 + if (!r_postlude()) + { + break lab3; + } + } while (false); + cursor = v_4; + return true; } - } while (false); - cursor = v_1; - // do, line 132 - v_2 = cursor; - lab1: do { - // call mark_regions, line 132 - if (!r_mark_regions()) - { - break lab1; - } - } while (false); - cursor = v_2; - // backwards, line 133 - limit_backward = cursor; cursor = limit; - // do, line 134 - v_3 = limit - cursor; - lab2: do { - // call standard_suffix, line 134 - if (!r_standard_suffix()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - cursor = limit_backward; // do, line 135 - v_4 = cursor; - lab3: do { - // call postlude, line 135 - if (!r_postlude()) - { - break lab3; - } - } while (false); - cursor = v_4; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof German2Stemmer; } + @Override + public int hashCode() { + return German2Stemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/GermanStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/GermanStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/GermanStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/GermanStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,374 +1,381 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class GermanStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "", -1, 6, "", this), - new Among ( "U", 0, 2, "", this), - new Among ( "Y", 0, 1, "", this), - new Among ( "\u00E4", 0, 3, "", this), - new Among ( "\u00F6", 0, 4, "", this), - new Among ( "\u00FC", 0, 5, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "e", -1, 1, "", this), - new Among ( "em", -1, 1, "", this), - new Among ( "en", -1, 1, "", this), - new Among ( "ern", -1, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "s", -1, 2, "", this), - new Among ( "es", 5, 1, "", this) - }; + private final static GermanStemmer methodObject = new GermanStemmer (); - private Among a_2[] = { - new Among ( "en", -1, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "st", -1, 2, "", this), - new Among ( "est", 2, 1, "", this) - }; + private final static Among a_0[] = { + new Among ( "", -1, 6, "", methodObject ), + new Among ( "U", 0, 2, "", methodObject ), + new Among ( "Y", 0, 1, "", methodObject ), + new Among ( "\u00E4", 0, 3, "", methodObject ), + new Among ( "\u00F6", 0, 4, "", methodObject ), + new Among ( "\u00FC", 0, 5, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "ig", -1, 1, "", this), - new Among ( "lich", -1, 1, "", this) - }; + private final static Among a_1[] = { + new Among ( "e", -1, 1, "", methodObject ), + new Among ( "em", -1, 1, "", methodObject ), + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "ern", -1, 1, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "s", -1, 2, "", methodObject ), + new Among ( "es", 5, 1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "end", -1, 1, "", this), - new Among ( "ig", -1, 2, "", this), - new Among ( "ung", -1, 1, "", this), - new Among ( "lich", -1, 3, "", this), - new Among ( "isch", -1, 2, "", this), - new Among ( "ik", -1, 2, "", this), - new Among ( "heit", -1, 3, "", this), - new Among ( "keit", -1, 4, "", this) - }; + private final static Among a_2[] = { + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "st", -1, 2, "", methodObject ), + new Among ( "est", 2, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; + private final static Among a_3[] = { + new Among ( "ig", -1, 1, "", methodObject ), + new Among ( "lich", -1, 1, "", methodObject ) + }; - private static final char g_s_ending[] = {117, 30, 5 }; + private final static Among a_4[] = { + new Among ( "end", -1, 1, "", methodObject ), + new Among ( "ig", -1, 2, "", methodObject ), + new Among ( "ung", -1, 1, "", methodObject ), + new Among ( "lich", -1, 3, "", methodObject ), + new Among ( "isch", -1, 2, "", methodObject ), + new Among ( "ik", -1, 2, "", methodObject ), + new Among ( "heit", -1, 3, "", methodObject ), + new Among ( "keit", -1, 4, "", methodObject ) + }; - private static final char g_st_ending[] = {117, 30, 4 }; + private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 32, 8 }; + private static final char g_s_ending[] = {117, 30, 5 }; + + private static final char g_st_ending[] = {117, 30, 4 }; + private int I_x; private int I_p2; private int I_p1; - private void copy_from(GermanStemmer other) { - I_x = other.I_x; - I_p2 = other.I_p2; - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(GermanStemmer other) { 
+ I_x = other.I_x; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; - // (, line 28 - // test, line 30 - v_1 = cursor; - // repeat, line 30 - replab0: while(true) - { - v_2 = cursor; - lab1: do { - // (, line 30 - // or, line 33 - lab2: do { - v_3 = cursor; + // (, line 28 + // test, line 30 + v_1 = cursor; + // repeat, line 30 + replab0: while(true) + { + v_2 = cursor; + lab1: do { + // (, line 30 + // or, line 33 + lab2: do { + v_3 = cursor; + lab3: do { + // (, line 31 + // [, line 32 + bra = cursor; + // literal, line 32 + if (!(eq_s(1, "\u00DF"))) + { + break lab3; + } + // ], line 32 + ket = cursor; + // <-, line 32 + slice_from("ss"); + break lab2; + } while (false); + cursor = v_3; + // next, line 33 + if (cursor >= limit) + { + break lab1; + } + cursor++; + } while (false); + continue replab0; + } while (false); + cursor = v_2; + break replab0; + } + cursor = v_1; + // repeat, line 36 + replab4: while(true) + { + v_4 = cursor; + lab5: do { + // goto, line 36 + golab6: while(true) + { + v_5 = cursor; + lab7: do { + // (, line 36 + if (!(in_grouping(g_v, 97, 252))) + { + break lab7; + } + // [, line 37 + bra = cursor; + // or, line 37 + lab8: do { + v_6 = cursor; + lab9: do { + // (, line 37 + // literal, line 37 + if (!(eq_s(1, "u"))) + { + break lab9; + } + // ], line 37 + ket = cursor; + if (!(in_grouping(g_v, 97, 252))) + { + break lab9; + } + // <-, line 37 + slice_from("U"); + break lab8; + } while (false); + cursor = v_6; + // (, line 38 + // literal, line 38 + if (!(eq_s(1, "y"))) + { + break lab7; + } + // ], line 38 + ket = cursor; + if (!(in_grouping(g_v, 97, 252))) + { + break lab7; + } + // <-, line 38 + slice_from("Y"); + } while (false); + cursor = v_5; + break golab6; + } while (false); + cursor = v_5; + if (cursor >= limit) + { + break lab5; + } + cursor++; + } + continue replab4; + } while (false); + cursor = v_4; + break replab4; + } + return true; + } + + private boolean r_mark_regions() { + int v_1; + // (, line 42 + I_p1 = limit; + I_p2 = limit; + // test, line 47 + v_1 = cursor; + // (, line 47 + // hop, line 47 + { + int c = cursor + 3; + if (0 > c || c > limit) + { + return false; + } + cursor = c; + } + // setmark x, line 47 + I_x = cursor; + cursor = v_1; + // gopast, line 49 + golab0: while(true) + { + lab1: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab1; + } + break golab0; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; + } + // gopast, line 49 + golab2: while(true) + { lab3: do { - // (, line 31 - // [, line 32 - bra = cursor; - // literal, line 32 - if (!(eq_s(1, "\u00DF"))) + if (!(out_grouping(g_v, 97, 252))) { break lab3; } - // ], line 32 - ket = cursor; - // <-, line 32 - slice_from("ss"); - break lab2; + break golab2; } while (false); - cursor = v_3; - // next, line 33 if (cursor >= limit) { - break lab1; + return false; } cursor++; + } + // setmark p1, line 49 + I_p1 = cursor; + // try, line 50 + lab4: do { + // (, line 50 + if (!(I_p1 < I_x)) + { + break lab4; + } + I_p1 = I_x; } while (false); - continue replab0; - } while (false); - cursor = v_2; - break replab0; - } - cursor = v_1; - // repeat, line 36 - replab4: while(true) - { - v_4 = cursor; - lab5: do { - // goto, line 36 - golab6: while(true) + // gopast, line 51 + golab5: while(true) { - v_5 = cursor; - lab7: do { - // (, line 36 + lab6: do { if (!(in_grouping(g_v, 97, 252))) { - break lab7; + break lab6; 
} - // [, line 37 - bra = cursor; - // or, line 37 - lab8: do { - v_6 = cursor; - lab9: do { - // (, line 37 - // literal, line 37 - if (!(eq_s(1, "u"))) - { - break lab9; - } - // ], line 37 - ket = cursor; - if (!(in_grouping(g_v, 97, 252))) - { - break lab9; - } - // <-, line 37 - slice_from("U"); - break lab8; - } while (false); - cursor = v_6; - // (, line 38 - // literal, line 38 - if (!(eq_s(1, "y"))) - { - break lab7; - } - // ], line 38 - ket = cursor; - if (!(in_grouping(g_v, 97, 252))) - { - break lab7; - } - // <-, line 38 - slice_from("Y"); - } while (false); - cursor = v_5; - break golab6; + break golab5; } while (false); - cursor = v_5; if (cursor >= limit) { - break lab5; + return false; } cursor++; } - continue replab4; - } while (false); - cursor = v_4; - break replab4; - } - return true; - } - - private boolean r_mark_regions() { - int v_1; - // (, line 42 - I_p1 = limit; - I_p2 = limit; - // test, line 47 - v_1 = cursor; - // (, line 47 - // hop, line 47 - { - int c = cursor + 3; - if (0 > c || c > limit) - { - return false; - } - cursor = c; - } - // setmark x, line 47 - I_x = cursor; - cursor = v_1; - // gopast, line 49 - golab0: while(true) - { - lab1: do { - if (!(in_grouping(g_v, 97, 252))) + // gopast, line 51 + golab7: while(true) { - break lab1; + lab8: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab0; - } while (false); - if (cursor >= limit) - { - return false; + // setmark p2, line 51 + I_p2 = cursor; + return true; } - cursor++; - } - // gopast, line 49 - golab2: while(true) - { - lab3: do { - if (!(out_grouping(g_v, 97, 252))) - { - break lab3; - } - break golab2; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // setmark p1, line 49 - I_p1 = cursor; - // try, line 50 - lab4: do { - // (, line 50 - if (!(I_p1 < I_x)) - { - break lab4; - } - I_p1 = I_x; - } while (false); - // gopast, line 51 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 252))) - { - break lab6; - } - break golab5; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // gopast, line 51 - golab7: while(true) - { - lab8: do { - if (!(out_grouping(g_v, 97, 252))) - { - break lab8; - } - break golab7; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // setmark p2, line 51 - I_p2 = cursor; - return true; - } - private boolean r_postlude() { + private boolean r_postlude() { int among_var; int v_1; - // repeat, line 55 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 55 - // [, line 57 - bra = cursor; - // substring, line 57 - among_var = find_among(a_0, 6); - if (among_var == 0) + // repeat, line 55 + replab0: while(true) { - break lab1; - } - // ], line 57 - ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 58 - // <-, line 58 - slice_from("y"); - break; - case 2: - // (, line 59 - // <-, line 59 - slice_from("u"); - break; - case 3: - // (, line 60 - // <-, line 60 - slice_from("a"); - break; - case 4: - // (, line 61 - // <-, line 61 - slice_from("o"); - break; - case 5: - // (, line 62 - // <-, line 62 - slice_from("u"); - break; - case 6: - // (, line 63 - // next, line 63 - if (cursor >= limit) + v_1 = cursor; + lab1: do { + // (, line 55 + // [, line 57 + bra = cursor; + // substring, line 57 + among_var = find_among(a_0, 6); + if (among_var == 0) { break lab1; } - cursor++; - break; + // ], 
line 57 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 58 + // <-, line 58 + slice_from("y"); + break; + case 2: + // (, line 59 + // <-, line 59 + slice_from("u"); + break; + case 3: + // (, line 60 + // <-, line 60 + slice_from("a"); + break; + case 4: + // (, line 61 + // <-, line 61 + slice_from("o"); + break; + case 5: + // (, line 62 + // <-, line 62 + slice_from("u"); + break; + case 6: + // (, line 63 + // next, line 63 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } + private boolean r_R1() { + if (!(I_p1 <= cursor)) + { + return false; + } + return true; + } - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } + private boolean r_R2() { + if (!(I_p2 <= cursor)) + { + return false; + } + return true; + } - private boolean r_standard_suffix() { + private boolean r_standard_suffix() { int among_var; int v_1; int v_2; @@ -379,310 +386,323 @@ int v_7; int v_8; int v_9; - // (, line 73 - // do, line 74 - v_1 = limit - cursor; - lab0: do { - // (, line 74 - // [, line 75 - ket = cursor; - // substring, line 75 - among_var = find_among_b(a_1, 7); - if (among_var == 0) - { - break lab0; - } - // ], line 75 - bra = cursor; - // call R1, line 75 - if (!r_R1()) - { - break lab0; - } - switch(among_var) { - case 0: - break lab0; - case 1: - // (, line 77 - // delete, line 77 - slice_del(); - break; - case 2: - // (, line 80 - if (!(in_grouping_b(g_s_ending, 98, 116))) + // (, line 73 + // do, line 74 + v_1 = limit - cursor; + lab0: do { + // (, line 74 + // [, line 75 + ket = cursor; + // substring, line 75 + among_var = find_among_b(a_1, 7); + if (among_var == 0) { break lab0; } - // delete, line 80 - slice_del(); - break; - } - } while (false); - cursor = limit - v_1; - // do, line 84 - v_2 = limit - cursor; - lab1: do { - // (, line 84 - // [, line 85 - ket = cursor; - // substring, line 85 - among_var = find_among_b(a_2, 4); - if (among_var == 0) - { - break lab1; - } - // ], line 85 - bra = cursor; - // call R1, line 85 - if (!r_R1()) - { - break lab1; - } - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 87 - // delete, line 87 - slice_del(); - break; - case 2: - // (, line 90 - if (!(in_grouping_b(g_st_ending, 98, 116))) + // ], line 75 + bra = cursor; + // call R1, line 75 + if (!r_R1()) { + break lab0; + } + switch(among_var) { + case 0: + break lab0; + case 1: + // (, line 77 + // delete, line 77 + slice_del(); + break; + case 2: + // (, line 80 + if (!(in_grouping_b(g_s_ending, 98, 116))) + { + break lab0; + } + // delete, line 80 + slice_del(); + break; + } + } while (false); + cursor = limit - v_1; + // do, line 84 + v_2 = limit - cursor; + lab1: do { + // (, line 84 + // [, line 85 + ket = cursor; + // substring, line 85 + among_var = find_among_b(a_2, 4); + if (among_var == 0) + { break lab1; } - // hop, line 90 + // ], line 85 + bra = cursor; + // call R1, line 85 + if (!r_R1()) { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - break lab1; - } - cursor = c; + break lab1; } - // delete, line 90 - slice_del(); - break; - } - } while (false); - cursor = limit - v_2; - // do, line 94 - v_3 = limit - cursor; - lab2: do { - // (, line 94 - // [, line 95 - ket = 
cursor; - // substring, line 95 - among_var = find_among_b(a_4, 8); - if (among_var == 0) - { - break lab2; - } - // ], line 95 - bra = cursor; - // call R2, line 95 - if (!r_R2()) - { - break lab2; - } - switch(among_var) { - case 0: - break lab2; - case 1: - // (, line 97 - // delete, line 97 - slice_del(); - // try, line 98 - v_4 = limit - cursor; - lab3: do { - // (, line 98 - // [, line 98 - ket = cursor; - // literal, line 98 - if (!(eq_s_b(2, "ig"))) - { - cursor = limit - v_4; - break lab3; - } - // ], line 98 - bra = cursor; - // not, line 98 - { - v_5 = limit - cursor; - lab4: do { - // literal, line 98 - if (!(eq_s_b(1, "e"))) + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 87 + // delete, line 87 + slice_del(); + break; + case 2: + // (, line 90 + if (!(in_grouping_b(g_st_ending, 98, 116))) + { + break lab1; + } + // hop, line 90 + { + int c = cursor - 3; + if (limit_backward > c || c > limit) { - break lab4; + break lab1; } - cursor = limit - v_4; - break lab3; - } while (false); - cursor = limit - v_5; - } - // call R2, line 98 - if (!r_R2()) - { - cursor = limit - v_4; - break lab3; - } - // delete, line 98 - slice_del(); - } while (false); - break; - case 2: - // (, line 101 - // not, line 101 - { - v_6 = limit - cursor; - lab5: do { - // literal, line 101 - if (!(eq_s_b(1, "e"))) - { - break lab5; + cursor = c; } - break lab2; - } while (false); - cursor = limit - v_6; + // delete, line 90 + slice_del(); + break; } - // delete, line 101 - slice_del(); - break; - case 3: - // (, line 104 - // delete, line 104 - slice_del(); - // try, line 105 - v_7 = limit - cursor; - lab6: do { - // (, line 105 - // [, line 106 - ket = cursor; - // or, line 106 - lab7: do { - v_8 = limit - cursor; - lab8: do { - // literal, line 106 - if (!(eq_s_b(2, "er"))) + } while (false); + cursor = limit - v_2; + // do, line 94 + v_3 = limit - cursor; + lab2: do { + // (, line 94 + // [, line 95 + ket = cursor; + // substring, line 95 + among_var = find_among_b(a_4, 8); + if (among_var == 0) + { + break lab2; + } + // ], line 95 + bra = cursor; + // call R2, line 95 + if (!r_R2()) + { + break lab2; + } + switch(among_var) { + case 0: + break lab2; + case 1: + // (, line 97 + // delete, line 97 + slice_del(); + // try, line 98 + v_4 = limit - cursor; + lab3: do { + // (, line 98 + // [, line 98 + ket = cursor; + // literal, line 98 + if (!(eq_s_b(2, "ig"))) { - break lab8; + cursor = limit - v_4; + break lab3; } - break lab7; + // ], line 98 + bra = cursor; + // not, line 98 + { + v_5 = limit - cursor; + lab4: do { + // literal, line 98 + if (!(eq_s_b(1, "e"))) + { + break lab4; + } + cursor = limit - v_4; + break lab3; + } while (false); + cursor = limit - v_5; + } + // call R2, line 98 + if (!r_R2()) + { + cursor = limit - v_4; + break lab3; + } + // delete, line 98 + slice_del(); } while (false); - cursor = limit - v_8; - // literal, line 106 - if (!(eq_s_b(2, "en"))) + break; + case 2: + // (, line 101 + // not, line 101 { - cursor = limit - v_7; - break lab6; + v_6 = limit - cursor; + lab5: do { + // literal, line 101 + if (!(eq_s_b(1, "e"))) + { + break lab5; + } + break lab2; + } while (false); + cursor = limit - v_6; } - } while (false); - // ], line 106 - bra = cursor; - // call R1, line 106 - if (!r_R1()) - { - cursor = limit - v_7; - break lab6; - } - // delete, line 106 - slice_del(); - } while (false); - break; - case 4: - // (, line 110 - // delete, line 110 - slice_del(); - // try, line 111 - v_9 = limit - cursor; - lab9: do { - // (, line 111 - // [, line 112 - ket 
= cursor; - // substring, line 112 - among_var = find_among_b(a_3, 2); - if (among_var == 0) - { - cursor = limit - v_9; - break lab9; - } - // ], line 112 - bra = cursor; - // call R2, line 112 - if (!r_R2()) - { - cursor = limit - v_9; - break lab9; - } - switch(among_var) { - case 0: - cursor = limit - v_9; - break lab9; - case 1: - // (, line 114 - // delete, line 114 + // delete, line 101 + slice_del(); + break; + case 3: + // (, line 104 + // delete, line 104 + slice_del(); + // try, line 105 + v_7 = limit - cursor; + lab6: do { + // (, line 105 + // [, line 106 + ket = cursor; + // or, line 106 + lab7: do { + v_8 = limit - cursor; + lab8: do { + // literal, line 106 + if (!(eq_s_b(2, "er"))) + { + break lab8; + } + break lab7; + } while (false); + cursor = limit - v_8; + // literal, line 106 + if (!(eq_s_b(2, "en"))) + { + cursor = limit - v_7; + break lab6; + } + } while (false); + // ], line 106 + bra = cursor; + // call R1, line 106 + if (!r_R1()) + { + cursor = limit - v_7; + break lab6; + } + // delete, line 106 slice_del(); - break; - } - } while (false); - break; + } while (false); + break; + case 4: + // (, line 110 + // delete, line 110 + slice_del(); + // try, line 111 + v_9 = limit - cursor; + lab9: do { + // (, line 111 + // [, line 112 + ket = cursor; + // substring, line 112 + among_var = find_among_b(a_3, 2); + if (among_var == 0) + { + cursor = limit - v_9; + break lab9; + } + // ], line 112 + bra = cursor; + // call R2, line 112 + if (!r_R2()) + { + cursor = limit - v_9; + break lab9; + } + switch(among_var) { + case 0: + cursor = limit - v_9; + break lab9; + case 1: + // (, line 114 + // delete, line 114 + slice_del(); + break; + } + } while (false); + break; + } + } while (false); + cursor = limit - v_3; + return true; } - } while (false); - cursor = limit - v_3; - return true; - } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; int v_4; - // (, line 124 - // do, line 125 - v_1 = cursor; - lab0: do { - // call prelude, line 125 - if (!r_prelude()) - { - break lab0; + // (, line 124 + // do, line 125 + v_1 = cursor; + lab0: do { + // call prelude, line 125 + if (!r_prelude()) + { + break lab0; + } + } while (false); + cursor = v_1; + // do, line 126 + v_2 = cursor; + lab1: do { + // call mark_regions, line 126 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 127 + limit_backward = cursor; cursor = limit; + // do, line 128 + v_3 = limit - cursor; + lab2: do { + // call standard_suffix, line 128 + if (!r_standard_suffix()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + cursor = limit_backward; // do, line 129 + v_4 = cursor; + lab3: do { + // call postlude, line 129 + if (!r_postlude()) + { + break lab3; + } + } while (false); + cursor = v_4; + return true; } - } while (false); - cursor = v_1; - // do, line 126 - v_2 = cursor; - lab1: do { - // call mark_regions, line 126 - if (!r_mark_regions()) - { - break lab1; - } - } while (false); - cursor = v_2; - // backwards, line 127 - limit_backward = cursor; cursor = limit; - // do, line 128 - v_3 = limit - cursor; - lab2: do { - // call standard_suffix, line 128 - if (!r_standard_suffix()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - cursor = limit_backward; // do, line 129 - v_4 = cursor; - lab3: do { - // call postlude, line 129 - if (!r_postlude()) - { - break lab3; - } - } while (false); - cursor = v_4; - return true; + + @Override + public boolean equals( Object o ) { + return o 
instanceof GermanStemmer; } + @Override + public int hashCode() { + return GermanStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/HungarianStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/HungarianStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/HungarianStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/HungarianStemmer.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,1042 +1,1050 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class HungarianStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "cs", -1, -1, "", this), - new Among ( "dzs", -1, -1, "", this), - new Among ( "gy", -1, -1, "", this), - new Among ( "ly", -1, -1, "", this), - new Among ( "ny", -1, -1, "", this), - new Among ( "sz", -1, -1, "", this), - new Among ( "ty", -1, -1, "", this), - new Among ( "zs", -1, -1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "\u00E1", -1, 1, "", this), - new Among ( "\u00E9", -1, 2, "", this) - }; + private final static HungarianStemmer methodObject = new HungarianStemmer (); - private Among a_2[] = { - new Among ( "bb", -1, -1, "", this), - new Among ( "cc", -1, -1, "", this), - new Among ( "dd", -1, -1, "", this), - new Among ( "ff", -1, -1, "", this), - new Among ( "gg", -1, -1, "", this), - new Among ( "jj", -1, -1, "", this), - new Among ( "kk", -1, -1, "", this), - new Among ( "ll", -1, -1, "", this), - new Among ( "mm", -1, -1, "", this), - new Among ( "nn", -1, -1, "", this), - new Among ( "pp", -1, -1, "", this), - new Among ( "rr", -1, -1, "", this), - new Among ( "ccs", -1, -1, "", this), - new Among ( "ss", -1, -1, "", this), - new Among ( "zzs", -1, -1, "", this), - new Among ( "tt", -1, -1, "", this), - new Among ( "vv", -1, -1, "", this), - new Among ( "ggy", -1, -1, "", this), - new Among ( "lly", -1, -1, "", this), - new Among ( "nny", -1, -1, "", this), - new Among ( "tty", -1, -1, "", this), - new Among ( "ssz", -1, -1, "", this), - new Among ( "zz", -1, -1, "", this) - }; + private final static Among a_0[] = { + new Among ( "cs", -1, -1, "", methodObject ), + new Among ( "dzs", -1, -1, "", methodObject ), + new Among ( "gy", -1, -1, "", methodObject ), + new Among ( "ly", -1, -1, "", methodObject ), + new Among ( "ny", -1, -1, "", methodObject ), + new Among ( "sz", -1, -1, "", methodObject ), + new Among ( "ty", -1, -1, "", methodObject ), + new Among ( "zs", -1, -1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "al", -1, 1, "", this), - new Among ( "el", -1, 2, "", this) - }; + private final static Among a_1[] = { + new Among ( "\u00E1", -1, 1, "", methodObject ), + new Among ( "\u00E9", -1, 2, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "ba", -1, -1, "", this), - new Among ( "ra", -1, -1, "", this), - new Among ( "be", -1, -1, "", this), - new Among ( "re", -1, -1, "", 
this), - new Among ( "ig", -1, -1, "", this), - new Among ( "nak", -1, -1, "", this), - new Among ( "nek", -1, -1, "", this), - new Among ( "val", -1, -1, "", this), - new Among ( "vel", -1, -1, "", this), - new Among ( "ul", -1, -1, "", this), - new Among ( "n\u00E1l", -1, -1, "", this), - new Among ( "n\u00E9l", -1, -1, "", this), - new Among ( "b\u00F3l", -1, -1, "", this), - new Among ( "r\u00F3l", -1, -1, "", this), - new Among ( "t\u00F3l", -1, -1, "", this), - new Among ( "b\u00F5l", -1, -1, "", this), - new Among ( "r\u00F5l", -1, -1, "", this), - new Among ( "t\u00F5l", -1, -1, "", this), - new Among ( "\u00FCl", -1, -1, "", this), - new Among ( "n", -1, -1, "", this), - new Among ( "an", 19, -1, "", this), - new Among ( "ban", 20, -1, "", this), - new Among ( "en", 19, -1, "", this), - new Among ( "ben", 22, -1, "", this), - new Among ( "k\u00E9ppen", 22, -1, "", this), - new Among ( "on", 19, -1, "", this), - new Among ( "\u00F6n", 19, -1, "", this), - new Among ( "k\u00E9pp", -1, -1, "", this), - new Among ( "kor", -1, -1, "", this), - new Among ( "t", -1, -1, "", this), - new Among ( "at", 29, -1, "", this), - new Among ( "et", 29, -1, "", this), - new Among ( "k\u00E9nt", 29, -1, "", this), - new Among ( "ank\u00E9nt", 32, -1, "", this), - new Among ( "enk\u00E9nt", 32, -1, "", this), - new Among ( "onk\u00E9nt", 32, -1, "", this), - new Among ( "ot", 29, -1, "", this), - new Among ( "\u00E9rt", 29, -1, "", this), - new Among ( "\u00F6t", 29, -1, "", this), - new Among ( "hez", -1, -1, "", this), - new Among ( "hoz", -1, -1, "", this), - new Among ( "h\u00F6z", -1, -1, "", this), - new Among ( "v\u00E1", -1, -1, "", this), - new Among ( "v\u00E9", -1, -1, "", this) - }; + private final static Among a_2[] = { + new Among ( "bb", -1, -1, "", methodObject ), + new Among ( "cc", -1, -1, "", methodObject ), + new Among ( "dd", -1, -1, "", methodObject ), + new Among ( "ff", -1, -1, "", methodObject ), + new Among ( "gg", -1, -1, "", methodObject ), + new Among ( "jj", -1, -1, "", methodObject ), + new Among ( "kk", -1, -1, "", methodObject ), + new Among ( "ll", -1, -1, "", methodObject ), + new Among ( "mm", -1, -1, "", methodObject ), + new Among ( "nn", -1, -1, "", methodObject ), + new Among ( "pp", -1, -1, "", methodObject ), + new Among ( "rr", -1, -1, "", methodObject ), + new Among ( "ccs", -1, -1, "", methodObject ), + new Among ( "ss", -1, -1, "", methodObject ), + new Among ( "zzs", -1, -1, "", methodObject ), + new Among ( "tt", -1, -1, "", methodObject ), + new Among ( "vv", -1, -1, "", methodObject ), + new Among ( "ggy", -1, -1, "", methodObject ), + new Among ( "lly", -1, -1, "", methodObject ), + new Among ( "nny", -1, -1, "", methodObject ), + new Among ( "tty", -1, -1, "", methodObject ), + new Among ( "ssz", -1, -1, "", methodObject ), + new Among ( "zz", -1, -1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "\u00E1n", -1, 2, "", this), - new Among ( "\u00E9n", -1, 1, "", this), - new Among ( "\u00E1nk\u00E9nt", -1, 3, "", this) - }; + private final static Among a_3[] = { + new Among ( "al", -1, 1, "", methodObject ), + new Among ( "el", -1, 2, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "stul", -1, 2, "", this), - new Among ( "astul", 0, 1, "", this), - new Among ( "\u00E1stul", 0, 3, "", this), - new Among ( "st\u00FCl", -1, 2, "", this), - new Among ( "est\u00FCl", 3, 1, "", this), - new Among ( "\u00E9st\u00FCl", 3, 4, "", this) - }; + private final static Among a_4[] = { + new Among ( "ba", -1, -1, "", methodObject ), 
+ new Among ( "ra", -1, -1, "", methodObject ), + new Among ( "be", -1, -1, "", methodObject ), + new Among ( "re", -1, -1, "", methodObject ), + new Among ( "ig", -1, -1, "", methodObject ), + new Among ( "nak", -1, -1, "", methodObject ), + new Among ( "nek", -1, -1, "", methodObject ), + new Among ( "val", -1, -1, "", methodObject ), + new Among ( "vel", -1, -1, "", methodObject ), + new Among ( "ul", -1, -1, "", methodObject ), + new Among ( "n\u00E1l", -1, -1, "", methodObject ), + new Among ( "n\u00E9l", -1, -1, "", methodObject ), + new Among ( "b\u00F3l", -1, -1, "", methodObject ), + new Among ( "r\u00F3l", -1, -1, "", methodObject ), + new Among ( "t\u00F3l", -1, -1, "", methodObject ), + new Among ( "b\u00F5l", -1, -1, "", methodObject ), + new Among ( "r\u00F5l", -1, -1, "", methodObject ), + new Among ( "t\u00F5l", -1, -1, "", methodObject ), + new Among ( "\u00FCl", -1, -1, "", methodObject ), + new Among ( "n", -1, -1, "", methodObject ), + new Among ( "an", 19, -1, "", methodObject ), + new Among ( "ban", 20, -1, "", methodObject ), + new Among ( "en", 19, -1, "", methodObject ), + new Among ( "ben", 22, -1, "", methodObject ), + new Among ( "k\u00E9ppen", 22, -1, "", methodObject ), + new Among ( "on", 19, -1, "", methodObject ), + new Among ( "\u00F6n", 19, -1, "", methodObject ), + new Among ( "k\u00E9pp", -1, -1, "", methodObject ), + new Among ( "kor", -1, -1, "", methodObject ), + new Among ( "t", -1, -1, "", methodObject ), + new Among ( "at", 29, -1, "", methodObject ), + new Among ( "et", 29, -1, "", methodObject ), + new Among ( "k\u00E9nt", 29, -1, "", methodObject ), + new Among ( "ank\u00E9nt", 32, -1, "", methodObject ), + new Among ( "enk\u00E9nt", 32, -1, "", methodObject ), + new Among ( "onk\u00E9nt", 32, -1, "", methodObject ), + new Among ( "ot", 29, -1, "", methodObject ), + new Among ( "\u00E9rt", 29, -1, "", methodObject ), + new Among ( "\u00F6t", 29, -1, "", methodObject ), + new Among ( "hez", -1, -1, "", methodObject ), + new Among ( "hoz", -1, -1, "", methodObject ), + new Among ( "h\u00F6z", -1, -1, "", methodObject ), + new Among ( "v\u00E1", -1, -1, "", methodObject ), + new Among ( "v\u00E9", -1, -1, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "\u00E1", -1, 1, "", this), - new Among ( "\u00E9", -1, 2, "", this) - }; + private final static Among a_5[] = { + new Among ( "\u00E1n", -1, 2, "", methodObject ), + new Among ( "\u00E9n", -1, 1, "", methodObject ), + new Among ( "\u00E1nk\u00E9nt", -1, 3, "", methodObject ) + }; - private Among a_8[] = { - new Among ( "k", -1, 7, "", this), - new Among ( "ak", 0, 4, "", this), - new Among ( "ek", 0, 6, "", this), - new Among ( "ok", 0, 5, "", this), - new Among ( "\u00E1k", 0, 1, "", this), - new Among ( "\u00E9k", 0, 2, "", this), - new Among ( "\u00F6k", 0, 3, "", this) - }; + private final static Among a_6[] = { + new Among ( "stul", -1, 2, "", methodObject ), + new Among ( "astul", 0, 1, "", methodObject ), + new Among ( "\u00E1stul", 0, 3, "", methodObject ), + new Among ( "st\u00FCl", -1, 2, "", methodObject ), + new Among ( "est\u00FCl", 3, 1, "", methodObject ), + new Among ( "\u00E9st\u00FCl", 3, 4, "", methodObject ) + }; - private Among a_9[] = { - new Among ( "\u00E9i", -1, 7, "", this), - new Among ( "\u00E1\u00E9i", 0, 6, "", this), - new Among ( "\u00E9\u00E9i", 0, 5, "", this), - new Among ( "\u00E9", -1, 9, "", this), - new Among ( "k\u00E9", 3, 4, "", this), - new Among ( "ak\u00E9", 4, 1, "", this), - new Among ( "ek\u00E9", 4, 1, "", this), - new Among ( 
"ok\u00E9", 4, 1, "", this), - new Among ( "\u00E1k\u00E9", 4, 3, "", this), - new Among ( "\u00E9k\u00E9", 4, 2, "", this), - new Among ( "\u00F6k\u00E9", 4, 1, "", this), - new Among ( "\u00E9\u00E9", 3, 8, "", this) - }; + private final static Among a_7[] = { + new Among ( "\u00E1", -1, 1, "", methodObject ), + new Among ( "\u00E9", -1, 2, "", methodObject ) + }; - private Among a_10[] = { - new Among ( "a", -1, 18, "", this), - new Among ( "ja", 0, 17, "", this), - new Among ( "d", -1, 16, "", this), - new Among ( "ad", 2, 13, "", this), - new Among ( "ed", 2, 13, "", this), - new Among ( "od", 2, 13, "", this), - new Among ( "\u00E1d", 2, 14, "", this), - new Among ( "\u00E9d", 2, 15, "", this), - new Among ( "\u00F6d", 2, 13, "", this), - new Among ( "e", -1, 18, "", this), - new Among ( "je", 9, 17, "", this), - new Among ( "nk", -1, 4, "", this), - new Among ( "unk", 11, 1, "", this), - new Among ( "\u00E1nk", 11, 2, "", this), - new Among ( "\u00E9nk", 11, 3, "", this), - new Among ( "\u00FCnk", 11, 1, "", this), - new Among ( "uk", -1, 8, "", this), - new Among ( "juk", 16, 7, "", this), - new Among ( "\u00E1juk", 17, 5, "", this), - new Among ( "\u00FCk", -1, 8, "", this), - new Among ( "j\u00FCk", 19, 7, "", this), - new Among ( "\u00E9j\u00FCk", 20, 6, "", this), - new Among ( "m", -1, 12, "", this), - new Among ( "am", 22, 9, "", this), - new Among ( "em", 22, 9, "", this), - new Among ( "om", 22, 9, "", this), - new Among ( "\u00E1m", 22, 10, "", this), - new Among ( "\u00E9m", 22, 11, "", this), - new Among ( "o", -1, 18, "", this), - new Among ( "\u00E1", -1, 19, "", this), - new Among ( "\u00E9", -1, 20, "", this) - }; + private final static Among a_8[] = { + new Among ( "k", -1, 7, "", methodObject ), + new Among ( "ak", 0, 4, "", methodObject ), + new Among ( "ek", 0, 6, "", methodObject ), + new Among ( "ok", 0, 5, "", methodObject ), + new Among ( "\u00E1k", 0, 1, "", methodObject ), + new Among ( "\u00E9k", 0, 2, "", methodObject ), + new Among ( "\u00F6k", 0, 3, "", methodObject ) + }; - private Among a_11[] = { - new Among ( "id", -1, 10, "", this), - new Among ( "aid", 0, 9, "", this), - new Among ( "jaid", 1, 6, "", this), - new Among ( "eid", 0, 9, "", this), - new Among ( "jeid", 3, 6, "", this), - new Among ( "\u00E1id", 0, 7, "", this), - new Among ( "\u00E9id", 0, 8, "", this), - new Among ( "i", -1, 15, "", this), - new Among ( "ai", 7, 14, "", this), - new Among ( "jai", 8, 11, "", this), - new Among ( "ei", 7, 14, "", this), - new Among ( "jei", 10, 11, "", this), - new Among ( "\u00E1i", 7, 12, "", this), - new Among ( "\u00E9i", 7, 13, "", this), - new Among ( "itek", -1, 24, "", this), - new Among ( "eitek", 14, 21, "", this), - new Among ( "jeitek", 15, 20, "", this), - new Among ( "\u00E9itek", 14, 23, "", this), - new Among ( "ik", -1, 29, "", this), - new Among ( "aik", 18, 26, "", this), - new Among ( "jaik", 19, 25, "", this), - new Among ( "eik", 18, 26, "", this), - new Among ( "jeik", 21, 25, "", this), - new Among ( "\u00E1ik", 18, 27, "", this), - new Among ( "\u00E9ik", 18, 28, "", this), - new Among ( "ink", -1, 20, "", this), - new Among ( "aink", 25, 17, "", this), - new Among ( "jaink", 26, 16, "", this), - new Among ( "eink", 25, 17, "", this), - new Among ( "jeink", 28, 16, "", this), - new Among ( "\u00E1ink", 25, 18, "", this), - new Among ( "\u00E9ink", 25, 19, "", this), - new Among ( "aitok", -1, 21, "", this), - new Among ( "jaitok", 32, 20, "", this), - new Among ( "\u00E1itok", -1, 22, "", this), - new Among ( "im", -1, 5, "", 
this), - new Among ( "aim", 35, 4, "", this), - new Among ( "jaim", 36, 1, "", this), - new Among ( "eim", 35, 4, "", this), - new Among ( "jeim", 38, 1, "", this), - new Among ( "\u00E1im", 35, 2, "", this), - new Among ( "\u00E9im", 35, 3, "", this) - }; + private final static Among a_9[] = { + new Among ( "\u00E9i", -1, 7, "", methodObject ), + new Among ( "\u00E1\u00E9i", 0, 6, "", methodObject ), + new Among ( "\u00E9\u00E9i", 0, 5, "", methodObject ), + new Among ( "\u00E9", -1, 9, "", methodObject ), + new Among ( "k\u00E9", 3, 4, "", methodObject ), + new Among ( "ak\u00E9", 4, 1, "", methodObject ), + new Among ( "ek\u00E9", 4, 1, "", methodObject ), + new Among ( "ok\u00E9", 4, 1, "", methodObject ), + new Among ( "\u00E1k\u00E9", 4, 3, "", methodObject ), + new Among ( "\u00E9k\u00E9", 4, 2, "", methodObject ), + new Among ( "\u00F6k\u00E9", 4, 1, "", methodObject ), + new Among ( "\u00E9\u00E9", 3, 8, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; + private final static Among a_10[] = { + new Among ( "a", -1, 18, "", methodObject ), + new Among ( "ja", 0, 17, "", methodObject ), + new Among ( "d", -1, 16, "", methodObject ), + new Among ( "ad", 2, 13, "", methodObject ), + new Among ( "ed", 2, 13, "", methodObject ), + new Among ( "od", 2, 13, "", methodObject ), + new Among ( "\u00E1d", 2, 14, "", methodObject ), + new Among ( "\u00E9d", 2, 15, "", methodObject ), + new Among ( "\u00F6d", 2, 13, "", methodObject ), + new Among ( "e", -1, 18, "", methodObject ), + new Among ( "je", 9, 17, "", methodObject ), + new Among ( "nk", -1, 4, "", methodObject ), + new Among ( "unk", 11, 1, "", methodObject ), + new Among ( "\u00E1nk", 11, 2, "", methodObject ), + new Among ( "\u00E9nk", 11, 3, "", methodObject ), + new Among ( "\u00FCnk", 11, 1, "", methodObject ), + new Among ( "uk", -1, 8, "", methodObject ), + new Among ( "juk", 16, 7, "", methodObject ), + new Among ( "\u00E1juk", 17, 5, "", methodObject ), + new Among ( "\u00FCk", -1, 8, "", methodObject ), + new Among ( "j\u00FCk", 19, 7, "", methodObject ), + new Among ( "\u00E9j\u00FCk", 20, 6, "", methodObject ), + new Among ( "m", -1, 12, "", methodObject ), + new Among ( "am", 22, 9, "", methodObject ), + new Among ( "em", 22, 9, "", methodObject ), + new Among ( "om", 22, 9, "", methodObject ), + new Among ( "\u00E1m", 22, 10, "", methodObject ), + new Among ( "\u00E9m", 22, 11, "", methodObject ), + new Among ( "o", -1, 18, "", methodObject ), + new Among ( "\u00E1", -1, 19, "", methodObject ), + new Among ( "\u00E9", -1, 20, "", methodObject ) + }; + private final static Among a_11[] = { + new Among ( "id", -1, 10, "", methodObject ), + new Among ( "aid", 0, 9, "", methodObject ), + new Among ( "jaid", 1, 6, "", methodObject ), + new Among ( "eid", 0, 9, "", methodObject ), + new Among ( "jeid", 3, 6, "", methodObject ), + new Among ( "\u00E1id", 0, 7, "", methodObject ), + new Among ( "\u00E9id", 0, 8, "", methodObject ), + new Among ( "i", -1, 15, "", methodObject ), + new Among ( "ai", 7, 14, "", methodObject ), + new Among ( "jai", 8, 11, "", methodObject ), + new Among ( "ei", 7, 14, "", methodObject ), + new Among ( "jei", 10, 11, "", methodObject ), + new Among ( "\u00E1i", 7, 12, "", methodObject ), + new Among ( "\u00E9i", 7, 13, "", methodObject ), + new Among ( "itek", -1, 24, "", methodObject ), + new Among ( "eitek", 14, 21, "", methodObject ), + new Among ( "jeitek", 15, 20, "", methodObject ), + new Among ( "\u00E9itek", 14, 
23, "", methodObject ), + new Among ( "ik", -1, 29, "", methodObject ), + new Among ( "aik", 18, 26, "", methodObject ), + new Among ( "jaik", 19, 25, "", methodObject ), + new Among ( "eik", 18, 26, "", methodObject ), + new Among ( "jeik", 21, 25, "", methodObject ), + new Among ( "\u00E1ik", 18, 27, "", methodObject ), + new Among ( "\u00E9ik", 18, 28, "", methodObject ), + new Among ( "ink", -1, 20, "", methodObject ), + new Among ( "aink", 25, 17, "", methodObject ), + new Among ( "jaink", 26, 16, "", methodObject ), + new Among ( "eink", 25, 17, "", methodObject ), + new Among ( "jeink", 28, 16, "", methodObject ), + new Among ( "\u00E1ink", 25, 18, "", methodObject ), + new Among ( "\u00E9ink", 25, 19, "", methodObject ), + new Among ( "aitok", -1, 21, "", methodObject ), + new Among ( "jaitok", 32, 20, "", methodObject ), + new Among ( "\u00E1itok", -1, 22, "", methodObject ), + new Among ( "im", -1, 5, "", methodObject ), + new Among ( "aim", 35, 4, "", methodObject ), + new Among ( "jaim", 36, 1, "", methodObject ), + new Among ( "eim", 35, 4, "", methodObject ), + new Among ( "jeim", 38, 1, "", methodObject ), + new Among ( "\u00E1im", 35, 2, "", methodObject ), + new Among ( "\u00E9im", 35, 3, "", methodObject ) + }; + + private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 52, 14 }; + private int I_p1; - private void copy_from(HungarianStemmer other) { - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(HungarianStemmer other) { + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; int v_3; - // (, line 44 - I_p1 = limit; - // or, line 51 - lab0: do { - v_1 = cursor; - lab1: do { - // (, line 48 - if (!(in_grouping(g_v, 97, 252))) - { - break lab1; - } - // goto, line 48 - golab2: while(true) - { - v_2 = cursor; - lab3: do { - if (!(out_grouping(g_v, 97, 252))) + // (, line 44 + I_p1 = limit; + // or, line 51 + lab0: do { + v_1 = cursor; + lab1: do { + // (, line 48 + if (!(in_grouping(g_v, 97, 252))) { - break lab3; + break lab1; } - cursor = v_2; - break golab2; - } while (false); - cursor = v_2; - if (cursor >= limit) - { - break lab1; - } - cursor++; - } - // or, line 49 - lab4: do { - v_3 = cursor; - lab5: do { - // among, line 49 - if (find_among(a_0, 8) == 0) + // goto, line 48 + golab2: while(true) { - break lab5; + v_2 = cursor; + lab3: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab3; + } + cursor = v_2; + break golab2; + } while (false); + cursor = v_2; + if (cursor >= limit) + { + break lab1; + } + cursor++; } - break lab4; + // or, line 49 + lab4: do { + v_3 = cursor; + lab5: do { + // among, line 49 + if (find_among(a_0, 8) == 0) + { + break lab5; + } + break lab4; + } while (false); + cursor = v_3; + // next, line 49 + if (cursor >= limit) + { + break lab1; + } + cursor++; + } while (false); + // setmark p1, line 50 + I_p1 = cursor; + break lab0; } while (false); - cursor = v_3; - // next, line 49 - if (cursor >= limit) + cursor = v_1; + // (, line 53 + if (!(out_grouping(g_v, 97, 252))) { - break lab1; + return false; } - cursor++; - } while (false); - // setmark p1, line 50 - I_p1 = cursor; - break lab0; - } while (false); - cursor = v_1; - // (, line 53 - if (!(out_grouping(g_v, 97, 252))) - { - return false; - } - // gopast, line 53 - golab6: while(true) - { - lab7: do { - if (!(in_grouping(g_v, 97, 252))) + // gopast, line 53 + golab6: while(true) { - break lab7; + lab7: do { + if 
(!(in_grouping(g_v, 97, 252))) + { + break lab7; + } + break golab6; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab6; + // setmark p1, line 53 + I_p1 = cursor; } while (false); - if (cursor >= limit) + return true; + } + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { return false; } - cursor++; + return true; } - // setmark p1, line 53 - I_p1 = cursor; - } while (false); - return true; - } - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_v_ending() { + private boolean r_v_ending() { int among_var; - // (, line 60 - // [, line 61 - ket = cursor; - // substring, line 61 - among_var = find_among_b(a_1, 2); - if (among_var == 0) - { - return false; - } - // ], line 61 - bra = cursor; - // call R1, line 61 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 62 - // <-, line 62 - slice_from("a"); - break; - case 2: - // (, line 63 - // <-, line 63 - slice_from("e"); - break; - } - return true; - } + // (, line 60 + // [, line 61 + ket = cursor; + // substring, line 61 + among_var = find_among_b(a_1, 2); + if (among_var == 0) + { + return false; + } + // ], line 61 + bra = cursor; + // call R1, line 61 + if (!r_R1()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 62 + // <-, line 62 + slice_from("a"); + break; + case 2: + // (, line 63 + // <-, line 63 + slice_from("e"); + break; + } + return true; + } - private boolean r_double() { + private boolean r_double() { int v_1; - // (, line 67 - // test, line 68 - v_1 = limit - cursor; - // among, line 68 - if (find_among_b(a_2, 23) == 0) - { - return false; - } - cursor = limit - v_1; - return true; - } + // (, line 67 + // test, line 68 + v_1 = limit - cursor; + // among, line 68 + if (find_among_b(a_2, 23) == 0) + { + return false; + } + cursor = limit - v_1; + return true; + } - private boolean r_undouble() { - // (, line 72 - // next, line 73 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // [, line 73 - ket = cursor; - // hop, line 73 - { - int c = cursor - 1; - if (limit_backward > c || c > limit) - { - return false; + private boolean r_undouble() { + // (, line 72 + // next, line 73 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // [, line 73 + ket = cursor; + // hop, line 73 + { + int c = cursor - 1; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + // ], line 73 + bra = cursor; + // delete, line 73 + slice_del(); + return true; } - cursor = c; - } - // ], line 73 - bra = cursor; - // delete, line 73 - slice_del(); - return true; - } - private boolean r_instrum() { + private boolean r_instrum() { int among_var; - // (, line 76 - // [, line 77 - ket = cursor; - // substring, line 77 - among_var = find_among_b(a_3, 2); - if (among_var == 0) - { - return false; - } - // ], line 77 - bra = cursor; - // call R1, line 77 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 78 - // call double, line 78 - if (!r_double()) + // (, line 76 + // [, line 77 + ket = cursor; + // substring, line 77 + among_var = find_among_b(a_3, 2); + if (among_var == 0) { return false; } - break; - case 2: - // (, line 79 - // call double, line 79 - if (!r_double()) + // ], line 77 + bra = cursor; + // call R1, line 77 + if (!r_R1()) { return false; } - break; - } - // delete, line 81 - slice_del(); - // call 
undouble, line 82 - if (!r_undouble()) - { - return false; - } - return true; - } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 78 + // call double, line 78 + if (!r_double()) + { + return false; + } + break; + case 2: + // (, line 79 + // call double, line 79 + if (!r_double()) + { + return false; + } + break; + } + // delete, line 81 + slice_del(); + // call undouble, line 82 + if (!r_undouble()) + { + return false; + } + return true; + } - private boolean r_case() { - // (, line 86 - // [, line 87 - ket = cursor; - // substring, line 87 - if (find_among_b(a_4, 44) == 0) - { - return false; - } - // ], line 87 - bra = cursor; - // call R1, line 87 - if (!r_R1()) - { - return false; - } - // delete, line 111 - slice_del(); - // call v_ending, line 112 - if (!r_v_ending()) - { - return false; - } - return true; - } + private boolean r_case() { + // (, line 86 + // [, line 87 + ket = cursor; + // substring, line 87 + if (find_among_b(a_4, 44) == 0) + { + return false; + } + // ], line 87 + bra = cursor; + // call R1, line 87 + if (!r_R1()) + { + return false; + } + // delete, line 111 + slice_del(); + // call v_ending, line 112 + if (!r_v_ending()) + { + return false; + } + return true; + } - private boolean r_case_special() { + private boolean r_case_special() { int among_var; - // (, line 115 - // [, line 116 - ket = cursor; - // substring, line 116 - among_var = find_among_b(a_5, 3); - if (among_var == 0) - { - return false; - } - // ], line 116 - bra = cursor; - // call R1, line 116 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 117 - // <-, line 117 - slice_from("e"); - break; - case 2: - // (, line 118 - // <-, line 118 - slice_from("a"); - break; - case 3: - // (, line 119 - // <-, line 119 - slice_from("a"); - break; - } - return true; - } + // (, line 115 + // [, line 116 + ket = cursor; + // substring, line 116 + among_var = find_among_b(a_5, 3); + if (among_var == 0) + { + return false; + } + // ], line 116 + bra = cursor; + // call R1, line 116 + if (!r_R1()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 117 + // <-, line 117 + slice_from("e"); + break; + case 2: + // (, line 118 + // <-, line 118 + slice_from("a"); + break; + case 3: + // (, line 119 + // <-, line 119 + slice_from("a"); + break; + } + return true; + } - private boolean r_case_other() { + private boolean r_case_other() { int among_var; - // (, line 123 - // [, line 124 - ket = cursor; - // substring, line 124 - among_var = find_among_b(a_6, 6); - if (among_var == 0) - { - return false; - } - // ], line 124 - bra = cursor; - // call R1, line 124 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 125 - // delete, line 125 - slice_del(); - break; - case 2: - // (, line 126 - // delete, line 126 - slice_del(); - break; - case 3: - // (, line 127 - // <-, line 127 - slice_from("a"); - break; - case 4: - // (, line 128 - // <-, line 128 - slice_from("e"); - break; - } - return true; - } + // (, line 123 + // [, line 124 + ket = cursor; + // substring, line 124 + among_var = find_among_b(a_6, 6); + if (among_var == 0) + { + return false; + } + // ], line 124 + bra = cursor; + // call R1, line 124 + if (!r_R1()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 125 + // delete, line 125 + slice_del(); + break; + case 2: + // (, line 126 + // delete, line 126 + slice_del(); + break; + 
case 3: + // (, line 127 + // <-, line 127 + slice_from("a"); + break; + case 4: + // (, line 128 + // <-, line 128 + slice_from("e"); + break; + } + return true; + } - private boolean r_factive() { + private boolean r_factive() { int among_var; - // (, line 132 - // [, line 133 - ket = cursor; - // substring, line 133 - among_var = find_among_b(a_7, 2); - if (among_var == 0) - { - return false; - } - // ], line 133 - bra = cursor; - // call R1, line 133 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 134 - // call double, line 134 - if (!r_double()) + // (, line 132 + // [, line 133 + ket = cursor; + // substring, line 133 + among_var = find_among_b(a_7, 2); + if (among_var == 0) { return false; } - break; - case 2: - // (, line 135 - // call double, line 135 - if (!r_double()) + // ], line 133 + bra = cursor; + // call R1, line 133 + if (!r_R1()) { return false; } - break; - } - // delete, line 137 - slice_del(); - // call undouble, line 138 - if (!r_undouble()) - { - return false; - } - return true; - } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 134 + // call double, line 134 + if (!r_double()) + { + return false; + } + break; + case 2: + // (, line 135 + // call double, line 135 + if (!r_double()) + { + return false; + } + break; + } + // delete, line 137 + slice_del(); + // call undouble, line 138 + if (!r_undouble()) + { + return false; + } + return true; + } - private boolean r_plural() { + private boolean r_plural() { int among_var; - // (, line 141 - // [, line 142 - ket = cursor; - // substring, line 142 - among_var = find_among_b(a_8, 7); - if (among_var == 0) - { - return false; - } - // ], line 142 - bra = cursor; - // call R1, line 142 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 143 - // <-, line 143 - slice_from("a"); - break; - case 2: - // (, line 144 - // <-, line 144 - slice_from("e"); - break; - case 3: - // (, line 145 - // delete, line 145 - slice_del(); - break; - case 4: - // (, line 146 - // delete, line 146 - slice_del(); - break; - case 5: - // (, line 147 - // delete, line 147 - slice_del(); - break; - case 6: - // (, line 148 - // delete, line 148 - slice_del(); - break; - case 7: - // (, line 149 - // delete, line 149 - slice_del(); - break; - } - return true; - } + // (, line 141 + // [, line 142 + ket = cursor; + // substring, line 142 + among_var = find_among_b(a_8, 7); + if (among_var == 0) + { + return false; + } + // ], line 142 + bra = cursor; + // call R1, line 142 + if (!r_R1()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 143 + // <-, line 143 + slice_from("a"); + break; + case 2: + // (, line 144 + // <-, line 144 + slice_from("e"); + break; + case 3: + // (, line 145 + // delete, line 145 + slice_del(); + break; + case 4: + // (, line 146 + // delete, line 146 + slice_del(); + break; + case 5: + // (, line 147 + // delete, line 147 + slice_del(); + break; + case 6: + // (, line 148 + // delete, line 148 + slice_del(); + break; + case 7: + // (, line 149 + // delete, line 149 + slice_del(); + break; + } + return true; + } - private boolean r_owned() { + private boolean r_owned() { int among_var; - // (, line 153 - // [, line 154 - ket = cursor; - // substring, line 154 - among_var = find_among_b(a_9, 12); - if (among_var == 0) - { - return false; - } - // ], line 154 - bra = cursor; - // call R1, line 154 - if (!r_R1()) - { - return false; - } - 
switch(among_var) { - case 0: - return false; - case 1: - // (, line 155 - // delete, line 155 - slice_del(); - break; - case 2: - // (, line 156 - // <-, line 156 - slice_from("e"); - break; - case 3: - // (, line 157 - // <-, line 157 - slice_from("a"); - break; - case 4: - // (, line 158 - // delete, line 158 - slice_del(); - break; - case 5: - // (, line 159 - // <-, line 159 - slice_from("e"); - break; - case 6: - // (, line 160 - // <-, line 160 - slice_from("a"); - break; - case 7: - // (, line 161 - // delete, line 161 - slice_del(); - break; - case 8: - // (, line 162 - // <-, line 162 - slice_from("e"); - break; - case 9: - // (, line 163 - // delete, line 163 - slice_del(); - break; - } - return true; - } + // (, line 153 + // [, line 154 + ket = cursor; + // substring, line 154 + among_var = find_among_b(a_9, 12); + if (among_var == 0) + { + return false; + } + // ], line 154 + bra = cursor; + // call R1, line 154 + if (!r_R1()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 155 + // delete, line 155 + slice_del(); + break; + case 2: + // (, line 156 + // <-, line 156 + slice_from("e"); + break; + case 3: + // (, line 157 + // <-, line 157 + slice_from("a"); + break; + case 4: + // (, line 158 + // delete, line 158 + slice_del(); + break; + case 5: + // (, line 159 + // <-, line 159 + slice_from("e"); + break; + case 6: + // (, line 160 + // <-, line 160 + slice_from("a"); + break; + case 7: + // (, line 161 + // delete, line 161 + slice_del(); + break; + case 8: + // (, line 162 + // <-, line 162 + slice_from("e"); + break; + case 9: + // (, line 163 + // delete, line 163 + slice_del(); + break; + } + return true; + } - private boolean r_sing_owner() { + private boolean r_sing_owner() { int among_var; - // (, line 167 - // [, line 168 - ket = cursor; - // substring, line 168 - among_var = find_among_b(a_10, 31); - if (among_var == 0) - { - return false; - } - // ], line 168 - bra = cursor; - // call R1, line 168 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 169 - // delete, line 169 - slice_del(); - break; - case 2: - // (, line 170 - // <-, line 170 - slice_from("a"); - break; - case 3: - // (, line 171 - // <-, line 171 - slice_from("e"); - break; - case 4: - // (, line 172 - // delete, line 172 - slice_del(); - break; - case 5: - // (, line 173 - // <-, line 173 - slice_from("a"); - break; - case 6: - // (, line 174 - // <-, line 174 - slice_from("e"); - break; - case 7: - // (, line 175 - // delete, line 175 - slice_del(); - break; - case 8: - // (, line 176 - // delete, line 176 - slice_del(); - break; - case 9: - // (, line 177 - // delete, line 177 - slice_del(); - break; - case 10: - // (, line 178 - // <-, line 178 - slice_from("a"); - break; - case 11: - // (, line 179 - // <-, line 179 - slice_from("e"); - break; - case 12: - // (, line 180 - // delete, line 180 - slice_del(); - break; - case 13: - // (, line 181 - // delete, line 181 - slice_del(); - break; - case 14: - // (, line 182 - // <-, line 182 - slice_from("a"); - break; - case 15: - // (, line 183 - // <-, line 183 - slice_from("e"); - break; - case 16: - // (, line 184 - // delete, line 184 - slice_del(); - break; - case 17: - // (, line 185 - // delete, line 185 - slice_del(); - break; - case 18: - // (, line 186 - // delete, line 186 - slice_del(); - break; - case 19: - // (, line 187 - // <-, line 187 - slice_from("a"); - break; - case 20: - // (, line 188 - // <-, line 188 - 
slice_from("e"); - break; - } - return true; - } + // (, line 167 + // [, line 168 + ket = cursor; + // substring, line 168 + among_var = find_among_b(a_10, 31); + if (among_var == 0) + { + return false; + } + // ], line 168 + bra = cursor; + // call R1, line 168 + if (!r_R1()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 169 + // delete, line 169 + slice_del(); + break; + case 2: + // (, line 170 + // <-, line 170 + slice_from("a"); + break; + case 3: + // (, line 171 + // <-, line 171 + slice_from("e"); + break; + case 4: + // (, line 172 + // delete, line 172 + slice_del(); + break; + case 5: + // (, line 173 + // <-, line 173 + slice_from("a"); + break; + case 6: + // (, line 174 + // <-, line 174 + slice_from("e"); + break; + case 7: + // (, line 175 + // delete, line 175 + slice_del(); + break; + case 8: + // (, line 176 + // delete, line 176 + slice_del(); + break; + case 9: + // (, line 177 + // delete, line 177 + slice_del(); + break; + case 10: + // (, line 178 + // <-, line 178 + slice_from("a"); + break; + case 11: + // (, line 179 + // <-, line 179 + slice_from("e"); + break; + case 12: + // (, line 180 + // delete, line 180 + slice_del(); + break; + case 13: + // (, line 181 + // delete, line 181 + slice_del(); + break; + case 14: + // (, line 182 + // <-, line 182 + slice_from("a"); + break; + case 15: + // (, line 183 + // <-, line 183 + slice_from("e"); + break; + case 16: + // (, line 184 + // delete, line 184 + slice_del(); + break; + case 17: + // (, line 185 + // delete, line 185 + slice_del(); + break; + case 18: + // (, line 186 + // delete, line 186 + slice_del(); + break; + case 19: + // (, line 187 + // <-, line 187 + slice_from("a"); + break; + case 20: + // (, line 188 + // <-, line 188 + slice_from("e"); + break; + } + return true; + } - private boolean r_plur_owner() { + private boolean r_plur_owner() { int among_var; - // (, line 192 - // [, line 193 - ket = cursor; - // substring, line 193 - among_var = find_among_b(a_11, 42); - if (among_var == 0) - { - return false; - } - // ], line 193 - bra = cursor; - // call R1, line 193 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 194 - // delete, line 194 - slice_del(); - break; - case 2: - // (, line 195 - // <-, line 195 - slice_from("a"); - break; - case 3: - // (, line 196 - // <-, line 196 - slice_from("e"); - break; - case 4: - // (, line 197 - // delete, line 197 - slice_del(); - break; - case 5: - // (, line 198 - // delete, line 198 - slice_del(); - break; - case 6: - // (, line 199 - // delete, line 199 - slice_del(); - break; - case 7: - // (, line 200 - // <-, line 200 - slice_from("a"); - break; - case 8: - // (, line 201 - // <-, line 201 - slice_from("e"); - break; - case 9: - // (, line 202 - // delete, line 202 - slice_del(); - break; - case 10: - // (, line 203 - // delete, line 203 - slice_del(); - break; - case 11: - // (, line 204 - // delete, line 204 - slice_del(); - break; - case 12: - // (, line 205 - // <-, line 205 - slice_from("a"); - break; - case 13: - // (, line 206 - // <-, line 206 - slice_from("e"); - break; - case 14: - // (, line 207 - // delete, line 207 - slice_del(); - break; - case 15: - // (, line 208 - // delete, line 208 - slice_del(); - break; - case 16: - // (, line 209 - // delete, line 209 - slice_del(); - break; - case 17: - // (, line 210 - // delete, line 210 - slice_del(); - break; - case 18: - // (, line 211 - // <-, line 211 - slice_from("a"); - break; 
- case 19: - // (, line 212 - // <-, line 212 - slice_from("e"); - break; - case 20: - // (, line 214 - // delete, line 214 - slice_del(); - break; - case 21: - // (, line 215 - // delete, line 215 - slice_del(); - break; - case 22: - // (, line 216 - // <-, line 216 - slice_from("a"); - break; - case 23: - // (, line 217 - // <-, line 217 - slice_from("e"); - break; - case 24: - // (, line 218 - // delete, line 218 - slice_del(); - break; - case 25: - // (, line 219 - // delete, line 219 - slice_del(); - break; - case 26: - // (, line 220 - // delete, line 220 - slice_del(); - break; - case 27: - // (, line 221 - // <-, line 221 - slice_from("a"); - break; - case 28: - // (, line 222 - // <-, line 222 - slice_from("e"); - break; - case 29: - // (, line 223 - // delete, line 223 - slice_del(); - break; - } - return true; - } + // (, line 192 + // [, line 193 + ket = cursor; + // substring, line 193 + among_var = find_among_b(a_11, 42); + if (among_var == 0) + { + return false; + } + // ], line 193 + bra = cursor; + // call R1, line 193 + if (!r_R1()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 194 + // delete, line 194 + slice_del(); + break; + case 2: + // (, line 195 + // <-, line 195 + slice_from("a"); + break; + case 3: + // (, line 196 + // <-, line 196 + slice_from("e"); + break; + case 4: + // (, line 197 + // delete, line 197 + slice_del(); + break; + case 5: + // (, line 198 + // delete, line 198 + slice_del(); + break; + case 6: + // (, line 199 + // delete, line 199 + slice_del(); + break; + case 7: + // (, line 200 + // <-, line 200 + slice_from("a"); + break; + case 8: + // (, line 201 + // <-, line 201 + slice_from("e"); + break; + case 9: + // (, line 202 + // delete, line 202 + slice_del(); + break; + case 10: + // (, line 203 + // delete, line 203 + slice_del(); + break; + case 11: + // (, line 204 + // delete, line 204 + slice_del(); + break; + case 12: + // (, line 205 + // <-, line 205 + slice_from("a"); + break; + case 13: + // (, line 206 + // <-, line 206 + slice_from("e"); + break; + case 14: + // (, line 207 + // delete, line 207 + slice_del(); + break; + case 15: + // (, line 208 + // delete, line 208 + slice_del(); + break; + case 16: + // (, line 209 + // delete, line 209 + slice_del(); + break; + case 17: + // (, line 210 + // delete, line 210 + slice_del(); + break; + case 18: + // (, line 211 + // <-, line 211 + slice_from("a"); + break; + case 19: + // (, line 212 + // <-, line 212 + slice_from("e"); + break; + case 20: + // (, line 214 + // delete, line 214 + slice_del(); + break; + case 21: + // (, line 215 + // delete, line 215 + slice_del(); + break; + case 22: + // (, line 216 + // <-, line 216 + slice_from("a"); + break; + case 23: + // (, line 217 + // <-, line 217 + slice_from("e"); + break; + case 24: + // (, line 218 + // delete, line 218 + slice_del(); + break; + case 25: + // (, line 219 + // delete, line 219 + slice_del(); + break; + case 26: + // (, line 220 + // delete, line 220 + slice_del(); + break; + case 27: + // (, line 221 + // <-, line 221 + slice_from("a"); + break; + case 28: + // (, line 222 + // <-, line 222 + slice_from("e"); + break; + case 29: + // (, line 223 + // delete, line 223 + slice_del(); + break; + } + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -1047,112 +1055,124 @@ int v_8; int v_9; int v_10; - // (, line 228 - // do, line 229 - v_1 = cursor; - lab0: do { - // call mark_regions, line 229 - if 
(!r_mark_regions()) - { - break lab0; + // (, line 228 + // do, line 229 + v_1 = cursor; + lab0: do { + // call mark_regions, line 229 + if (!r_mark_regions()) + { + break lab0; + } + } while (false); + cursor = v_1; + // backwards, line 230 + limit_backward = cursor; cursor = limit; + // (, line 230 + // do, line 231 + v_2 = limit - cursor; + lab1: do { + // call instrum, line 231 + if (!r_instrum()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 232 + v_3 = limit - cursor; + lab2: do { + // call case, line 232 + if (!r_case()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 233 + v_4 = limit - cursor; + lab3: do { + // call case_special, line 233 + if (!r_case_special()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // do, line 234 + v_5 = limit - cursor; + lab4: do { + // call case_other, line 234 + if (!r_case_other()) + { + break lab4; + } + } while (false); + cursor = limit - v_5; + // do, line 235 + v_6 = limit - cursor; + lab5: do { + // call factive, line 235 + if (!r_factive()) + { + break lab5; + } + } while (false); + cursor = limit - v_6; + // do, line 236 + v_7 = limit - cursor; + lab6: do { + // call owned, line 236 + if (!r_owned()) + { + break lab6; + } + } while (false); + cursor = limit - v_7; + // do, line 237 + v_8 = limit - cursor; + lab7: do { + // call sing_owner, line 237 + if (!r_sing_owner()) + { + break lab7; + } + } while (false); + cursor = limit - v_8; + // do, line 238 + v_9 = limit - cursor; + lab8: do { + // call plur_owner, line 238 + if (!r_plur_owner()) + { + break lab8; + } + } while (false); + cursor = limit - v_9; + // do, line 239 + v_10 = limit - cursor; + lab9: do { + // call plural, line 239 + if (!r_plural()) + { + break lab9; + } + } while (false); + cursor = limit - v_10; + cursor = limit_backward; return true; } - } while (false); - cursor = v_1; - // backwards, line 230 - limit_backward = cursor; cursor = limit; - // (, line 230 - // do, line 231 - v_2 = limit - cursor; - lab1: do { - // call instrum, line 231 - if (!r_instrum()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 232 - v_3 = limit - cursor; - lab2: do { - // call case, line 232 - if (!r_case()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 233 - v_4 = limit - cursor; - lab3: do { - // call case_special, line 233 - if (!r_case_special()) - { - break lab3; - } - } while (false); - cursor = limit - v_4; - // do, line 234 - v_5 = limit - cursor; - lab4: do { - // call case_other, line 234 - if (!r_case_other()) - { - break lab4; - } - } while (false); - cursor = limit - v_5; - // do, line 235 - v_6 = limit - cursor; - lab5: do { - // call factive, line 235 - if (!r_factive()) - { - break lab5; - } - } while (false); - cursor = limit - v_6; - // do, line 236 - v_7 = limit - cursor; - lab6: do { - // call owned, line 236 - if (!r_owned()) - { - break lab6; - } - } while (false); - cursor = limit - v_7; - // do, line 237 - v_8 = limit - cursor; - lab7: do { - // call sing_owner, line 237 - if (!r_sing_owner()) - { - break lab7; - } - } while (false); - cursor = limit - v_8; - // do, line 238 - v_9 = limit - cursor; - lab8: do { - // call plur_owner, line 238 - if (!r_plur_owner()) - { - break lab8; - } - } while (false); - cursor = limit - v_9; - // do, line 239 - v_10 = limit - cursor; - lab9: do { - // call plural, line 239 - if (!r_plural()) - { - break lab9; - } - } while (false); - cursor = limit - v_10; - cursor = limit_backward; 
return true; + + @Override + public boolean equals( Object o ) { + return o instanceof HungarianStemmer; } + @Override + public int hashCode() { + return HungarianStemmer.class.getName().hashCode(); + } + + + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/tartarus/snowball/ext/IrishStemmer.java'. Fisheye: No comparison available. Pass `N' to diff? Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/ItalianStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/ItalianStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/ItalianStemmer.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/ItalianStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,1180 +1,1200 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class ItalianStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "", -1, 7, "", this), - new Among ( "qu", 0, 6, "", this), - new Among ( "\u00E1", 0, 1, "", this), - new Among ( "\u00E9", 0, 2, "", this), - new Among ( "\u00ED", 0, 3, "", this), - new Among ( "\u00F3", 0, 4, "", this), - new Among ( "\u00FA", 0, 5, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "", -1, 3, "", this), - new Among ( "I", 0, 1, "", this), - new Among ( "U", 0, 2, "", this) - }; + private final static ItalianStemmer methodObject = new ItalianStemmer (); - private Among a_2[] = { - new Among ( "la", -1, -1, "", this), - new Among ( "cela", 0, -1, "", this), - new Among ( "gliela", 0, -1, "", this), - new Among ( "mela", 0, -1, "", this), - new Among ( "tela", 0, -1, "", this), - new Among ( "vela", 0, -1, "", this), - new Among ( "le", -1, -1, "", this), - new Among ( "cele", 6, -1, "", this), - new Among ( "gliele", 6, -1, "", this), - new Among ( "mele", 6, -1, "", this), - new Among ( "tele", 6, -1, "", this), - new Among ( "vele", 6, -1, "", this), - new Among ( "ne", -1, -1, "", this), - new Among ( "cene", 12, -1, "", this), - new Among ( "gliene", 12, -1, "", this), - new Among ( "mene", 12, -1, "", this), - new Among ( "sene", 12, -1, "", this), - new Among ( "tene", 12, -1, "", this), - new Among ( "vene", 12, -1, "", this), - new Among ( "ci", -1, -1, "", this), - new Among ( "li", -1, -1, "", this), - new Among ( "celi", 20, -1, "", this), - new Among ( "glieli", 20, -1, "", this), - new Among ( "meli", 20, -1, "", this), - new Among ( "teli", 20, -1, "", this), - new Among ( "veli", 20, -1, "", this), - new Among ( "gli", 20, -1, "", this), - new Among ( "mi", -1, -1, "", this), - new Among ( "si", -1, -1, "", this), - new Among ( "ti", -1, -1, "", this), - new Among ( "vi", -1, -1, "", this), - new Among ( "lo", -1, -1, "", this), - new Among ( "celo", 31, -1, "", this), - new Among ( "glielo", 31, -1, "", this), - new Among ( "melo", 31, -1, "", this), - new Among ( "telo", 31, -1, "", this), - new Among ( "velo", 31, -1, "", this) - }; + private final static 
Among a_0[] = { + new Among ( "", -1, 7, "", methodObject ), + new Among ( "qu", 0, 6, "", methodObject ), + new Among ( "\u00E1", 0, 1, "", methodObject ), + new Among ( "\u00E9", 0, 2, "", methodObject ), + new Among ( "\u00ED", 0, 3, "", methodObject ), + new Among ( "\u00F3", 0, 4, "", methodObject ), + new Among ( "\u00FA", 0, 5, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "ando", -1, 1, "", this), - new Among ( "endo", -1, 1, "", this), - new Among ( "ar", -1, 2, "", this), - new Among ( "er", -1, 2, "", this), - new Among ( "ir", -1, 2, "", this) - }; + private final static Among a_1[] = { + new Among ( "", -1, 3, "", methodObject ), + new Among ( "I", 0, 1, "", methodObject ), + new Among ( "U", 0, 2, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "ic", -1, -1, "", this), - new Among ( "abil", -1, -1, "", this), - new Among ( "os", -1, -1, "", this), - new Among ( "iv", -1, 1, "", this) - }; + private final static Among a_2[] = { + new Among ( "la", -1, -1, "", methodObject ), + new Among ( "cela", 0, -1, "", methodObject ), + new Among ( "gliela", 0, -1, "", methodObject ), + new Among ( "mela", 0, -1, "", methodObject ), + new Among ( "tela", 0, -1, "", methodObject ), + new Among ( "vela", 0, -1, "", methodObject ), + new Among ( "le", -1, -1, "", methodObject ), + new Among ( "cele", 6, -1, "", methodObject ), + new Among ( "gliele", 6, -1, "", methodObject ), + new Among ( "mele", 6, -1, "", methodObject ), + new Among ( "tele", 6, -1, "", methodObject ), + new Among ( "vele", 6, -1, "", methodObject ), + new Among ( "ne", -1, -1, "", methodObject ), + new Among ( "cene", 12, -1, "", methodObject ), + new Among ( "gliene", 12, -1, "", methodObject ), + new Among ( "mene", 12, -1, "", methodObject ), + new Among ( "sene", 12, -1, "", methodObject ), + new Among ( "tene", 12, -1, "", methodObject ), + new Among ( "vene", 12, -1, "", methodObject ), + new Among ( "ci", -1, -1, "", methodObject ), + new Among ( "li", -1, -1, "", methodObject ), + new Among ( "celi", 20, -1, "", methodObject ), + new Among ( "glieli", 20, -1, "", methodObject ), + new Among ( "meli", 20, -1, "", methodObject ), + new Among ( "teli", 20, -1, "", methodObject ), + new Among ( "veli", 20, -1, "", methodObject ), + new Among ( "gli", 20, -1, "", methodObject ), + new Among ( "mi", -1, -1, "", methodObject ), + new Among ( "si", -1, -1, "", methodObject ), + new Among ( "ti", -1, -1, "", methodObject ), + new Among ( "vi", -1, -1, "", methodObject ), + new Among ( "lo", -1, -1, "", methodObject ), + new Among ( "celo", 31, -1, "", methodObject ), + new Among ( "glielo", 31, -1, "", methodObject ), + new Among ( "melo", 31, -1, "", methodObject ), + new Among ( "telo", 31, -1, "", methodObject ), + new Among ( "velo", 31, -1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "ic", -1, 1, "", this), - new Among ( "abil", -1, 1, "", this), - new Among ( "iv", -1, 1, "", this) - }; + private final static Among a_3[] = { + new Among ( "ando", -1, 1, "", methodObject ), + new Among ( "endo", -1, 1, "", methodObject ), + new Among ( "ar", -1, 2, "", methodObject ), + new Among ( "er", -1, 2, "", methodObject ), + new Among ( "ir", -1, 2, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "ica", -1, 1, "", this), - new Among ( "logia", -1, 3, "", this), - new Among ( "osa", -1, 1, "", this), - new Among ( "ista", -1, 1, "", this), - new Among ( "iva", -1, 9, "", this), - new Among ( "anza", -1, 1, "", this), - new Among ( "enza", -1, 5, "", this), 
- new Among ( "ice", -1, 1, "", this), - new Among ( "atrice", 7, 1, "", this), - new Among ( "iche", -1, 1, "", this), - new Among ( "logie", -1, 3, "", this), - new Among ( "abile", -1, 1, "", this), - new Among ( "ibile", -1, 1, "", this), - new Among ( "usione", -1, 4, "", this), - new Among ( "azione", -1, 2, "", this), - new Among ( "uzione", -1, 4, "", this), - new Among ( "atore", -1, 2, "", this), - new Among ( "ose", -1, 1, "", this), - new Among ( "ante", -1, 1, "", this), - new Among ( "mente", -1, 1, "", this), - new Among ( "amente", 19, 7, "", this), - new Among ( "iste", -1, 1, "", this), - new Among ( "ive", -1, 9, "", this), - new Among ( "anze", -1, 1, "", this), - new Among ( "enze", -1, 5, "", this), - new Among ( "ici", -1, 1, "", this), - new Among ( "atrici", 25, 1, "", this), - new Among ( "ichi", -1, 1, "", this), - new Among ( "abili", -1, 1, "", this), - new Among ( "ibili", -1, 1, "", this), - new Among ( "ismi", -1, 1, "", this), - new Among ( "usioni", -1, 4, "", this), - new Among ( "azioni", -1, 2, "", this), - new Among ( "uzioni", -1, 4, "", this), - new Among ( "atori", -1, 2, "", this), - new Among ( "osi", -1, 1, "", this), - new Among ( "anti", -1, 1, "", this), - new Among ( "amenti", -1, 6, "", this), - new Among ( "imenti", -1, 6, "", this), - new Among ( "isti", -1, 1, "", this), - new Among ( "ivi", -1, 9, "", this), - new Among ( "ico", -1, 1, "", this), - new Among ( "ismo", -1, 1, "", this), - new Among ( "oso", -1, 1, "", this), - new Among ( "amento", -1, 6, "", this), - new Among ( "imento", -1, 6, "", this), - new Among ( "ivo", -1, 9, "", this), - new Among ( "it\u00E0", -1, 8, "", this), - new Among ( "ist\u00E0", -1, 1, "", this), - new Among ( "ist\u00E8", -1, 1, "", this), - new Among ( "ist\u00EC", -1, 1, "", this) - }; + private final static Among a_4[] = { + new Among ( "ic", -1, -1, "", methodObject ), + new Among ( "abil", -1, -1, "", methodObject ), + new Among ( "os", -1, -1, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "isca", -1, 1, "", this), - new Among ( "enda", -1, 1, "", this), - new Among ( "ata", -1, 1, "", this), - new Among ( "ita", -1, 1, "", this), - new Among ( "uta", -1, 1, "", this), - new Among ( "ava", -1, 1, "", this), - new Among ( "eva", -1, 1, "", this), - new Among ( "iva", -1, 1, "", this), - new Among ( "erebbe", -1, 1, "", this), - new Among ( "irebbe", -1, 1, "", this), - new Among ( "isce", -1, 1, "", this), - new Among ( "ende", -1, 1, "", this), - new Among ( "are", -1, 1, "", this), - new Among ( "ere", -1, 1, "", this), - new Among ( "ire", -1, 1, "", this), - new Among ( "asse", -1, 1, "", this), - new Among ( "ate", -1, 1, "", this), - new Among ( "avate", 16, 1, "", this), - new Among ( "evate", 16, 1, "", this), - new Among ( "ivate", 16, 1, "", this), - new Among ( "ete", -1, 1, "", this), - new Among ( "erete", 20, 1, "", this), - new Among ( "irete", 20, 1, "", this), - new Among ( "ite", -1, 1, "", this), - new Among ( "ereste", -1, 1, "", this), - new Among ( "ireste", -1, 1, "", this), - new Among ( "ute", -1, 1, "", this), - new Among ( "erai", -1, 1, "", this), - new Among ( "irai", -1, 1, "", this), - new Among ( "isci", -1, 1, "", this), - new Among ( "endi", -1, 1, "", this), - new Among ( "erei", -1, 1, "", this), - new Among ( "irei", -1, 1, "", this), - new Among ( "assi", -1, 1, "", this), - new Among ( "ati", -1, 1, "", this), - new Among ( "iti", -1, 1, "", this), - new Among ( "eresti", -1, 1, "", this), - new 
Among ( "iresti", -1, 1, "", this), - new Among ( "uti", -1, 1, "", this), - new Among ( "avi", -1, 1, "", this), - new Among ( "evi", -1, 1, "", this), - new Among ( "ivi", -1, 1, "", this), - new Among ( "isco", -1, 1, "", this), - new Among ( "ando", -1, 1, "", this), - new Among ( "endo", -1, 1, "", this), - new Among ( "Yamo", -1, 1, "", this), - new Among ( "iamo", -1, 1, "", this), - new Among ( "avamo", -1, 1, "", this), - new Among ( "evamo", -1, 1, "", this), - new Among ( "ivamo", -1, 1, "", this), - new Among ( "eremo", -1, 1, "", this), - new Among ( "iremo", -1, 1, "", this), - new Among ( "assimo", -1, 1, "", this), - new Among ( "ammo", -1, 1, "", this), - new Among ( "emmo", -1, 1, "", this), - new Among ( "eremmo", 54, 1, "", this), - new Among ( "iremmo", 54, 1, "", this), - new Among ( "immo", -1, 1, "", this), - new Among ( "ano", -1, 1, "", this), - new Among ( "iscano", 58, 1, "", this), - new Among ( "avano", 58, 1, "", this), - new Among ( "evano", 58, 1, "", this), - new Among ( "ivano", 58, 1, "", this), - new Among ( "eranno", -1, 1, "", this), - new Among ( "iranno", -1, 1, "", this), - new Among ( "ono", -1, 1, "", this), - new Among ( "iscono", 65, 1, "", this), - new Among ( "arono", 65, 1, "", this), - new Among ( "erono", 65, 1, "", this), - new Among ( "irono", 65, 1, "", this), - new Among ( "erebbero", -1, 1, "", this), - new Among ( "irebbero", -1, 1, "", this), - new Among ( "assero", -1, 1, "", this), - new Among ( "essero", -1, 1, "", this), - new Among ( "issero", -1, 1, "", this), - new Among ( "ato", -1, 1, "", this), - new Among ( "ito", -1, 1, "", this), - new Among ( "uto", -1, 1, "", this), - new Among ( "avo", -1, 1, "", this), - new Among ( "evo", -1, 1, "", this), - new Among ( "ivo", -1, 1, "", this), - new Among ( "ar", -1, 1, "", this), - new Among ( "ir", -1, 1, "", this), - new Among ( "er\u00E0", -1, 1, "", this), - new Among ( "ir\u00E0", -1, 1, "", this), - new Among ( "er\u00F2", -1, 1, "", this), - new Among ( "ir\u00F2", -1, 1, "", this) - }; + private final static Among a_5[] = { + new Among ( "ic", -1, 1, "", methodObject ), + new Among ( "abil", -1, 1, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; + private final static Among a_6[] = { + new Among ( "ica", -1, 1, "", methodObject ), + new Among ( "logia", -1, 3, "", methodObject ), + new Among ( "osa", -1, 1, "", methodObject ), + new Among ( "ista", -1, 1, "", methodObject ), + new Among ( "iva", -1, 9, "", methodObject ), + new Among ( "anza", -1, 1, "", methodObject ), + new Among ( "enza", -1, 5, "", methodObject ), + new Among ( "ice", -1, 1, "", methodObject ), + new Among ( "atrice", 7, 1, "", methodObject ), + new Among ( "iche", -1, 1, "", methodObject ), + new Among ( "logie", -1, 3, "", methodObject ), + new Among ( "abile", -1, 1, "", methodObject ), + new Among ( "ibile", -1, 1, "", methodObject ), + new Among ( "usione", -1, 4, "", methodObject ), + new Among ( "azione", -1, 2, "", methodObject ), + new Among ( "uzione", -1, 4, "", methodObject ), + new Among ( "atore", -1, 2, "", methodObject ), + new Among ( "ose", -1, 1, "", methodObject ), + new Among ( "ante", -1, 1, "", methodObject ), + new Among ( "mente", -1, 1, "", methodObject ), + new Among ( "amente", 19, 7, "", methodObject ), + new Among ( "iste", -1, 1, "", methodObject ), + new Among ( "ive", -1, 9, "", methodObject ), + new Among ( "anze", -1, 1, "", methodObject ), 
+ new Among ( "enze", -1, 5, "", methodObject ), + new Among ( "ici", -1, 1, "", methodObject ), + new Among ( "atrici", 25, 1, "", methodObject ), + new Among ( "ichi", -1, 1, "", methodObject ), + new Among ( "abili", -1, 1, "", methodObject ), + new Among ( "ibili", -1, 1, "", methodObject ), + new Among ( "ismi", -1, 1, "", methodObject ), + new Among ( "usioni", -1, 4, "", methodObject ), + new Among ( "azioni", -1, 2, "", methodObject ), + new Among ( "uzioni", -1, 4, "", methodObject ), + new Among ( "atori", -1, 2, "", methodObject ), + new Among ( "osi", -1, 1, "", methodObject ), + new Among ( "anti", -1, 1, "", methodObject ), + new Among ( "amenti", -1, 6, "", methodObject ), + new Among ( "imenti", -1, 6, "", methodObject ), + new Among ( "isti", -1, 1, "", methodObject ), + new Among ( "ivi", -1, 9, "", methodObject ), + new Among ( "ico", -1, 1, "", methodObject ), + new Among ( "ismo", -1, 1, "", methodObject ), + new Among ( "oso", -1, 1, "", methodObject ), + new Among ( "amento", -1, 6, "", methodObject ), + new Among ( "imento", -1, 6, "", methodObject ), + new Among ( "ivo", -1, 9, "", methodObject ), + new Among ( "it\u00E0", -1, 8, "", methodObject ), + new Among ( "ist\u00E0", -1, 1, "", methodObject ), + new Among ( "ist\u00E8", -1, 1, "", methodObject ), + new Among ( "ist\u00EC", -1, 1, "", methodObject ) + }; - private static final char g_AEIO[] = {17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; + private final static Among a_7[] = { + new Among ( "isca", -1, 1, "", methodObject ), + new Among ( "enda", -1, 1, "", methodObject ), + new Among ( "ata", -1, 1, "", methodObject ), + new Among ( "ita", -1, 1, "", methodObject ), + new Among ( "uta", -1, 1, "", methodObject ), + new Among ( "ava", -1, 1, "", methodObject ), + new Among ( "eva", -1, 1, "", methodObject ), + new Among ( "iva", -1, 1, "", methodObject ), + new Among ( "erebbe", -1, 1, "", methodObject ), + new Among ( "irebbe", -1, 1, "", methodObject ), + new Among ( "isce", -1, 1, "", methodObject ), + new Among ( "ende", -1, 1, "", methodObject ), + new Among ( "are", -1, 1, "", methodObject ), + new Among ( "ere", -1, 1, "", methodObject ), + new Among ( "ire", -1, 1, "", methodObject ), + new Among ( "asse", -1, 1, "", methodObject ), + new Among ( "ate", -1, 1, "", methodObject ), + new Among ( "avate", 16, 1, "", methodObject ), + new Among ( "evate", 16, 1, "", methodObject ), + new Among ( "ivate", 16, 1, "", methodObject ), + new Among ( "ete", -1, 1, "", methodObject ), + new Among ( "erete", 20, 1, "", methodObject ), + new Among ( "irete", 20, 1, "", methodObject ), + new Among ( "ite", -1, 1, "", methodObject ), + new Among ( "ereste", -1, 1, "", methodObject ), + new Among ( "ireste", -1, 1, "", methodObject ), + new Among ( "ute", -1, 1, "", methodObject ), + new Among ( "erai", -1, 1, "", methodObject ), + new Among ( "irai", -1, 1, "", methodObject ), + new Among ( "isci", -1, 1, "", methodObject ), + new Among ( "endi", -1, 1, "", methodObject ), + new Among ( "erei", -1, 1, "", methodObject ), + new Among ( "irei", -1, 1, "", methodObject ), + new Among ( "assi", -1, 1, "", methodObject ), + new Among ( "ati", -1, 1, "", methodObject ), + new Among ( "iti", -1, 1, "", methodObject ), + new Among ( "eresti", -1, 1, "", methodObject ), + new Among ( "iresti", -1, 1, "", methodObject ), + new Among ( "uti", -1, 1, "", methodObject ), + new Among ( "avi", -1, 1, "", methodObject ), + new Among ( "evi", -1, 1, "", methodObject ), + new Among ( "ivi", -1, 1, "", 
methodObject ), + new Among ( "isco", -1, 1, "", methodObject ), + new Among ( "ando", -1, 1, "", methodObject ), + new Among ( "endo", -1, 1, "", methodObject ), + new Among ( "Yamo", -1, 1, "", methodObject ), + new Among ( "iamo", -1, 1, "", methodObject ), + new Among ( "avamo", -1, 1, "", methodObject ), + new Among ( "evamo", -1, 1, "", methodObject ), + new Among ( "ivamo", -1, 1, "", methodObject ), + new Among ( "eremo", -1, 1, "", methodObject ), + new Among ( "iremo", -1, 1, "", methodObject ), + new Among ( "assimo", -1, 1, "", methodObject ), + new Among ( "ammo", -1, 1, "", methodObject ), + new Among ( "emmo", -1, 1, "", methodObject ), + new Among ( "eremmo", 54, 1, "", methodObject ), + new Among ( "iremmo", 54, 1, "", methodObject ), + new Among ( "immo", -1, 1, "", methodObject ), + new Among ( "ano", -1, 1, "", methodObject ), + new Among ( "iscano", 58, 1, "", methodObject ), + new Among ( "avano", 58, 1, "", methodObject ), + new Among ( "evano", 58, 1, "", methodObject ), + new Among ( "ivano", 58, 1, "", methodObject ), + new Among ( "eranno", -1, 1, "", methodObject ), + new Among ( "iranno", -1, 1, "", methodObject ), + new Among ( "ono", -1, 1, "", methodObject ), + new Among ( "iscono", 65, 1, "", methodObject ), + new Among ( "arono", 65, 1, "", methodObject ), + new Among ( "erono", 65, 1, "", methodObject ), + new Among ( "irono", 65, 1, "", methodObject ), + new Among ( "erebbero", -1, 1, "", methodObject ), + new Among ( "irebbero", -1, 1, "", methodObject ), + new Among ( "assero", -1, 1, "", methodObject ), + new Among ( "essero", -1, 1, "", methodObject ), + new Among ( "issero", -1, 1, "", methodObject ), + new Among ( "ato", -1, 1, "", methodObject ), + new Among ( "ito", -1, 1, "", methodObject ), + new Among ( "uto", -1, 1, "", methodObject ), + new Among ( "avo", -1, 1, "", methodObject ), + new Among ( "evo", -1, 1, "", methodObject ), + new Among ( "ivo", -1, 1, "", methodObject ), + new Among ( "ar", -1, 1, "", methodObject ), + new Among ( "ir", -1, 1, "", methodObject ), + new Among ( "er\u00E0", -1, 1, "", methodObject ), + new Among ( "ir\u00E0", -1, 1, "", methodObject ), + new Among ( "er\u00F2", -1, 1, "", methodObject ), + new Among ( "ir\u00F2", -1, 1, "", methodObject ) + }; - private static final char g_CG[] = {17 }; + private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2, 1 }; + private static final char g_AEIO[] = {17, 65, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128, 128, 8, 2 }; + + private static final char g_CG[] = {17 }; + private int I_p2; private int I_p1; private int I_pV; - private void copy_from(ItalianStemmer other) { - I_p2 = other.I_p2; - I_p1 = other.I_p1; - I_pV = other.I_pV; - super.copy_from(other); - } + private void copy_from(ItalianStemmer other) { + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_pV = other.I_pV; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int among_var; int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 34 - // test, line 35 - v_1 = cursor; - // repeat, line 35 - replab0: while(true) - { - v_2 = cursor; - lab1: do { - // (, line 35 - // [, line 36 - bra = cursor; - // substring, line 36 - among_var = find_among(a_0, 7); - if (among_var == 0) + // (, line 34 + // test, line 35 + v_1 = cursor; + // repeat, line 35 + replab0: while(true) { - break lab1; - } - // ], line 36 - ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 37 - // <-, line 37 - slice_from("\u00E0"); - 
break; - case 2: - // (, line 38 - // <-, line 38 - slice_from("\u00E8"); - break; - case 3: - // (, line 39 - // <-, line 39 - slice_from("\u00EC"); - break; - case 4: - // (, line 40 - // <-, line 40 - slice_from("\u00F2"); - break; - case 5: - // (, line 41 - // <-, line 41 - slice_from("\u00F9"); - break; - case 6: - // (, line 42 - // <-, line 42 - slice_from("qU"); - break; - case 7: - // (, line 43 - // next, line 43 - if (cursor >= limit) + v_2 = cursor; + lab1: do { + // (, line 35 + // [, line 36 + bra = cursor; + // substring, line 36 + among_var = find_among(a_0, 7); + if (among_var == 0) { break lab1; } - cursor++; - break; + // ], line 36 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 37 + // <-, line 37 + slice_from("\u00E0"); + break; + case 2: + // (, line 38 + // <-, line 38 + slice_from("\u00E8"); + break; + case 3: + // (, line 39 + // <-, line 39 + slice_from("\u00EC"); + break; + case 4: + // (, line 40 + // <-, line 40 + slice_from("\u00F2"); + break; + case 5: + // (, line 41 + // <-, line 41 + slice_from("\u00F9"); + break; + case 6: + // (, line 42 + // <-, line 42 + slice_from("qU"); + break; + case 7: + // (, line 43 + // next, line 43 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_2; + break replab0; } - continue replab0; - } while (false); - cursor = v_2; - break replab0; - } - cursor = v_1; - // repeat, line 46 - replab2: while(true) - { - v_3 = cursor; - lab3: do { - // goto, line 46 - golab4: while(true) + cursor = v_1; + // repeat, line 46 + replab2: while(true) { - v_4 = cursor; - lab5: do { - // (, line 46 - if (!(in_grouping(g_v, 97, 249))) + v_3 = cursor; + lab3: do { + // goto, line 46 + golab4: while(true) { - break lab5; - } - // [, line 47 - bra = cursor; - // or, line 47 - lab6: do { - v_5 = cursor; - lab7: do { - // (, line 47 - // literal, line 47 - if (!(eq_s(1, "u"))) - { - break lab7; - } - // ], line 47 - ket = cursor; + v_4 = cursor; + lab5: do { + // (, line 46 if (!(in_grouping(g_v, 97, 249))) { - break lab7; + break lab5; } - // <-, line 47 - slice_from("U"); - break lab6; + // [, line 47 + bra = cursor; + // or, line 47 + lab6: do { + v_5 = cursor; + lab7: do { + // (, line 47 + // literal, line 47 + if (!(eq_s(1, "u"))) + { + break lab7; + } + // ], line 47 + ket = cursor; + if (!(in_grouping(g_v, 97, 249))) + { + break lab7; + } + // <-, line 47 + slice_from("U"); + break lab6; + } while (false); + cursor = v_5; + // (, line 48 + // literal, line 48 + if (!(eq_s(1, "i"))) + { + break lab5; + } + // ], line 48 + ket = cursor; + if (!(in_grouping(g_v, 97, 249))) + { + break lab5; + } + // <-, line 48 + slice_from("I"); + } while (false); + cursor = v_4; + break golab4; } while (false); - cursor = v_5; - // (, line 48 - // literal, line 48 - if (!(eq_s(1, "i"))) + cursor = v_4; + if (cursor >= limit) { - break lab5; + break lab3; } - // ], line 48 - ket = cursor; - if (!(in_grouping(g_v, 97, 249))) - { - break lab5; - } - // <-, line 48 - slice_from("I"); - } while (false); - cursor = v_4; - break golab4; + cursor++; + } + continue replab2; } while (false); - cursor = v_4; - if (cursor >= limit) - { - break lab3; - } - cursor++; + cursor = v_3; + break replab2; } - continue replab2; - } while (false); - cursor = v_3; - break replab2; - } - return true; - } + return true; + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; int v_3; int v_6; int v_8; - // (, line 52 - I_pV = limit; - 
I_p1 = limit; - I_p2 = limit; - // do, line 58 - v_1 = cursor; - lab0: do { - // (, line 58 - // or, line 60 - lab1: do { - v_2 = cursor; - lab2: do { - // (, line 59 - if (!(in_grouping(g_v, 97, 249))) - { - break lab2; - } - // or, line 59 - lab3: do { - v_3 = cursor; - lab4: do { + // (, line 52 + I_pV = limit; + I_p1 = limit; + I_p2 = limit; + // do, line 58 + v_1 = cursor; + lab0: do { + // (, line 58 + // or, line 60 + lab1: do { + v_2 = cursor; + lab2: do { // (, line 59 - if (!(out_grouping(g_v, 97, 249))) + if (!(in_grouping(g_v, 97, 249))) { - break lab4; + break lab2; } - // gopast, line 59 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 249))) + // or, line 59 + lab3: do { + v_3 = cursor; + lab4: do { + // (, line 59 + if (!(out_grouping(g_v, 97, 249))) { - break lab6; + break lab4; } - break golab5; + // gopast, line 59 + golab5: while(true) + { + lab6: do { + if (!(in_grouping(g_v, 97, 249))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + break lab4; + } + cursor++; + } + break lab3; } while (false); - if (cursor >= limit) + cursor = v_3; + // (, line 59 + if (!(in_grouping(g_v, 97, 249))) { - break lab4; + break lab2; } - cursor++; - } - break lab3; + // gopast, line 59 + golab7: while(true) + { + lab8: do { + if (!(out_grouping(g_v, 97, 249))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + break lab2; + } + cursor++; + } + } while (false); + break lab1; } while (false); - cursor = v_3; - // (, line 59 - if (!(in_grouping(g_v, 97, 249))) + cursor = v_2; + // (, line 61 + if (!(out_grouping(g_v, 97, 249))) { - break lab2; + break lab0; } - // gopast, line 59 - golab7: while(true) - { - lab8: do { + // or, line 61 + lab9: do { + v_6 = cursor; + lab10: do { + // (, line 61 if (!(out_grouping(g_v, 97, 249))) { - break lab8; + break lab10; } - break golab7; + // gopast, line 61 + golab11: while(true) + { + lab12: do { + if (!(in_grouping(g_v, 97, 249))) + { + break lab12; + } + break golab11; + } while (false); + if (cursor >= limit) + { + break lab10; + } + cursor++; + } + break lab9; } while (false); + cursor = v_6; + // (, line 61 + if (!(in_grouping(g_v, 97, 249))) + { + break lab0; + } + // next, line 61 if (cursor >= limit) { - break lab2; + break lab0; } cursor++; - } + } while (false); } while (false); - break lab1; + // setmark pV, line 62 + I_pV = cursor; } while (false); - cursor = v_2; - // (, line 61 - if (!(out_grouping(g_v, 97, 249))) - { - break lab0; - } - // or, line 61 - lab9: do { - v_6 = cursor; - lab10: do { - // (, line 61 - if (!(out_grouping(g_v, 97, 249))) + cursor = v_1; + // do, line 64 + v_8 = cursor; + lab13: do { + // (, line 64 + // gopast, line 65 + golab14: while(true) + { + lab15: do { + if (!(in_grouping(g_v, 97, 249))) + { + break lab15; + } + break golab14; + } while (false); + if (cursor >= limit) { - break lab10; + break lab13; } - // gopast, line 61 - golab11: while(true) - { - lab12: do { - if (!(in_grouping(g_v, 97, 249))) - { - break lab12; - } - break golab11; - } while (false); - if (cursor >= limit) + cursor++; + } + // gopast, line 65 + golab16: while(true) + { + lab17: do { + if (!(out_grouping(g_v, 97, 249))) { - break lab10; + break lab17; } - cursor++; + break golab16; + } while (false); + if (cursor >= limit) + { + break lab13; } - break lab9; - } while (false); - cursor = v_6; - // (, line 61 - if (!(in_grouping(g_v, 97, 249))) - { - break lab0; + cursor++; } - // next, line 61 - if (cursor >= limit) + // setmark p1, line 65 + 
I_p1 = cursor; + // gopast, line 66 + golab18: while(true) { - break lab0; + lab19: do { + if (!(in_grouping(g_v, 97, 249))) + { + break lab19; + } + break golab18; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - cursor++; - } while (false); - } while (false); - // setmark pV, line 62 - I_pV = cursor; - } while (false); - cursor = v_1; - // do, line 64 - v_8 = cursor; - lab13: do { - // (, line 64 - // gopast, line 65 - golab14: while(true) - { - lab15: do { - if (!(in_grouping(g_v, 97, 249))) + // gopast, line 66 + golab20: while(true) { - break lab15; + lab21: do { + if (!(out_grouping(g_v, 97, 249))) + { + break lab21; + } + break golab20; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - break golab14; + // setmark p2, line 66 + I_p2 = cursor; } while (false); - if (cursor >= limit) + cursor = v_8; + return true; + } + + private boolean r_postlude() { + int among_var; + int v_1; + // repeat, line 70 + replab0: while(true) { - break lab13; + v_1 = cursor; + lab1: do { + // (, line 70 + // [, line 72 + bra = cursor; + // substring, line 72 + among_var = find_among(a_1, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 72 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 73 + // <-, line 73 + slice_from("i"); + break; + case 2: + // (, line 74 + // <-, line 74 + slice_from("u"); + break; + case 3: + // (, line 75 + // next, line 75 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - cursor++; + return true; } - // gopast, line 65 - golab16: while(true) - { - lab17: do { - if (!(out_grouping(g_v, 97, 249))) - { - break lab17; - } - break golab16; - } while (false); - if (cursor >= limit) + + private boolean r_RV() { + if (!(I_pV <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p1, line 65 - I_p1 = cursor; - // gopast, line 66 - golab18: while(true) - { - lab19: do { - if (!(in_grouping(g_v, 97, 249))) - { - break lab19; - } - break golab18; - } while (false); - if (cursor >= limit) + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // gopast, line 66 - golab20: while(true) - { - lab21: do { - if (!(out_grouping(g_v, 97, 249))) - { - break lab21; - } - break golab20; - } while (false); - if (cursor >= limit) + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p2, line 66 - I_p2 = cursor; - } while (false); - cursor = v_8; - return true; - } - private boolean r_postlude() { + private boolean r_attached_pronoun() { int among_var; - int v_1; - // repeat, line 70 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 70 - // [, line 72 + // (, line 86 + // [, line 87 + ket = cursor; + // substring, line 87 + if (find_among_b(a_2, 37) == 0) + { + return false; + } + // ], line 87 bra = cursor; - // substring, line 72 - among_var = find_among(a_1, 3); + // among, line 97 + among_var = find_among_b(a_3, 5); if (among_var == 0) { - break lab1; + return false; } - // ], line 72 + // (, line 97 + // call RV, line 97 + if (!r_RV()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 98 + // delete, line 98 + slice_del(); + break; + case 2: + // (, line 99 + // <-, line 99 + slice_from("e"); + break; + } + return true; + } + + private boolean r_standard_suffix() 
{ + int among_var; + int v_1; + int v_2; + int v_3; + int v_4; + // (, line 103 + // [, line 104 ket = cursor; + // substring, line 104 + among_var = find_among_b(a_6, 51); + if (among_var == 0) + { + return false; + } + // ], line 104 + bra = cursor; switch(among_var) { case 0: - break lab1; + return false; case 1: - // (, line 73 - // <-, line 73 - slice_from("i"); + // (, line 111 + // call R2, line 111 + if (!r_R2()) + { + return false; + } + // delete, line 111 + slice_del(); break; case 2: - // (, line 74 - // <-, line 74 - slice_from("u"); + // (, line 113 + // call R2, line 113 + if (!r_R2()) + { + return false; + } + // delete, line 113 + slice_del(); + // try, line 114 + v_1 = limit - cursor; + lab0: do { + // (, line 114 + // [, line 114 + ket = cursor; + // literal, line 114 + if (!(eq_s_b(2, "ic"))) + { + cursor = limit - v_1; + break lab0; + } + // ], line 114 + bra = cursor; + // call R2, line 114 + if (!r_R2()) + { + cursor = limit - v_1; + break lab0; + } + // delete, line 114 + slice_del(); + } while (false); break; case 3: - // (, line 75 - // next, line 75 - if (cursor >= limit) + // (, line 117 + // call R2, line 117 + if (!r_R2()) { - break lab1; + return false; } - cursor++; + // <-, line 117 + slice_from("log"); break; + case 4: + // (, line 119 + // call R2, line 119 + if (!r_R2()) + { + return false; + } + // <-, line 119 + slice_from("u"); + break; + case 5: + // (, line 121 + // call R2, line 121 + if (!r_R2()) + { + return false; + } + // <-, line 121 + slice_from("ente"); + break; + case 6: + // (, line 123 + // call RV, line 123 + if (!r_RV()) + { + return false; + } + // delete, line 123 + slice_del(); + break; + case 7: + // (, line 124 + // call R1, line 125 + if (!r_R1()) + { + return false; + } + // delete, line 125 + slice_del(); + // try, line 126 + v_2 = limit - cursor; + lab1: do { + // (, line 126 + // [, line 127 + ket = cursor; + // substring, line 127 + among_var = find_among_b(a_4, 4); + if (among_var == 0) + { + cursor = limit - v_2; + break lab1; + } + // ], line 127 + bra = cursor; + // call R2, line 127 + if (!r_R2()) + { + cursor = limit - v_2; + break lab1; + } + // delete, line 127 + slice_del(); + switch(among_var) { + case 0: + cursor = limit - v_2; + break lab1; + case 1: + // (, line 128 + // [, line 128 + ket = cursor; + // literal, line 128 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_2; + break lab1; + } + // ], line 128 + bra = cursor; + // call R2, line 128 + if (!r_R2()) + { + cursor = limit - v_2; + break lab1; + } + // delete, line 128 + slice_del(); + break; + } + } while (false); + break; + case 8: + // (, line 133 + // call R2, line 134 + if (!r_R2()) + { + return false; + } + // delete, line 134 + slice_del(); + // try, line 135 + v_3 = limit - cursor; + lab2: do { + // (, line 135 + // [, line 136 + ket = cursor; + // substring, line 136 + among_var = find_among_b(a_5, 3); + if (among_var == 0) + { + cursor = limit - v_3; + break lab2; + } + // ], line 136 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_3; + break lab2; + case 1: + // (, line 137 + // call R2, line 137 + if (!r_R2()) + { + cursor = limit - v_3; + break lab2; + } + // delete, line 137 + slice_del(); + break; + } + } while (false); + break; + case 9: + // (, line 141 + // call R2, line 142 + if (!r_R2()) + { + return false; + } + // delete, line 142 + slice_del(); + // try, line 143 + v_4 = limit - cursor; + lab3: do { + // (, line 143 + // [, line 143 + ket = cursor; + // literal, line 143 + if (!(eq_s_b(2, "at"))) + { + cursor = 
limit - v_4; + break lab3; + } + // ], line 143 + bra = cursor; + // call R2, line 143 + if (!r_R2()) + { + cursor = limit - v_4; + break lab3; + } + // delete, line 143 + slice_del(); + // [, line 143 + ket = cursor; + // literal, line 143 + if (!(eq_s_b(2, "ic"))) + { + cursor = limit - v_4; + break lab3; + } + // ], line 143 + bra = cursor; + // call R2, line 143 + if (!r_R2()) + { + cursor = limit - v_4; + break lab3; + } + // delete, line 143 + slice_del(); + } while (false); + break; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - private boolean r_RV() { - if (!(I_pV <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_attached_pronoun() { + private boolean r_verb_suffix() { int among_var; - // (, line 86 - // [, line 87 - ket = cursor; - // substring, line 87 - if (find_among_b(a_2, 37) == 0) - { - return false; - } - // ], line 87 - bra = cursor; - // among, line 97 - among_var = find_among_b(a_3, 5); - if (among_var == 0) - { - return false; - } - // (, line 97 - // call RV, line 97 - if (!r_RV()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 98 - // delete, line 98 - slice_del(); - break; - case 2: - // (, line 99 - // <-, line 99 - slice_from("e"); - break; - } - return true; - } - - private boolean r_standard_suffix() { - int among_var; int v_1; int v_2; - int v_3; - int v_4; - // (, line 103 - // [, line 104 - ket = cursor; - // substring, line 104 - among_var = find_among_b(a_6, 51); - if (among_var == 0) - { - return false; - } - // ], line 104 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 111 - // call R2, line 111 - if (!r_R2()) + // setlimit, line 148 + v_1 = limit - cursor; + // tomark, line 148 + if (cursor < I_pV) { return false; } - // delete, line 111 - slice_del(); - break; - case 2: - // (, line 113 - // call R2, line 113 - if (!r_R2()) + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 148 + // [, line 149 + ket = cursor; + // substring, line 149 + among_var = find_among_b(a_7, 87); + if (among_var == 0) { + limit_backward = v_2; return false; } - // delete, line 113 - slice_del(); - // try, line 114 + // ], line 149 + bra = cursor; + switch(among_var) { + case 0: + limit_backward = v_2; + return false; + case 1: + // (, line 163 + // delete, line 163 + slice_del(); + break; + } + limit_backward = v_2; + return true; + } + + private boolean r_vowel_suffix() { + int v_1; + int v_2; + // (, line 170 + // try, line 171 v_1 = limit - cursor; lab0: do { - // (, line 114 - // [, line 114 + // (, line 171 + // [, line 172 ket = cursor; - // literal, line 114 - if (!(eq_s_b(2, "ic"))) + if (!(in_grouping_b(g_AEIO, 97, 242))) { cursor = limit - v_1; break lab0; } - // ], line 114 + // ], line 172 bra = cursor; - // call R2, line 114 - if (!r_R2()) + // call RV, line 172 + if (!r_RV()) { cursor = limit - v_1; break lab0; } - // delete, line 114 + // delete, line 172 slice_del(); + // [, line 173 + ket = cursor; + // literal, line 173 + if (!(eq_s_b(1, "i"))) + { + cursor = limit - v_1; + break lab0; + } + // ], line 173 + bra = cursor; + // call RV, line 173 + if (!r_RV()) + { + cursor = limit - v_1; + break lab0; + } + // delete, line 173 + 
slice_del(); } while (false); - break; - case 3: - // (, line 117 - // call R2, line 117 - if (!r_R2()) - { - return false; - } - // <-, line 117 - slice_from("log"); - break; - case 4: - // (, line 119 - // call R2, line 119 - if (!r_R2()) - { - return false; - } - // <-, line 119 - slice_from("u"); - break; - case 5: - // (, line 121 - // call R2, line 121 - if (!r_R2()) - { - return false; - } - // <-, line 121 - slice_from("ente"); - break; - case 6: - // (, line 123 - // call RV, line 123 - if (!r_RV()) - { - return false; - } - // delete, line 123 - slice_del(); - break; - case 7: - // (, line 124 - // call R1, line 125 - if (!r_R1()) - { - return false; - } - // delete, line 125 - slice_del(); - // try, line 126 + // try, line 175 v_2 = limit - cursor; lab1: do { - // (, line 126 - // [, line 127 + // (, line 175 + // [, line 176 ket = cursor; - // substring, line 127 - among_var = find_among_b(a_4, 4); - if (among_var == 0) + // literal, line 176 + if (!(eq_s_b(1, "h"))) { cursor = limit - v_2; break lab1; } - // ], line 127 + // ], line 176 bra = cursor; - // call R2, line 127 - if (!r_R2()) + if (!(in_grouping_b(g_CG, 99, 103))) { cursor = limit - v_2; break lab1; } - // delete, line 127 + // call RV, line 176 + if (!r_RV()) + { + cursor = limit - v_2; + break lab1; + } + // delete, line 176 slice_del(); - switch(among_var) { - case 0: - cursor = limit - v_2; - break lab1; - case 1: - // (, line 128 - // [, line 128 - ket = cursor; - // literal, line 128 - if (!(eq_s_b(2, "at"))) - { - cursor = limit - v_2; - break lab1; - } - // ], line 128 - bra = cursor; - // call R2, line 128 - if (!r_R2()) - { - cursor = limit - v_2; - break lab1; - } - // delete, line 128 - slice_del(); - break; + } while (false); + return true; + } + + @Override + public boolean stem() { + int v_1; + int v_2; + int v_3; + int v_4; + int v_5; + int v_6; + int v_7; + // (, line 181 + // do, line 182 + v_1 = cursor; + lab0: do { + // call prelude, line 182 + if (!r_prelude()) + { + break lab0; } } while (false); - break; - case 8: - // (, line 133 - // call R2, line 134 - if (!r_R2()) - { - return false; - } - // delete, line 134 - slice_del(); - // try, line 135 + cursor = v_1; + // do, line 183 + v_2 = cursor; + lab1: do { + // call mark_regions, line 183 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 184 + limit_backward = cursor; cursor = limit; + // (, line 184 + // do, line 185 v_3 = limit - cursor; lab2: do { - // (, line 135 - // [, line 136 - ket = cursor; - // substring, line 136 - among_var = find_among_b(a_5, 3); - if (among_var == 0) + // call attached_pronoun, line 185 + if (!r_attached_pronoun()) { - cursor = limit - v_3; break lab2; } - // ], line 136 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_3; - break lab2; - case 1: - // (, line 137 - // call R2, line 137 - if (!r_R2()) - { - cursor = limit - v_3; - break lab2; - } - // delete, line 137 - slice_del(); - break; - } } while (false); - break; - case 9: - // (, line 141 - // call R2, line 142 - if (!r_R2()) - { - return false; - } - // delete, line 142 - slice_del(); - // try, line 143 + cursor = limit - v_3; + // do, line 186 v_4 = limit - cursor; lab3: do { - // (, line 143 - // [, line 143 - ket = cursor; - // literal, line 143 - if (!(eq_s_b(2, "at"))) + // (, line 186 + // or, line 186 + lab4: do { + v_5 = limit - cursor; + lab5: do { + // call standard_suffix, line 186 + if (!r_standard_suffix()) + { + break lab5; + } + break lab4; + } while (false); + 
cursor = limit - v_5; + // call verb_suffix, line 186 + if (!r_verb_suffix()) + { + break lab3; + } + } while (false); + } while (false); + cursor = limit - v_4; + // do, line 187 + v_6 = limit - cursor; + lab6: do { + // call vowel_suffix, line 187 + if (!r_vowel_suffix()) { - cursor = limit - v_4; - break lab3; + break lab6; } - // ], line 143 - bra = cursor; - // call R2, line 143 - if (!r_R2()) + } while (false); + cursor = limit - v_6; + cursor = limit_backward; // do, line 189 + v_7 = cursor; + lab7: do { + // call postlude, line 189 + if (!r_postlude()) { - cursor = limit - v_4; - break lab3; + break lab7; } - // delete, line 143 - slice_del(); - // [, line 143 - ket = cursor; - // literal, line 143 - if (!(eq_s_b(2, "ic"))) - { - cursor = limit - v_4; - break lab3; - } - // ], line 143 - bra = cursor; - // call R2, line 143 - if (!r_R2()) - { - cursor = limit - v_4; - break lab3; - } - // delete, line 143 - slice_del(); } while (false); - break; - } - return true; - } + cursor = v_7; + return true; + } - private boolean r_verb_suffix() { - int among_var; - int v_1; - int v_2; - // setlimit, line 148 - v_1 = limit - cursor; - // tomark, line 148 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 148 - // [, line 149 - ket = cursor; - // substring, line 149 - among_var = find_among_b(a_7, 87); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 149 - bra = cursor; - switch(among_var) { - case 0: - limit_backward = v_2; - return false; - case 1: - // (, line 163 - // delete, line 163 - slice_del(); - break; - } - limit_backward = v_2; - return true; + @Override + public boolean equals( Object o ) { + return o instanceof ItalianStemmer; } - private boolean r_vowel_suffix() { - int v_1; - int v_2; - // (, line 170 - // try, line 171 - v_1 = limit - cursor; - lab0: do { - // (, line 171 - // [, line 172 - ket = cursor; - if (!(in_grouping_b(g_AEIO, 97, 242))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 172 - bra = cursor; - // call RV, line 172 - if (!r_RV()) - { - cursor = limit - v_1; - break lab0; - } - // delete, line 172 - slice_del(); - // [, line 173 - ket = cursor; - // literal, line 173 - if (!(eq_s_b(1, "i"))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 173 - bra = cursor; - // call RV, line 173 - if (!r_RV()) - { - cursor = limit - v_1; - break lab0; - } - // delete, line 173 - slice_del(); - } while (false); - // try, line 175 - v_2 = limit - cursor; - lab1: do { - // (, line 175 - // [, line 176 - ket = cursor; - // literal, line 176 - if (!(eq_s_b(1, "h"))) - { - cursor = limit - v_2; - break lab1; - } - // ], line 176 - bra = cursor; - if (!(in_grouping_b(g_CG, 99, 103))) - { - cursor = limit - v_2; - break lab1; - } - // call RV, line 176 - if (!r_RV()) - { - cursor = limit - v_2; - break lab1; - } - // delete, line 176 - slice_del(); - } while (false); - return true; + @Override + public int hashCode() { + return ItalianStemmer.class.getName().hashCode(); } - public boolean stem() { - int v_1; - int v_2; - int v_3; - int v_4; - int v_5; - int v_6; - int v_7; - // (, line 181 - // do, line 182 - v_1 = cursor; - lab0: do { - // call prelude, line 182 - if (!r_prelude()) - { - break lab0; - } - } while (false); - cursor = v_1; - // do, line 183 - v_2 = cursor; - lab1: do { - // call mark_regions, line 183 - if (!r_mark_regions()) - { - break lab1; - } - } while (false); - cursor = v_2; - // backwards, line 184 - 
limit_backward = cursor; cursor = limit; - // (, line 184 - // do, line 185 - v_3 = limit - cursor; - lab2: do { - // call attached_pronoun, line 185 - if (!r_attached_pronoun()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 186 - v_4 = limit - cursor; - lab3: do { - // (, line 186 - // or, line 186 - lab4: do { - v_5 = limit - cursor; - lab5: do { - // call standard_suffix, line 186 - if (!r_standard_suffix()) - { - break lab5; - } - break lab4; - } while (false); - cursor = limit - v_5; - // call verb_suffix, line 186 - if (!r_verb_suffix()) - { - break lab3; - } - } while (false); - } while (false); - cursor = limit - v_4; - // do, line 187 - v_6 = limit - cursor; - lab6: do { - // call vowel_suffix, line 187 - if (!r_vowel_suffix()) - { - break lab6; - } - } while (false); - cursor = limit - v_6; - cursor = limit_backward; // do, line 189 - v_7 = cursor; - lab7: do { - // call postlude, line 189 - if (!r_postlude()) - { - break lab7; - } - } while (false); - cursor = v_7; - return true; - } + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/KpStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/KpStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/KpStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/KpStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,253 +1,260 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class KpStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "nde", -1, 7, "", this), - new Among ( "en", -1, 6, "", this), - new Among ( "s", -1, 2, "", this), - new Among ( "'s", 2, 1, "", this), - new Among ( "es", 2, 4, "", this), - new Among ( "ies", 4, 3, "", this), - new Among ( "aus", 2, 5, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "de", -1, 5, "", this), - new Among ( "ge", -1, 2, "", this), - new Among ( "ische", -1, 4, "", this), - new Among ( "je", -1, 1, "", this), - new Among ( "lijke", -1, 3, "", this), - new Among ( "le", -1, 9, "", this), - new Among ( "ene", -1, 10, "", this), - new Among ( "re", -1, 8, "", this), - new Among ( "se", -1, 7, "", this), - new Among ( "te", -1, 6, "", this), - new Among ( "ieve", -1, 11, "", this) - }; + private final static KpStemmer methodObject = new KpStemmer (); - private Among a_2[] = { - new Among ( "heid", -1, 3, "", this), - new Among ( "fie", -1, 7, "", this), - new Among ( "gie", -1, 8, "", this), - new Among ( "atie", -1, 1, "", this), - new Among ( "isme", -1, 5, "", this), - new Among ( "ing", -1, 5, "", this), - new Among ( "arij", -1, 6, "", this), - new Among ( "erij", -1, 5, "", this), - new Among ( "sel", -1, 3, "", this), - new Among ( "rder", -1, 4, "", this), - new Among ( "ster", -1, 3, "", this), - new Among ( "iteit", -1, 2, "", this), - new Among ( "dst", -1, 10, "", this), - new Among ( "tst", -1, 9, "", this) - }; + private final static Among a_0[] = { + new Among ( "nde", -1, 7, "", methodObject ), + new Among ( "en", -1, 6, "", methodObject ), + new Among ( "s", -1, 2, "", methodObject ), + new Among ( "'s", 2, 1, "", methodObject ), + new Among ( "es", 2, 4, "", methodObject ), + new Among ( "ies", 4, 3, "", methodObject ), + new Among ( "aus", 2, 5, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "end", -1, 10, "", this), - new Among ( "atief", -1, 2, "", this), - new Among ( "erig", -1, 10, "", this), - new Among ( "achtig", -1, 9, "", this), - new Among ( "ioneel", -1, 1, "", this), - new Among ( "baar", -1, 3, "", this), - new Among ( "laar", -1, 5, "", this), - new Among ( "naar", -1, 4, "", this), - new Among ( "raar", -1, 6, "", this), - new Among ( "eriger", -1, 10, "", this), - new Among ( "achtiger", -1, 9, "", this), - new Among ( "lijker", -1, 8, "", this), - new Among ( "tant", -1, 7, "", this), - new Among ( "erigst", -1, 10, "", this), - new Among ( "achtigst", -1, 9, "", this), - new Among ( "lijkst", -1, 8, "", this) - }; + private final static Among a_1[] = { + new Among ( "de", -1, 5, "", methodObject ), + new Among ( "ge", -1, 2, "", methodObject ), + new Among ( "ische", -1, 4, "", methodObject ), + new Among ( "je", -1, 1, "", methodObject ), + new Among ( "lijke", -1, 3, "", methodObject ), + new Among ( "le", -1, 9, "", methodObject ), + new Among ( "ene", -1, 10, "", methodObject ), + new Among ( "re", -1, 8, "", methodObject ), + new Among ( "se", -1, 7, "", methodObject ), + new Among ( "te", -1, 6, "", methodObject ), + new Among ( "ieve", -1, 11, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "ig", -1, 1, "", this), - new Among ( "iger", -1, 1, "", this), - new Among ( "igst", -1, 1, "", this) - }; + private final static Among a_2[] = { + new Among ( "heid", -1, 3, "", methodObject ), + new Among ( "fie", -1, 7, "", methodObject ), + new Among ( "gie", -1, 8, "", methodObject ), + new Among ( "atie", -1, 1, "", methodObject ), + new Among ( "isme", -1, 
5, "", methodObject ), + new Among ( "ing", -1, 5, "", methodObject ), + new Among ( "arij", -1, 6, "", methodObject ), + new Among ( "erij", -1, 5, "", methodObject ), + new Among ( "sel", -1, 3, "", methodObject ), + new Among ( "rder", -1, 4, "", methodObject ), + new Among ( "ster", -1, 3, "", methodObject ), + new Among ( "iteit", -1, 2, "", methodObject ), + new Among ( "dst", -1, 10, "", methodObject ), + new Among ( "tst", -1, 9, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "ft", -1, 2, "", this), - new Among ( "kt", -1, 1, "", this), - new Among ( "pt", -1, 3, "", this) - }; + private final static Among a_3[] = { + new Among ( "end", -1, 10, "", methodObject ), + new Among ( "atief", -1, 2, "", methodObject ), + new Among ( "erig", -1, 10, "", methodObject ), + new Among ( "achtig", -1, 9, "", methodObject ), + new Among ( "ioneel", -1, 1, "", methodObject ), + new Among ( "baar", -1, 3, "", methodObject ), + new Among ( "laar", -1, 5, "", methodObject ), + new Among ( "naar", -1, 4, "", methodObject ), + new Among ( "raar", -1, 6, "", methodObject ), + new Among ( "eriger", -1, 10, "", methodObject ), + new Among ( "achtiger", -1, 9, "", methodObject ), + new Among ( "lijker", -1, 8, "", methodObject ), + new Among ( "tant", -1, 7, "", methodObject ), + new Among ( "erigst", -1, 10, "", methodObject ), + new Among ( "achtigst", -1, 9, "", methodObject ), + new Among ( "lijkst", -1, 8, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "bb", -1, 1, "", this), - new Among ( "cc", -1, 2, "", this), - new Among ( "dd", -1, 3, "", this), - new Among ( "ff", -1, 4, "", this), - new Among ( "gg", -1, 5, "", this), - new Among ( "hh", -1, 6, "", this), - new Among ( "jj", -1, 7, "", this), - new Among ( "kk", -1, 8, "", this), - new Among ( "ll", -1, 9, "", this), - new Among ( "mm", -1, 10, "", this), - new Among ( "nn", -1, 11, "", this), - new Among ( "pp", -1, 12, "", this), - new Among ( "qq", -1, 13, "", this), - new Among ( "rr", -1, 14, "", this), - new Among ( "ss", -1, 15, "", this), - new Among ( "tt", -1, 16, "", this), - new Among ( "v", -1, 21, "", this), - new Among ( "vv", 16, 17, "", this), - new Among ( "ww", -1, 18, "", this), - new Among ( "xx", -1, 19, "", this), - new Among ( "z", -1, 22, "", this), - new Among ( "zz", 20, 20, "", this) - }; + private final static Among a_4[] = { + new Among ( "ig", -1, 1, "", methodObject ), + new Among ( "iger", -1, 1, "", methodObject ), + new Among ( "igst", -1, 1, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "d", -1, 1, "", this), - new Among ( "t", -1, 2, "", this) - }; + private final static Among a_5[] = { + new Among ( "ft", -1, 2, "", methodObject ), + new Among ( "kt", -1, 1, "", methodObject ), + new Among ( "pt", -1, 3, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1 }; + private final static Among a_6[] = { + new Among ( "bb", -1, 1, "", methodObject ), + new Among ( "cc", -1, 2, "", methodObject ), + new Among ( "dd", -1, 3, "", methodObject ), + new Among ( "ff", -1, 4, "", methodObject ), + new Among ( "gg", -1, 5, "", methodObject ), + new Among ( "hh", -1, 6, "", methodObject ), + new Among ( "jj", -1, 7, "", methodObject ), + new Among ( "kk", -1, 8, "", methodObject ), + new Among ( "ll", -1, 9, "", methodObject ), + new Among ( "mm", -1, 10, "", methodObject ), + new Among ( "nn", -1, 11, "", methodObject ), + new Among ( "pp", -1, 12, "", methodObject ), + new Among ( "qq", -1, 13, "", methodObject ), + new Among ( "rr", -1, 14, "", 
methodObject ), + new Among ( "ss", -1, 15, "", methodObject ), + new Among ( "tt", -1, 16, "", methodObject ), + new Among ( "v", -1, 21, "", methodObject ), + new Among ( "vv", 16, 17, "", methodObject ), + new Among ( "ww", -1, 18, "", methodObject ), + new Among ( "xx", -1, 19, "", methodObject ), + new Among ( "z", -1, 22, "", methodObject ), + new Among ( "zz", 20, 20, "", methodObject ) + }; - private static final char g_v_WX[] = {17, 65, 208, 1 }; + private final static Among a_7[] = { + new Among ( "d", -1, 1, "", methodObject ), + new Among ( "t", -1, 2, "", methodObject ) + }; - private static final char g_AOU[] = {1, 64, 16 }; + private static final char g_v[] = {17, 65, 16, 1 }; - private static final char g_AIOU[] = {1, 65, 16 }; + private static final char g_v_WX[] = {17, 65, 208, 1 }; + private static final char g_AOU[] = {1, 64, 16 }; + + private static final char g_AIOU[] = {1, 65, 16 }; + private boolean B_GE_removed; private boolean B_stemmed; private boolean B_Y_found; private int I_p2; private int I_p1; private int I_x; - private StringBuffer S_ch = new StringBuffer(); + private java.lang.StringBuilder S_ch = new java.lang.StringBuilder(); - private void copy_from(KpStemmer other) { - B_GE_removed = other.B_GE_removed; - B_stemmed = other.B_stemmed; - B_Y_found = other.B_Y_found; - I_p2 = other.I_p2; - I_p1 = other.I_p1; - I_x = other.I_x; - S_ch = other.S_ch; - super.copy_from(other); - } + private void copy_from(KpStemmer other) { + B_GE_removed = other.B_GE_removed; + B_stemmed = other.B_stemmed; + B_Y_found = other.B_Y_found; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_x = other.I_x; + S_ch = other.S_ch; + super.copy_from(other); + } - private boolean r_R1() { - // (, line 32 - // setmark x, line 32 - I_x = cursor; - if (!(I_x >= I_p1)) - { - return false; - } - return true; - } + private boolean r_R1() { + // (, line 32 + // setmark x, line 32 + I_x = cursor; + if (!(I_x >= I_p1)) + { + return false; + } + return true; + } - private boolean r_R2() { - // (, line 33 - // setmark x, line 33 - I_x = cursor; - if (!(I_x >= I_p2)) - { - return false; - } - return true; - } + private boolean r_R2() { + // (, line 33 + // setmark x, line 33 + I_x = cursor; + if (!(I_x >= I_p2)) + { + return false; + } + return true; + } - private boolean r_V() { + private boolean r_V() { int v_1; int v_2; - // test, line 35 - v_1 = limit - cursor; - // (, line 35 - // or, line 35 - lab0: do { - v_2 = limit - cursor; - lab1: do { - if (!(in_grouping_b(g_v, 97, 121))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_2; - // literal, line 35 - if (!(eq_s_b(2, "ij"))) - { - return false; + // test, line 35 + v_1 = limit - cursor; + // (, line 35 + // or, line 35 + lab0: do { + v_2 = limit - cursor; + lab1: do { + if (!(in_grouping_b(g_v, 97, 121))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + // literal, line 35 + if (!(eq_s_b(2, "ij"))) + { + return false; + } + } while (false); + cursor = limit - v_1; + return true; } - } while (false); - cursor = limit - v_1; - return true; - } - private boolean r_VX() { + private boolean r_VX() { int v_1; int v_2; - // test, line 36 - v_1 = limit - cursor; - // (, line 36 - // next, line 36 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // or, line 36 - lab0: do { - v_2 = limit - cursor; - lab1: do { - if (!(in_grouping_b(g_v, 97, 121))) + // test, line 36 + v_1 = limit - cursor; + // (, line 36 + // next, line 36 + if (cursor <= limit_backward) { - break lab1; + 
return false; } - break lab0; - } while (false); - cursor = limit - v_2; - // literal, line 36 - if (!(eq_s_b(2, "ij"))) - { - return false; + cursor--; + // or, line 36 + lab0: do { + v_2 = limit - cursor; + lab1: do { + if (!(in_grouping_b(g_v, 97, 121))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + // literal, line 36 + if (!(eq_s_b(2, "ij"))) + { + return false; + } + } while (false); + cursor = limit - v_1; + return true; } - } while (false); - cursor = limit - v_1; - return true; - } - private boolean r_C() { + private boolean r_C() { int v_1; int v_2; - // test, line 37 - v_1 = limit - cursor; - // (, line 37 - // not, line 37 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 37 - if (!(eq_s_b(2, "ij"))) + // test, line 37 + v_1 = limit - cursor; + // (, line 37 + // not, line 37 { - break lab0; + v_2 = limit - cursor; + lab0: do { + // literal, line 37 + if (!(eq_s_b(2, "ij"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_2; - } - if (!(out_grouping_b(g_v, 97, 121))) - { - return false; - } - cursor = limit - v_1; - return true; - } + if (!(out_grouping_b(g_v, 97, 121))) + { + return false; + } + cursor = limit - v_1; + return true; + } - private boolean r_lengthen_V() { + private boolean r_lengthen_V() { int v_1; int v_2; int v_3; @@ -256,1600 +263,1601 @@ int v_6; int v_7; int v_8; - // do, line 39 - v_1 = limit - cursor; - lab0: do { - // (, line 39 - if (!(out_grouping_b(g_v_WX, 97, 121))) - { - break lab0; - } - // [, line 40 - ket = cursor; - // or, line 40 - lab1: do { - v_2 = limit - cursor; - lab2: do { - // (, line 40 - if (!(in_grouping_b(g_AOU, 97, 117))) + // do, line 39 + v_1 = limit - cursor; + lab0: do { + // (, line 39 + if (!(out_grouping_b(g_v_WX, 97, 121))) { - break lab2; + break lab0; } - // ], line 40 - bra = cursor; - // test, line 40 - v_3 = limit - cursor; - // (, line 40 + // [, line 40 + ket = cursor; // or, line 40 - lab3: do { - v_4 = limit - cursor; - lab4: do { - if (!(out_grouping_b(g_v, 97, 121))) + lab1: do { + v_2 = limit - cursor; + lab2: do { + // (, line 40 + if (!(in_grouping_b(g_AOU, 97, 117))) { - break lab4; + break lab2; } - break lab3; + // ], line 40 + bra = cursor; + // test, line 40 + v_3 = limit - cursor; + // (, line 40 + // or, line 40 + lab3: do { + v_4 = limit - cursor; + lab4: do { + if (!(out_grouping_b(g_v, 97, 121))) + { + break lab4; + } + break lab3; + } while (false); + cursor = limit - v_4; + // atlimit, line 40 + if (cursor > limit_backward) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + break lab1; } while (false); - cursor = limit - v_4; - // atlimit, line 40 - if (cursor > limit_backward) + cursor = limit - v_2; + // (, line 41 + // literal, line 41 + if (!(eq_s_b(1, "e"))) { - break lab2; + break lab0; } - } while (false); - cursor = limit - v_3; - break lab1; - } while (false); - cursor = limit - v_2; - // (, line 41 - // literal, line 41 - if (!(eq_s_b(1, "e"))) - { - break lab0; - } - // ], line 41 - bra = cursor; - // test, line 41 - v_5 = limit - cursor; - // (, line 41 - // or, line 41 - lab5: do { - v_6 = limit - cursor; - lab6: do { - if (!(out_grouping_b(g_v, 97, 121))) + // ], line 41 + bra = cursor; + // test, line 41 + v_5 = limit - cursor; + // (, line 41 + // or, line 41 + lab5: do { + v_6 = limit - cursor; + lab6: do { + if (!(out_grouping_b(g_v, 97, 121))) + { + break lab6; + } + break lab5; + } while (false); + cursor = limit - v_6; + // 
atlimit, line 41 + if (cursor > limit_backward) + { + break lab0; + } + } while (false); + // not, line 42 { - break lab6; + v_7 = limit - cursor; + lab7: do { + if (!(in_grouping_b(g_AIOU, 97, 117))) + { + break lab7; + } + break lab0; + } while (false); + cursor = limit - v_7; } - break lab5; + // not, line 43 + { + v_8 = limit - cursor; + lab8: do { + // (, line 43 + // next, line 43 + if (cursor <= limit_backward) + { + break lab8; + } + cursor--; + if (!(in_grouping_b(g_AIOU, 97, 117))) + { + break lab8; + } + if (!(out_grouping_b(g_v, 97, 121))) + { + break lab8; + } + break lab0; + } while (false); + cursor = limit - v_8; + } + cursor = limit - v_5; } while (false); - cursor = limit - v_6; - // atlimit, line 41 - if (cursor > limit_backward) + // -> ch, line 44 + S_ch = slice_to(S_ch); + // <+ ch, line 44 { - break lab0; + int c = cursor; + insert(cursor, cursor, S_ch); + cursor = c; } } while (false); - // not, line 42 - { - v_7 = limit - cursor; - lab7: do { - if (!(in_grouping_b(g_AIOU, 97, 117))) - { - break lab7; - } - break lab0; - } while (false); - cursor = limit - v_7; - } - // not, line 43 - { - v_8 = limit - cursor; - lab8: do { - // (, line 43 - // next, line 43 - if (cursor <= limit_backward) - { - break lab8; - } - cursor--; - if (!(in_grouping_b(g_AIOU, 97, 117))) - { - break lab8; - } - if (!(out_grouping_b(g_v, 97, 121))) - { - break lab8; - } - break lab0; - } while (false); - cursor = limit - v_8; - } - cursor = limit - v_5; - } while (false); - // -> ch, line 44 - S_ch = slice_to(S_ch); - // <+ ch, line 44 - { - int c = cursor; - insert(cursor, cursor, S_ch); - cursor = c; + cursor = limit - v_1; + return true; } - } while (false); - cursor = limit - v_1; - return true; - } - private boolean r_Step_1() { + private boolean r_Step_1() { int among_var; int v_1; int v_2; int v_3; int v_4; - // (, line 48 - // [, line 49 - ket = cursor; - // among, line 49 - among_var = find_among_b(a_0, 7); - if (among_var == 0) - { - return false; - } - // (, line 49 - // ], line 49 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 51 - // delete, line 51 - slice_del(); - break; - case 2: - // (, line 52 - // call R1, line 52 - if (!r_R1()) + // (, line 48 + // [, line 49 + ket = cursor; + // among, line 49 + among_var = find_among_b(a_0, 7); + if (among_var == 0) { return false; } - // not, line 52 - { - v_1 = limit - cursor; - lab0: do { + // (, line 49 + // ], line 49 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 51 + // delete, line 51 + slice_del(); + break; + case 2: // (, line 52 - // literal, line 52 - if (!(eq_s_b(1, "t"))) - { - break lab0; - } // call R1, line 52 if (!r_R1()) { - break lab0; + return false; } - return false; - } while (false); - cursor = limit - v_1; - } - // call C, line 52 - if (!r_C()) - { - return false; - } - // delete, line 52 - slice_del(); - break; - case 3: - // (, line 53 - // call R1, line 53 - if (!r_R1()) - { - return false; - } - // <-, line 53 - slice_from("ie"); - break; - case 4: - // (, line 55 - // or, line 55 - lab1: do { - v_2 = limit - cursor; - lab2: do { - // (, line 55 - // literal, line 55 - if (!(eq_s_b(2, "ar"))) + // not, line 52 { - break lab2; + v_1 = limit - cursor; + lab0: do { + // (, line 52 + // literal, line 52 + if (!(eq_s_b(1, "t"))) + { + break lab0; + } + // call R1, line 52 + if (!r_R1()) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_1; } - // call R1, line 55 - if (!r_R1()) - { - break lab2; - } - // call 
C, line 55 + // call C, line 52 if (!r_C()) { - break lab2; + return false; } - // ], line 55 - bra = cursor; - // delete, line 55 + // delete, line 52 slice_del(); - // call lengthen_V, line 55 - if (!r_lengthen_V()) + break; + case 3: + // (, line 53 + // call R1, line 53 + if (!r_R1()) { - break lab2; + return false; } - break lab1; - } while (false); - cursor = limit - v_2; - lab3: do { - // (, line 56 - // literal, line 56 - if (!(eq_s_b(2, "er"))) - { - break lab3; - } - // call R1, line 56 + // <-, line 53 + slice_from("ie"); + break; + case 4: + // (, line 55 + // or, line 55 + lab1: do { + v_2 = limit - cursor; + lab2: do { + // (, line 55 + // literal, line 55 + if (!(eq_s_b(2, "ar"))) + { + break lab2; + } + // call R1, line 55 + if (!r_R1()) + { + break lab2; + } + // call C, line 55 + if (!r_C()) + { + break lab2; + } + // ], line 55 + bra = cursor; + // delete, line 55 + slice_del(); + // call lengthen_V, line 55 + if (!r_lengthen_V()) + { + break lab2; + } + break lab1; + } while (false); + cursor = limit - v_2; + lab3: do { + // (, line 56 + // literal, line 56 + if (!(eq_s_b(2, "er"))) + { + break lab3; + } + // call R1, line 56 + if (!r_R1()) + { + break lab3; + } + // call C, line 56 + if (!r_C()) + { + break lab3; + } + // ], line 56 + bra = cursor; + // delete, line 56 + slice_del(); + break lab1; + } while (false); + cursor = limit - v_2; + // (, line 57 + // call R1, line 57 + if (!r_R1()) + { + return false; + } + // call C, line 57 + if (!r_C()) + { + return false; + } + // <-, line 57 + slice_from("e"); + } while (false); + break; + case 5: + // (, line 59 + // call R1, line 59 if (!r_R1()) { - break lab3; + return false; } - // call C, line 56 - if (!r_C()) + // call V, line 59 + if (!r_V()) { - break lab3; + return false; } - // ], line 56 - bra = cursor; - // delete, line 56 - slice_del(); - break lab1; - } while (false); - cursor = limit - v_2; - // (, line 57 - // call R1, line 57 - if (!r_R1()) - { - return false; - } - // call C, line 57 - if (!r_C()) - { - return false; - } - // <-, line 57 - slice_from("e"); - } while (false); - break; - case 5: - // (, line 59 - // call R1, line 59 - if (!r_R1()) - { - return false; + // <-, line 59 + slice_from("au"); + break; + case 6: + // (, line 60 + // or, line 60 + lab4: do { + v_3 = limit - cursor; + lab5: do { + // (, line 60 + // literal, line 60 + if (!(eq_s_b(3, "hed"))) + { + break lab5; + } + // call R1, line 60 + if (!r_R1()) + { + break lab5; + } + // ], line 60 + bra = cursor; + // <-, line 60 + slice_from("heid"); + break lab4; + } while (false); + cursor = limit - v_3; + lab6: do { + // (, line 61 + // literal, line 61 + if (!(eq_s_b(2, "nd"))) + { + break lab6; + } + // delete, line 61 + slice_del(); + break lab4; + } while (false); + cursor = limit - v_3; + lab7: do { + // (, line 62 + // literal, line 62 + if (!(eq_s_b(1, "d"))) + { + break lab7; + } + // call R1, line 62 + if (!r_R1()) + { + break lab7; + } + // call C, line 62 + if (!r_C()) + { + break lab7; + } + // ], line 62 + bra = cursor; + // delete, line 62 + slice_del(); + break lab4; + } while (false); + cursor = limit - v_3; + lab8: do { + // (, line 63 + // or, line 63 + lab9: do { + v_4 = limit - cursor; + lab10: do { + // literal, line 63 + if (!(eq_s_b(1, "i"))) + { + break lab10; + } + break lab9; + } while (false); + cursor = limit - v_4; + // literal, line 63 + if (!(eq_s_b(1, "j"))) + { + break lab8; + } + } while (false); + // call V, line 63 + if (!r_V()) + { + break lab8; + } + // delete, line 63 + slice_del(); + break lab4; + 
} while (false); + cursor = limit - v_3; + // (, line 64 + // call R1, line 64 + if (!r_R1()) + { + return false; + } + // call C, line 64 + if (!r_C()) + { + return false; + } + // delete, line 64 + slice_del(); + // call lengthen_V, line 64 + if (!r_lengthen_V()) + { + return false; + } + } while (false); + break; + case 7: + // (, line 65 + // <-, line 65 + slice_from("nd"); + break; } - // call V, line 59 - if (!r_V()) + return true; + } + + private boolean r_Step_2() { + int among_var; + int v_1; + // (, line 70 + // [, line 71 + ket = cursor; + // among, line 71 + among_var = find_among_b(a_1, 11); + if (among_var == 0) { return false; } - // <-, line 59 - slice_from("au"); - break; - case 6: - // (, line 60 - // or, line 60 - lab4: do { - v_3 = limit - cursor; - lab5: do { - // (, line 60 - // literal, line 60 - if (!(eq_s_b(3, "hed"))) - { - break lab5; - } - // call R1, line 60 - if (!r_R1()) - { - break lab5; - } - // ], line 60 - bra = cursor; - // <-, line 60 - slice_from("heid"); - break lab4; - } while (false); - cursor = limit - v_3; - lab6: do { - // (, line 61 - // literal, line 61 - if (!(eq_s_b(2, "nd"))) - { - break lab6; - } - // delete, line 61 - slice_del(); - break lab4; - } while (false); - cursor = limit - v_3; - lab7: do { - // (, line 62 - // literal, line 62 - if (!(eq_s_b(1, "d"))) - { - break lab7; - } - // call R1, line 62 - if (!r_R1()) - { - break lab7; - } - // call C, line 62 - if (!r_C()) - { - break lab7; - } - // ], line 62 - bra = cursor; - // delete, line 62 - slice_del(); - break lab4; - } while (false); - cursor = limit - v_3; - lab8: do { - // (, line 63 - // or, line 63 - lab9: do { - v_4 = limit - cursor; - lab10: do { - // literal, line 63 - if (!(eq_s_b(1, "i"))) + // (, line 71 + // ], line 71 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 72 + // or, line 72 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 72 + // literal, line 72 + if (!(eq_s_b(2, "'t"))) { - break lab10; + break lab1; } - break lab9; + // ], line 72 + bra = cursor; + // delete, line 72 + slice_del(); + break lab0; } while (false); - cursor = limit - v_4; - // literal, line 63 - if (!(eq_s_b(1, "j"))) + cursor = limit - v_1; + lab2: do { + // (, line 73 + // literal, line 73 + if (!(eq_s_b(2, "et"))) + { + break lab2; + } + // ], line 73 + bra = cursor; + // call R1, line 73 + if (!r_R1()) + { + break lab2; + } + // call C, line 73 + if (!r_C()) + { + break lab2; + } + // delete, line 73 + slice_del(); + break lab0; + } while (false); + cursor = limit - v_1; + lab3: do { + // (, line 74 + // literal, line 74 + if (!(eq_s_b(3, "rnt"))) + { + break lab3; + } + // ], line 74 + bra = cursor; + // <-, line 74 + slice_from("rn"); + break lab0; + } while (false); + cursor = limit - v_1; + lab4: do { + // (, line 75 + // literal, line 75 + if (!(eq_s_b(1, "t"))) + { + break lab4; + } + // ], line 75 + bra = cursor; + // call R1, line 75 + if (!r_R1()) + { + break lab4; + } + // call VX, line 75 + if (!r_VX()) + { + break lab4; + } + // delete, line 75 + slice_del(); + break lab0; + } while (false); + cursor = limit - v_1; + lab5: do { + // (, line 76 + // literal, line 76 + if (!(eq_s_b(3, "ink"))) + { + break lab5; + } + // ], line 76 + bra = cursor; + // <-, line 76 + slice_from("ing"); + break lab0; + } while (false); + cursor = limit - v_1; + lab6: do { + // (, line 77 + // literal, line 77 + if (!(eq_s_b(2, "mp"))) + { + break lab6; + } + // ], line 77 + bra = cursor; + // <-, line 77 + slice_from("m"); + break lab0; + } 
while (false); + cursor = limit - v_1; + lab7: do { + // (, line 78 + // literal, line 78 + if (!(eq_s_b(1, "'"))) + { + break lab7; + } + // ], line 78 + bra = cursor; + // call R1, line 78 + if (!r_R1()) + { + break lab7; + } + // delete, line 78 + slice_del(); + break lab0; + } while (false); + cursor = limit - v_1; + // (, line 79 + // ], line 79 + bra = cursor; + // call R1, line 79 + if (!r_R1()) { - break lab8; + return false; } + // call C, line 79 + if (!r_C()) + { + return false; + } + // delete, line 79 + slice_del(); } while (false); - // call V, line 63 - if (!r_V()) + break; + case 2: + // (, line 80 + // call R1, line 80 + if (!r_R1()) { - break lab8; + return false; } - // delete, line 63 - slice_del(); - break lab4; - } while (false); - cursor = limit - v_3; - // (, line 64 - // call R1, line 64 - if (!r_R1()) - { - return false; - } - // call C, line 64 - if (!r_C()) - { - return false; - } - // delete, line 64 - slice_del(); - // call lengthen_V, line 64 - if (!r_lengthen_V()) - { - return false; - } - } while (false); - break; - case 7: - // (, line 65 - // <-, line 65 - slice_from("nd"); - break; - } - return true; - } - - private boolean r_Step_2() { - int among_var; - int v_1; - // (, line 70 - // [, line 71 - ket = cursor; - // among, line 71 - among_var = find_among_b(a_1, 11); - if (among_var == 0) - { - return false; - } - // (, line 71 - // ], line 71 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 72 - // or, line 72 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 72 - // literal, line 72 - if (!(eq_s_b(2, "'t"))) + // <-, line 80 + slice_from("g"); + break; + case 3: + // (, line 81 + // call R1, line 81 + if (!r_R1()) { - break lab1; + return false; } - // ], line 72 - bra = cursor; - // delete, line 72 - slice_del(); - break lab0; - } while (false); - cursor = limit - v_1; - lab2: do { - // (, line 73 - // literal, line 73 - if (!(eq_s_b(2, "et"))) + // <-, line 81 + slice_from("lijk"); + break; + case 4: + // (, line 82 + // call R1, line 82 + if (!r_R1()) { - break lab2; + return false; } - // ], line 73 - bra = cursor; - // call R1, line 73 + // <-, line 82 + slice_from("isch"); + break; + case 5: + // (, line 83 + // call R1, line 83 if (!r_R1()) { - break lab2; + return false; } - // call C, line 73 + // call C, line 83 if (!r_C()) { - break lab2; + return false; } - // delete, line 73 + // delete, line 83 slice_del(); - break lab0; - } while (false); - cursor = limit - v_1; - lab3: do { - // (, line 74 - // literal, line 74 - if (!(eq_s_b(3, "rnt"))) + break; + case 6: + // (, line 84 + // call R1, line 84 + if (!r_R1()) { - break lab3; + return false; } - // ], line 74 - bra = cursor; - // <-, line 74 - slice_from("rn"); - break lab0; - } while (false); - cursor = limit - v_1; - lab4: do { - // (, line 75 - // literal, line 75 - if (!(eq_s_b(1, "t"))) + // <-, line 84 + slice_from("t"); + break; + case 7: + // (, line 85 + // call R1, line 85 + if (!r_R1()) { - break lab4; + return false; } - // ], line 75 - bra = cursor; - // call R1, line 75 + // <-, line 85 + slice_from("s"); + break; + case 8: + // (, line 86 + // call R1, line 86 if (!r_R1()) { - break lab4; + return false; } - // call VX, line 75 - if (!r_VX()) + // <-, line 86 + slice_from("r"); + break; + case 9: + // (, line 87 + // call R1, line 87 + if (!r_R1()) { - break lab4; + return false; } - // delete, line 75 + // delete, line 87 slice_del(); - break lab0; - } while (false); - cursor = limit - v_1; - lab5: do { - // (, line 76 - 
// literal, line 76 - if (!(eq_s_b(3, "ink"))) + // attach, line 87 + insert(cursor, cursor, "l"); + // call lengthen_V, line 87 + if (!r_lengthen_V()) { - break lab5; + return false; } - // ], line 76 - bra = cursor; - // <-, line 76 - slice_from("ing"); - break lab0; - } while (false); - cursor = limit - v_1; - lab6: do { - // (, line 77 - // literal, line 77 - if (!(eq_s_b(2, "mp"))) + break; + case 10: + // (, line 88 + // call R1, line 88 + if (!r_R1()) { - break lab6; + return false; } - // ], line 77 - bra = cursor; - // <-, line 77 - slice_from("m"); - break lab0; - } while (false); - cursor = limit - v_1; - lab7: do { - // (, line 78 - // literal, line 78 - if (!(eq_s_b(1, "'"))) + // call C, line 88 + if (!r_C()) { - break lab7; + return false; } - // ], line 78 - bra = cursor; - // call R1, line 78 + // delete, line 88 + slice_del(); + // attach, line 88 + insert(cursor, cursor, "en"); + // call lengthen_V, line 88 + if (!r_lengthen_V()) + { + return false; + } + break; + case 11: + // (, line 89 + // call R1, line 89 if (!r_R1()) { - break lab7; + return false; } - // delete, line 78 - slice_del(); - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 79 - // ], line 79 - bra = cursor; - // call R1, line 79 - if (!r_R1()) - { - return false; - } - // call C, line 79 - if (!r_C()) - { - return false; - } - // delete, line 79 - slice_del(); - } while (false); - break; - case 2: - // (, line 80 - // call R1, line 80 - if (!r_R1()) - { - return false; + // call C, line 89 + if (!r_C()) + { + return false; + } + // <-, line 89 + slice_from("ief"); + break; } - // <-, line 80 - slice_from("g"); - break; - case 3: - // (, line 81 - // call R1, line 81 - if (!r_R1()) - { - return false; - } - // <-, line 81 - slice_from("lijk"); - break; - case 4: - // (, line 82 - // call R1, line 82 - if (!r_R1()) - { - return false; - } - // <-, line 82 - slice_from("isch"); - break; - case 5: - // (, line 83 - // call R1, line 83 - if (!r_R1()) - { - return false; - } - // call C, line 83 - if (!r_C()) - { - return false; - } - // delete, line 83 - slice_del(); - break; - case 6: - // (, line 84 - // call R1, line 84 - if (!r_R1()) - { - return false; - } - // <-, line 84 - slice_from("t"); - break; - case 7: - // (, line 85 - // call R1, line 85 - if (!r_R1()) - { - return false; - } - // <-, line 85 - slice_from("s"); - break; - case 8: - // (, line 86 - // call R1, line 86 - if (!r_R1()) - { - return false; - } - // <-, line 86 - slice_from("r"); - break; - case 9: - // (, line 87 - // call R1, line 87 - if (!r_R1()) - { - return false; - } - // delete, line 87 - slice_del(); - // attach, line 87 - insert(cursor, cursor, "l"); - // call lengthen_V, line 87 - if (!r_lengthen_V()) - { - return false; - } - break; - case 10: - // (, line 88 - // call R1, line 88 - if (!r_R1()) - { - return false; - } - // call C, line 88 - if (!r_C()) - { - return false; - } - // delete, line 88 - slice_del(); - // attach, line 88 - insert(cursor, cursor, "en"); - // call lengthen_V, line 88 - if (!r_lengthen_V()) - { - return false; - } - break; - case 11: - // (, line 89 - // call R1, line 89 - if (!r_R1()) - { - return false; - } - // call C, line 89 - if (!r_C()) - { - return false; - } - // <-, line 89 - slice_from("ief"); - break; - } - return true; - } + return true; + } - private boolean r_Step_3() { + private boolean r_Step_3() { int among_var; - // (, line 94 - // [, line 95 - ket = cursor; - // among, line 95 - among_var = find_among_b(a_2, 14); - if (among_var == 0) - { - return false; - 
} - // (, line 95 - // ], line 95 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 96 - // call R1, line 96 - if (!r_R1()) - { - return false; - } - // <-, line 96 - slice_from("eer"); - break; - case 2: - // (, line 97 - // call R1, line 97 - if (!r_R1()) - { - return false; - } - // delete, line 97 - slice_del(); - // call lengthen_V, line 97 - if (!r_lengthen_V()) - { - return false; - } - break; - case 3: - // (, line 100 - // call R1, line 100 - if (!r_R1()) - { - return false; - } - // delete, line 100 - slice_del(); - break; - case 4: - // (, line 101 - // <-, line 101 - slice_from("r"); - break; - case 5: - // (, line 104 - // call R1, line 104 - if (!r_R1()) - { - return false; - } - // delete, line 104 - slice_del(); - // call lengthen_V, line 104 - if (!r_lengthen_V()) - { - return false; - } - break; - case 6: - // (, line 105 - // call R1, line 105 - if (!r_R1()) - { - return false; - } - // call C, line 105 - if (!r_C()) - { - return false; - } - // <-, line 105 - slice_from("aar"); - break; - case 7: - // (, line 106 - // call R2, line 106 - if (!r_R2()) - { - return false; - } - // delete, line 106 - slice_del(); - // attach, line 106 - insert(cursor, cursor, "f"); - // call lengthen_V, line 106 - if (!r_lengthen_V()) - { - return false; - } - break; - case 8: - // (, line 107 - // call R2, line 107 - if (!r_R2()) - { - return false; - } - // delete, line 107 - slice_del(); - // attach, line 107 - insert(cursor, cursor, "g"); - // call lengthen_V, line 107 - if (!r_lengthen_V()) - { - return false; - } - break; - case 9: - // (, line 108 - // call R1, line 108 - if (!r_R1()) - { - return false; - } - // call C, line 108 - if (!r_C()) - { - return false; - } - // <-, line 108 - slice_from("t"); - break; - case 10: - // (, line 109 - // call R1, line 109 - if (!r_R1()) - { - return false; - } - // call C, line 109 - if (!r_C()) - { - return false; - } - // <-, line 109 - slice_from("d"); - break; - } - return true; - } - - private boolean r_Step_4() { - int among_var; - int v_1; - // (, line 114 - // or, line 134 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 115 - // [, line 115 + // (, line 94 + // [, line 95 ket = cursor; - // among, line 115 - among_var = find_among_b(a_3, 16); + // among, line 95 + among_var = find_among_b(a_2, 14); if (among_var == 0) { - break lab1; + return false; } - // (, line 115 - // ], line 115 + // (, line 95 + // ], line 95 bra = cursor; switch(among_var) { case 0: - break lab1; + return false; case 1: - // (, line 116 - // call R1, line 116 + // (, line 96 + // call R1, line 96 if (!r_R1()) { - break lab1; + return false; } - // <-, line 116 - slice_from("ie"); + // <-, line 96 + slice_from("eer"); break; case 2: - // (, line 117 - // call R1, line 117 + // (, line 97 + // call R1, line 97 if (!r_R1()) { - break lab1; + return false; } - // <-, line 117 - slice_from("eer"); + // delete, line 97 + slice_del(); + // call lengthen_V, line 97 + if (!r_lengthen_V()) + { + return false; + } break; case 3: - // (, line 118 - // call R1, line 118 + // (, line 100 + // call R1, line 100 if (!r_R1()) { - break lab1; + return false; } - // delete, line 118 + // delete, line 100 slice_del(); break; case 4: - // (, line 119 - // call R1, line 119 - if (!r_R1()) - { - break lab1; - } - // call V, line 119 - if (!r_V()) - { - break lab1; - } - // <-, line 119 - slice_from("n"); + // (, line 101 + // <-, line 101 + slice_from("r"); break; case 5: - // (, line 120 - // call R1, line 120 + // (, line 104 + 
// call R1, line 104 if (!r_R1()) { - break lab1; + return false; } - // call V, line 120 - if (!r_V()) + // delete, line 104 + slice_del(); + // call lengthen_V, line 104 + if (!r_lengthen_V()) { - break lab1; + return false; } - // <-, line 120 - slice_from("l"); break; case 6: - // (, line 121 - // call R1, line 121 + // (, line 105 + // call R1, line 105 if (!r_R1()) { - break lab1; + return false; } - // call V, line 121 - if (!r_V()) + // call C, line 105 + if (!r_C()) { - break lab1; + return false; } - // <-, line 121 - slice_from("r"); + // <-, line 105 + slice_from("aar"); break; case 7: - // (, line 122 - // call R1, line 122 - if (!r_R1()) + // (, line 106 + // call R2, line 106 + if (!r_R2()) { - break lab1; + return false; } - // <-, line 122 - slice_from("teer"); + // delete, line 106 + slice_del(); + // attach, line 106 + insert(cursor, cursor, "f"); + // call lengthen_V, line 106 + if (!r_lengthen_V()) + { + return false; + } break; case 8: - // (, line 124 - // call R1, line 124 - if (!r_R1()) + // (, line 107 + // call R2, line 107 + if (!r_R2()) { - break lab1; + return false; } - // <-, line 124 - slice_from("lijk"); + // delete, line 107 + slice_del(); + // attach, line 107 + insert(cursor, cursor, "g"); + // call lengthen_V, line 107 + if (!r_lengthen_V()) + { + return false; + } break; case 9: - // (, line 127 - // call R1, line 127 + // (, line 108 + // call R1, line 108 if (!r_R1()) { - break lab1; + return false; } - // delete, line 127 - slice_del(); + // call C, line 108 + if (!r_C()) + { + return false; + } + // <-, line 108 + slice_from("t"); break; case 10: - // (, line 131 - // call R1, line 131 + // (, line 109 + // call R1, line 109 if (!r_R1()) { - break lab1; + return false; } - // call C, line 131 + // call C, line 109 if (!r_C()) { - break lab1; + return false; } - // delete, line 131 - slice_del(); - // call lengthen_V, line 131 - if (!r_lengthen_V()) - { - break lab1; - } + // <-, line 109 + slice_from("d"); break; } - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 135 - // [, line 135 - ket = cursor; - // among, line 135 - among_var = find_among_b(a_4, 3); - if (among_var == 0) - { - return false; + return true; } - // (, line 135 - // ], line 135 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 138 - // call R1, line 138 - if (!r_R1()) + + private boolean r_Step_4() { + int among_var; + int v_1; + // (, line 114 + // or, line 134 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 115 + // [, line 115 + ket = cursor; + // among, line 115 + among_var = find_among_b(a_3, 16); + if (among_var == 0) + { + break lab1; + } + // (, line 115 + // ], line 115 + bra = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 116 + // call R1, line 116 + if (!r_R1()) + { + break lab1; + } + // <-, line 116 + slice_from("ie"); + break; + case 2: + // (, line 117 + // call R1, line 117 + if (!r_R1()) + { + break lab1; + } + // <-, line 117 + slice_from("eer"); + break; + case 3: + // (, line 118 + // call R1, line 118 + if (!r_R1()) + { + break lab1; + } + // delete, line 118 + slice_del(); + break; + case 4: + // (, line 119 + // call R1, line 119 + if (!r_R1()) + { + break lab1; + } + // call V, line 119 + if (!r_V()) + { + break lab1; + } + // <-, line 119 + slice_from("n"); + break; + case 5: + // (, line 120 + // call R1, line 120 + if (!r_R1()) + { + break lab1; + } + // call V, line 120 + if (!r_V()) + { + break lab1; + } + // <-, line 120 + slice_from("l"); + 
break; + case 6: + // (, line 121 + // call R1, line 121 + if (!r_R1()) + { + break lab1; + } + // call V, line 121 + if (!r_V()) + { + break lab1; + } + // <-, line 121 + slice_from("r"); + break; + case 7: + // (, line 122 + // call R1, line 122 + if (!r_R1()) + { + break lab1; + } + // <-, line 122 + slice_from("teer"); + break; + case 8: + // (, line 124 + // call R1, line 124 + if (!r_R1()) + { + break lab1; + } + // <-, line 124 + slice_from("lijk"); + break; + case 9: + // (, line 127 + // call R1, line 127 + if (!r_R1()) + { + break lab1; + } + // delete, line 127 + slice_del(); + break; + case 10: + // (, line 131 + // call R1, line 131 + if (!r_R1()) + { + break lab1; + } + // call C, line 131 + if (!r_C()) + { + break lab1; + } + // delete, line 131 + slice_del(); + // call lengthen_V, line 131 + if (!r_lengthen_V()) + { + break lab1; + } + break; + } + break lab0; + } while (false); + cursor = limit - v_1; + // (, line 135 + // [, line 135 + ket = cursor; + // among, line 135 + among_var = find_among_b(a_4, 3); + if (among_var == 0) { return false; } - // call C, line 138 - if (!r_C()) - { - return false; + // (, line 135 + // ], line 135 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 138 + // call R1, line 138 + if (!r_R1()) + { + return false; + } + // call C, line 138 + if (!r_C()) + { + return false; + } + // delete, line 138 + slice_del(); + // call lengthen_V, line 138 + if (!r_lengthen_V()) + { + return false; + } + break; } - // delete, line 138 - slice_del(); - // call lengthen_V, line 138 - if (!r_lengthen_V()) - { - return false; - } - break; + } while (false); + return true; } - } while (false); - return true; - } - private boolean r_Step_7() { + private boolean r_Step_7() { int among_var; - // (, line 144 - // [, line 145 - ket = cursor; - // among, line 145 - among_var = find_among_b(a_5, 3); - if (among_var == 0) - { - return false; - } - // (, line 145 - // ], line 145 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 146 - // <-, line 146 - slice_from("k"); - break; - case 2: - // (, line 147 - // <-, line 147 - slice_from("f"); - break; - case 3: - // (, line 148 - // <-, line 148 - slice_from("p"); - break; - } - return true; - } + // (, line 144 + // [, line 145 + ket = cursor; + // among, line 145 + among_var = find_among_b(a_5, 3); + if (among_var == 0) + { + return false; + } + // (, line 145 + // ], line 145 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 146 + // <-, line 146 + slice_from("k"); + break; + case 2: + // (, line 147 + // <-, line 147 + slice_from("f"); + break; + case 3: + // (, line 148 + // <-, line 148 + slice_from("p"); + break; + } + return true; + } - private boolean r_Step_6() { + private boolean r_Step_6() { int among_var; - // (, line 153 - // [, line 154 - ket = cursor; - // among, line 154 - among_var = find_among_b(a_6, 22); - if (among_var == 0) - { - return false; - } - // (, line 154 - // ], line 154 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 155 - // <-, line 155 - slice_from("b"); - break; - case 2: - // (, line 156 - // <-, line 156 - slice_from("c"); - break; - case 3: - // (, line 157 - // <-, line 157 - slice_from("d"); - break; - case 4: - // (, line 158 - // <-, line 158 - slice_from("f"); - break; - case 5: - // (, line 159 - // <-, line 159 - slice_from("g"); - break; - case 6: - // (, line 160 - // <-, line 160 - slice_from("h"); - break; - case 7: - // (, 
line 161 - // <-, line 161 - slice_from("j"); - break; - case 8: - // (, line 162 - // <-, line 162 - slice_from("k"); - break; - case 9: - // (, line 163 - // <-, line 163 - slice_from("l"); - break; - case 10: - // (, line 164 - // <-, line 164 - slice_from("m"); - break; - case 11: - // (, line 165 - // <-, line 165 - slice_from("n"); - break; - case 12: - // (, line 166 - // <-, line 166 - slice_from("p"); - break; - case 13: - // (, line 167 - // <-, line 167 - slice_from("q"); - break; - case 14: - // (, line 168 - // <-, line 168 - slice_from("r"); - break; - case 15: - // (, line 169 - // <-, line 169 - slice_from("s"); - break; - case 16: - // (, line 170 - // <-, line 170 - slice_from("t"); - break; - case 17: - // (, line 171 - // <-, line 171 - slice_from("v"); - break; - case 18: - // (, line 172 - // <-, line 172 - slice_from("w"); - break; - case 19: - // (, line 173 - // <-, line 173 - slice_from("x"); - break; - case 20: - // (, line 174 - // <-, line 174 - slice_from("z"); - break; - case 21: - // (, line 175 - // <-, line 175 - slice_from("f"); - break; - case 22: - // (, line 176 - // <-, line 176 - slice_from("s"); - break; - } - return true; - } + // (, line 153 + // [, line 154 + ket = cursor; + // among, line 154 + among_var = find_among_b(a_6, 22); + if (among_var == 0) + { + return false; + } + // (, line 154 + // ], line 154 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 155 + // <-, line 155 + slice_from("b"); + break; + case 2: + // (, line 156 + // <-, line 156 + slice_from("c"); + break; + case 3: + // (, line 157 + // <-, line 157 + slice_from("d"); + break; + case 4: + // (, line 158 + // <-, line 158 + slice_from("f"); + break; + case 5: + // (, line 159 + // <-, line 159 + slice_from("g"); + break; + case 6: + // (, line 160 + // <-, line 160 + slice_from("h"); + break; + case 7: + // (, line 161 + // <-, line 161 + slice_from("j"); + break; + case 8: + // (, line 162 + // <-, line 162 + slice_from("k"); + break; + case 9: + // (, line 163 + // <-, line 163 + slice_from("l"); + break; + case 10: + // (, line 164 + // <-, line 164 + slice_from("m"); + break; + case 11: + // (, line 165 + // <-, line 165 + slice_from("n"); + break; + case 12: + // (, line 166 + // <-, line 166 + slice_from("p"); + break; + case 13: + // (, line 167 + // <-, line 167 + slice_from("q"); + break; + case 14: + // (, line 168 + // <-, line 168 + slice_from("r"); + break; + case 15: + // (, line 169 + // <-, line 169 + slice_from("s"); + break; + case 16: + // (, line 170 + // <-, line 170 + slice_from("t"); + break; + case 17: + // (, line 171 + // <-, line 171 + slice_from("v"); + break; + case 18: + // (, line 172 + // <-, line 172 + slice_from("w"); + break; + case 19: + // (, line 173 + // <-, line 173 + slice_from("x"); + break; + case 20: + // (, line 174 + // <-, line 174 + slice_from("z"); + break; + case 21: + // (, line 175 + // <-, line 175 + slice_from("f"); + break; + case 22: + // (, line 176 + // <-, line 176 + slice_from("s"); + break; + } + return true; + } - private boolean r_Step_1c() { + private boolean r_Step_1c() { int among_var; int v_1; int v_2; - // (, line 181 - // [, line 182 - ket = cursor; - // among, line 182 - among_var = find_among_b(a_7, 2); - if (among_var == 0) - { - return false; - } - // (, line 182 - // ], line 182 - bra = cursor; - // call R1, line 182 - if (!r_R1()) - { - return false; - } - // call C, line 182 - if (!r_C()) - { - return false; - } - switch(among_var) { - case 0: - return false; - 
case 1: - // (, line 183 - // not, line 183 + // (, line 181 + // [, line 182 + ket = cursor; + // among, line 182 + among_var = find_among_b(a_7, 2); + if (among_var == 0) { - v_1 = limit - cursor; - lab0: do { + return false; + } + // (, line 182 + // ], line 182 + bra = cursor; + // call R1, line 182 + if (!r_R1()) + { + return false; + } + // call C, line 182 + if (!r_C()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: // (, line 183 - // literal, line 183 - if (!(eq_s_b(1, "n"))) + // not, line 183 { - break lab0; + v_1 = limit - cursor; + lab0: do { + // (, line 183 + // literal, line 183 + if (!(eq_s_b(1, "n"))) + { + break lab0; + } + // call R1, line 183 + if (!r_R1()) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_1; } - // call R1, line 183 - if (!r_R1()) + // delete, line 183 + slice_del(); + break; + case 2: + // (, line 184 + // not, line 184 { - break lab0; + v_2 = limit - cursor; + lab1: do { + // (, line 184 + // literal, line 184 + if (!(eq_s_b(1, "h"))) + { + break lab1; + } + // call R1, line 184 + if (!r_R1()) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_2; } + // delete, line 184 + slice_del(); + break; + } + return true; + } + + private boolean r_Lose_prefix() { + int v_1; + int v_2; + int v_3; + // (, line 189 + // [, line 190 + bra = cursor; + // literal, line 190 + if (!(eq_s(2, "ge"))) + { + return false; + } + // ], line 190 + ket = cursor; + // test, line 190 + v_1 = cursor; + // hop, line 190 + { + int c = cursor + 3; + if (0 > c || c > limit) + { return false; - } while (false); - cursor = limit - v_1; + } + cursor = c; } - // delete, line 183 - slice_del(); - break; - case 2: - // (, line 184 - // not, line 184 + cursor = v_1; + // (, line 190 + // goto, line 190 + golab0: while(true) { - v_2 = limit - cursor; + v_2 = cursor; lab1: do { - // (, line 184 - // literal, line 184 - if (!(eq_s_b(1, "h"))) + if (!(in_grouping(g_v, 97, 121))) { break lab1; } - // call R1, line 184 - if (!r_R1()) + cursor = v_2; + break golab0; + } while (false); + cursor = v_2; + if (cursor >= limit) + { + return false; + } + cursor++; + } + // goto, line 190 + golab2: while(true) + { + v_3 = cursor; + lab3: do { + if (!(out_grouping(g_v, 97, 121))) { - break lab1; + break lab3; } - return false; + cursor = v_3; + break golab2; } while (false); - cursor = limit - v_2; + cursor = v_3; + if (cursor >= limit) + { + return false; + } + cursor++; } - // delete, line 184 + // set GE_removed, line 191 + B_GE_removed = true; + // delete, line 192 slice_del(); - break; - } - return true; - } + return true; + } - private boolean r_Lose_prefix() { - int v_1; + private boolean r_Lose_infix() { int v_2; int v_3; - // (, line 189 - // [, line 190 - bra = cursor; - // literal, line 190 - if (!(eq_s(2, "ge"))) - { - return false; - } - // ], line 190 - ket = cursor; - // test, line 190 - v_1 = cursor; - // hop, line 190 - { - int c = cursor + 3; - if (0 > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = v_1; - // (, line 190 - // goto, line 190 - golab0: while(true) - { - v_2 = cursor; - lab1: do { - if (!(in_grouping(g_v, 97, 121))) + int v_4; + // (, line 195 + // next, line 196 + if (cursor >= limit) { - break lab1; + return false; } - cursor = v_2; - break golab0; - } while (false); - cursor = v_2; - if (cursor >= limit) - { - return false; - } - cursor++; - } - // goto, line 190 - golab2: while(true) - { - v_3 = cursor; - lab3: do { - if (!(out_grouping(g_v, 97, 121))) + 
cursor++; + // gopast, line 197 + golab0: while(true) { - break lab3; + lab1: do { + // (, line 197 + // [, line 197 + bra = cursor; + // literal, line 197 + if (!(eq_s(2, "ge"))) + { + break lab1; + } + // ], line 197 + ket = cursor; + break golab0; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; } - cursor = v_3; - break golab2; - } while (false); - cursor = v_3; - if (cursor >= limit) - { - return false; - } - cursor++; - } - // set GE_removed, line 191 - B_GE_removed = true; - // delete, line 192 - slice_del(); - return true; - } - - private boolean r_Lose_infix() { - int v_2; - int v_3; - int v_4; - // (, line 195 - // next, line 196 - if (cursor >= limit) - { - return false; - } - cursor++; - // gopast, line 197 - golab0: while(true) - { - lab1: do { - // (, line 197 - // [, line 197 - bra = cursor; - // literal, line 197 - if (!(eq_s(2, "ge"))) + // test, line 197 + v_2 = cursor; + // hop, line 197 { - break lab1; + int c = cursor + 3; + if (0 > c || c > limit) + { + return false; + } + cursor = c; } - // ], line 197 - ket = cursor; - break golab0; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // test, line 197 - v_2 = cursor; - // hop, line 197 - { - int c = cursor + 3; - if (0 > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = v_2; - // (, line 197 - // goto, line 197 - golab2: while(true) - { - v_3 = cursor; - lab3: do { - if (!(in_grouping(g_v, 97, 121))) + cursor = v_2; + // (, line 197 + // goto, line 197 + golab2: while(true) { - break lab3; + v_3 = cursor; + lab3: do { + if (!(in_grouping(g_v, 97, 121))) + { + break lab3; + } + cursor = v_3; + break golab2; + } while (false); + cursor = v_3; + if (cursor >= limit) + { + return false; + } + cursor++; } - cursor = v_3; - break golab2; - } while (false); - cursor = v_3; - if (cursor >= limit) - { - return false; - } - cursor++; - } - // goto, line 197 - golab4: while(true) - { - v_4 = cursor; - lab5: do { - if (!(out_grouping(g_v, 97, 121))) + // goto, line 197 + golab4: while(true) { - break lab5; + v_4 = cursor; + lab5: do { + if (!(out_grouping(g_v, 97, 121))) + { + break lab5; + } + cursor = v_4; + break golab4; + } while (false); + cursor = v_4; + if (cursor >= limit) + { + return false; + } + cursor++; } - cursor = v_4; - break golab4; - } while (false); - cursor = v_4; - if (cursor >= limit) - { - return false; + // set GE_removed, line 198 + B_GE_removed = true; + // delete, line 199 + slice_del(); + return true; } - cursor++; - } - // set GE_removed, line 198 - B_GE_removed = true; - // delete, line 199 - slice_del(); - return true; - } - private boolean r_measure() { + private boolean r_measure() { int v_1; int v_2; int v_5; int v_6; int v_9; int v_10; - // (, line 202 - // do, line 203 - v_1 = cursor; - lab0: do { - // (, line 203 - // tolimit, line 204 - cursor = limit; - // setmark p1, line 205 - I_p1 = cursor; - // setmark p2, line 206 - I_p2 = cursor; - } while (false); - cursor = v_1; - // do, line 208 - v_2 = cursor; - lab1: do { - // (, line 208 - // repeat, line 209 - replab2: while(true) - { - lab3: do { - if (!(out_grouping(g_v, 97, 121))) + // (, line 202 + // do, line 203 + v_1 = cursor; + lab0: do { + // (, line 203 + // tolimit, line 204 + cursor = limit; + // setmark p1, line 205 + I_p1 = cursor; + // setmark p2, line 206 + I_p2 = cursor; + } while (false); + cursor = v_1; + // do, line 208 + v_2 = cursor; + lab1: do { + // (, line 208 + // repeat, line 209 + replab2: while(true) { - break lab3; + lab3: do { + if 
(!(out_grouping(g_v, 97, 121))) + { + break lab3; + } + continue replab2; + } while (false); + break replab2; } - continue replab2; - } while (false); - break replab2; - } - // atleast, line 209 - { - int v_4 = 1; - // atleast, line 209 - replab4: while(true) - { - v_5 = cursor; - lab5: do { - // (, line 209 - // or, line 209 - lab6: do { - v_6 = cursor; - lab7: do { - // literal, line 209 - if (!(eq_s(2, "ij"))) - { - break lab7; - } - break lab6; + // atleast, line 209 + { + int v_4 = 1; + // atleast, line 209 + replab4: while(true) + { + v_5 = cursor; + lab5: do { + // (, line 209 + // or, line 209 + lab6: do { + v_6 = cursor; + lab7: do { + // literal, line 209 + if (!(eq_s(2, "ij"))) + { + break lab7; + } + break lab6; + } while (false); + cursor = v_6; + if (!(in_grouping(g_v, 97, 121))) + { + break lab5; + } + } while (false); + v_4--; + continue replab4; } while (false); - cursor = v_6; - if (!(in_grouping(g_v, 97, 121))) + cursor = v_5; + break replab4; + } + if (v_4 > 0) + { + break lab1; + } + } + if (!(out_grouping(g_v, 97, 121))) + { + break lab1; + } + // setmark p1, line 209 + I_p1 = cursor; + // repeat, line 210 + replab8: while(true) + { + lab9: do { + if (!(out_grouping(g_v, 97, 121))) { - break lab5; + break lab9; } + continue replab8; } while (false); - v_4--; - continue replab4; - } while (false); - cursor = v_5; - break replab4; - } - if (v_4 > 0) - { - break lab1; - } - } - if (!(out_grouping(g_v, 97, 121))) - { - break lab1; - } - // setmark p1, line 209 - I_p1 = cursor; - // repeat, line 210 - replab8: while(true) - { - lab9: do { + break replab8; + } + // atleast, line 210 + { + int v_8 = 1; + // atleast, line 210 + replab10: while(true) + { + v_9 = cursor; + lab11: do { + // (, line 210 + // or, line 210 + lab12: do { + v_10 = cursor; + lab13: do { + // literal, line 210 + if (!(eq_s(2, "ij"))) + { + break lab13; + } + break lab12; + } while (false); + cursor = v_10; + if (!(in_grouping(g_v, 97, 121))) + { + break lab11; + } + } while (false); + v_8--; + continue replab10; + } while (false); + cursor = v_9; + break replab10; + } + if (v_8 > 0) + { + break lab1; + } + } if (!(out_grouping(g_v, 97, 121))) { - break lab9; + break lab1; } - continue replab8; + // setmark p2, line 210 + I_p2 = cursor; } while (false); - break replab8; + cursor = v_2; + return true; } - // atleast, line 210 - { - int v_8 = 1; - // atleast, line 210 - replab10: while(true) - { - v_9 = cursor; - lab11: do { - // (, line 210 - // or, line 210 - lab12: do { - v_10 = cursor; - lab13: do { - // literal, line 210 - if (!(eq_s(2, "ij"))) - { - break lab13; - } - break lab12; - } while (false); - cursor = v_10; - if (!(in_grouping(g_v, 97, 121))) - { - break lab11; - } - } while (false); - v_8--; - continue replab10; - } while (false); - cursor = v_9; - break replab10; - } - if (v_8 > 0) - { - break lab1; - } - } - if (!(out_grouping(g_v, 97, 121))) - { - break lab1; - } - // setmark p2, line 210 - I_p2 = cursor; - } while (false); - cursor = v_2; - return true; - } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -1869,313 +1877,325 @@ int v_18; int v_19; int v_20; - // (, line 214 - // unset Y_found, line 216 - B_Y_found = false; - // unset stemmed, line 217 - B_stemmed = false; - // do, line 218 - v_1 = cursor; - lab0: do { - // (, line 218 - // [, line 218 - bra = cursor; - // literal, line 218 - if (!(eq_s(1, "y"))) - { - break lab0; - } - // ], line 218 - ket = cursor; - // <-, line 218 - slice_from("Y"); - // set Y_found, line 218 - B_Y_found 
= true; - } while (false); - cursor = v_1; - // do, line 219 - v_2 = cursor; - lab1: do { - // repeat, line 219 - replab2: while(true) - { - v_3 = cursor; - lab3: do { - // (, line 219 - // goto, line 219 - golab4: while(true) + // (, line 214 + // unset Y_found, line 216 + B_Y_found = false; + // unset stemmed, line 217 + B_stemmed = false; + // do, line 218 + v_1 = cursor; + lab0: do { + // (, line 218 + // [, line 218 + bra = cursor; + // literal, line 218 + if (!(eq_s(1, "y"))) { - v_4 = cursor; - lab5: do { + break lab0; + } + // ], line 218 + ket = cursor; + // <-, line 218 + slice_from("Y"); + // set Y_found, line 218 + B_Y_found = true; + } while (false); + cursor = v_1; + // do, line 219 + v_2 = cursor; + lab1: do { + // repeat, line 219 + replab2: while(true) + { + v_3 = cursor; + lab3: do { // (, line 219 - if (!(in_grouping(g_v, 97, 121))) + // goto, line 219 + golab4: while(true) { - break lab5; + v_4 = cursor; + lab5: do { + // (, line 219 + if (!(in_grouping(g_v, 97, 121))) + { + break lab5; + } + // [, line 219 + bra = cursor; + // literal, line 219 + if (!(eq_s(1, "y"))) + { + break lab5; + } + // ], line 219 + ket = cursor; + cursor = v_4; + break golab4; + } while (false); + cursor = v_4; + if (cursor >= limit) + { + break lab3; + } + cursor++; } - // [, line 219 - bra = cursor; - // literal, line 219 - if (!(eq_s(1, "y"))) + // <-, line 219 + slice_from("Y"); + // set Y_found, line 219 + B_Y_found = true; + continue replab2; + } while (false); + cursor = v_3; + break replab2; + } + } while (false); + cursor = v_2; + // call measure, line 221 + if (!r_measure()) + { + return false; + } + // backwards, line 223 + limit_backward = cursor; cursor = limit; + // (, line 223 + // do, line 224 + v_5 = limit - cursor; + lab6: do { + // (, line 224 + // call Step_1, line 224 + if (!r_Step_1()) + { + break lab6; + } + // set stemmed, line 224 + B_stemmed = true; + } while (false); + cursor = limit - v_5; + // do, line 225 + v_6 = limit - cursor; + lab7: do { + // (, line 225 + // call Step_2, line 225 + if (!r_Step_2()) + { + break lab7; + } + // set stemmed, line 225 + B_stemmed = true; + } while (false); + cursor = limit - v_6; + // do, line 226 + v_7 = limit - cursor; + lab8: do { + // (, line 226 + // call Step_3, line 226 + if (!r_Step_3()) + { + break lab8; + } + // set stemmed, line 226 + B_stemmed = true; + } while (false); + cursor = limit - v_7; + // do, line 227 + v_8 = limit - cursor; + lab9: do { + // (, line 227 + // call Step_4, line 227 + if (!r_Step_4()) + { + break lab9; + } + // set stemmed, line 227 + B_stemmed = true; + } while (false); + cursor = limit - v_8; + cursor = limit_backward; // unset GE_removed, line 229 + B_GE_removed = false; + // do, line 230 + v_9 = cursor; + lab10: do { + // (, line 230 + // and, line 230 + v_10 = cursor; + // call Lose_prefix, line 230 + if (!r_Lose_prefix()) + { + break lab10; + } + cursor = v_10; + // call measure, line 230 + if (!r_measure()) + { + break lab10; + } + } while (false); + cursor = v_9; + // backwards, line 231 + limit_backward = cursor; cursor = limit; + // (, line 231 + // do, line 232 + v_11 = limit - cursor; + lab11: do { + // (, line 232 + // Boolean test GE_removed, line 232 + if (!(B_GE_removed)) + { + break lab11; + } + // call Step_1c, line 232 + if (!r_Step_1c()) + { + break lab11; + } + } while (false); + cursor = limit - v_11; + cursor = limit_backward; // unset GE_removed, line 234 + B_GE_removed = false; + // do, line 235 + v_12 = cursor; + lab12: do { + // (, line 235 + // and, line 235 + v_13 = 
cursor; + // call Lose_infix, line 235 + if (!r_Lose_infix()) + { + break lab12; + } + cursor = v_13; + // call measure, line 235 + if (!r_measure()) + { + break lab12; + } + } while (false); + cursor = v_12; + // backwards, line 236 + limit_backward = cursor; cursor = limit; + // (, line 236 + // do, line 237 + v_14 = limit - cursor; + lab13: do { + // (, line 237 + // Boolean test GE_removed, line 237 + if (!(B_GE_removed)) + { + break lab13; + } + // call Step_1c, line 237 + if (!r_Step_1c()) + { + break lab13; + } + } while (false); + cursor = limit - v_14; + cursor = limit_backward; // backwards, line 239 + limit_backward = cursor; cursor = limit; + // (, line 239 + // do, line 240 + v_15 = limit - cursor; + lab14: do { + // (, line 240 + // call Step_7, line 240 + if (!r_Step_7()) + { + break lab14; + } + // set stemmed, line 240 + B_stemmed = true; + } while (false); + cursor = limit - v_15; + // do, line 241 + v_16 = limit - cursor; + lab15: do { + // (, line 241 + // or, line 241 + lab16: do { + lab17: do { + // Boolean test stemmed, line 241 + if (!(B_stemmed)) { - break lab5; + break lab17; } - // ], line 219 - ket = cursor; - cursor = v_4; - break golab4; + break lab16; } while (false); - cursor = v_4; - if (cursor >= limit) + // Boolean test GE_removed, line 241 + if (!(B_GE_removed)) { - break lab3; + break lab15; } - cursor++; - } - // <-, line 219 - slice_from("Y"); - // set Y_found, line 219 - B_Y_found = true; - continue replab2; - } while (false); - cursor = v_3; - break replab2; - } - } while (false); - cursor = v_2; - // call measure, line 221 - if (!r_measure()) - { - return false; - } - // backwards, line 223 - limit_backward = cursor; cursor = limit; - // (, line 223 - // do, line 224 - v_5 = limit - cursor; - lab6: do { - // (, line 224 - // call Step_1, line 224 - if (!r_Step_1()) - { - break lab6; - } - // set stemmed, line 224 - B_stemmed = true; - } while (false); - cursor = limit - v_5; - // do, line 225 - v_6 = limit - cursor; - lab7: do { - // (, line 225 - // call Step_2, line 225 - if (!r_Step_2()) - { - break lab7; - } - // set stemmed, line 225 - B_stemmed = true; - } while (false); - cursor = limit - v_6; - // do, line 226 - v_7 = limit - cursor; - lab8: do { - // (, line 226 - // call Step_3, line 226 - if (!r_Step_3()) - { - break lab8; - } - // set stemmed, line 226 - B_stemmed = true; - } while (false); - cursor = limit - v_7; - // do, line 227 - v_8 = limit - cursor; - lab9: do { - // (, line 227 - // call Step_4, line 227 - if (!r_Step_4()) - { - break lab9; - } - // set stemmed, line 227 - B_stemmed = true; - } while (false); - cursor = limit - v_8; - cursor = limit_backward; // unset GE_removed, line 229 - B_GE_removed = false; - // do, line 230 - v_9 = cursor; - lab10: do { - // (, line 230 - // and, line 230 - v_10 = cursor; - // call Lose_prefix, line 230 - if (!r_Lose_prefix()) - { - break lab10; - } - cursor = v_10; - // call measure, line 230 - if (!r_measure()) - { - break lab10; - } - } while (false); - cursor = v_9; - // backwards, line 231 - limit_backward = cursor; cursor = limit; - // (, line 231 - // do, line 232 - v_11 = limit - cursor; - lab11: do { - // (, line 232 - // Boolean test GE_removed, line 232 - if (!(B_GE_removed)) - { - break lab11; - } - // call Step_1c, line 232 - if (!r_Step_1c()) - { - break lab11; - } - } while (false); - cursor = limit - v_11; - cursor = limit_backward; // unset GE_removed, line 234 - B_GE_removed = false; - // do, line 235 - v_12 = cursor; - lab12: do { - // (, line 235 - // and, line 235 - 
v_13 = cursor; - // call Lose_infix, line 235 - if (!r_Lose_infix()) - { - break lab12; - } - cursor = v_13; - // call measure, line 235 - if (!r_measure()) - { - break lab12; - } - } while (false); - cursor = v_12; - // backwards, line 236 - limit_backward = cursor; cursor = limit; - // (, line 236 - // do, line 237 - v_14 = limit - cursor; - lab13: do { - // (, line 237 - // Boolean test GE_removed, line 237 - if (!(B_GE_removed)) - { - break lab13; - } - // call Step_1c, line 237 - if (!r_Step_1c()) - { - break lab13; - } - } while (false); - cursor = limit - v_14; - cursor = limit_backward; // backwards, line 239 - limit_backward = cursor; cursor = limit; - // (, line 239 - // do, line 240 - v_15 = limit - cursor; - lab14: do { - // (, line 240 - // call Step_7, line 240 - if (!r_Step_7()) - { - break lab14; - } - // set stemmed, line 240 - B_stemmed = true; - } while (false); - cursor = limit - v_15; - // do, line 241 - v_16 = limit - cursor; - lab15: do { - // (, line 241 - // or, line 241 - lab16: do { - lab17: do { - // Boolean test stemmed, line 241 - if (!(B_stemmed)) + } while (false); + // call Step_6, line 241 + if (!r_Step_6()) { - break lab17; + break lab15; } - break lab16; } while (false); - // Boolean test GE_removed, line 241 - if (!(B_GE_removed)) - { - break lab15; - } - } while (false); - // call Step_6, line 241 - if (!r_Step_6()) - { - break lab15; - } - } while (false); - cursor = limit - v_16; - cursor = limit_backward; // do, line 243 - v_18 = cursor; - lab18: do { - // (, line 243 - // Boolean test Y_found, line 243 - if (!(B_Y_found)) - { - break lab18; - } - // repeat, line 243 - replab19: while(true) - { - v_19 = cursor; - lab20: do { + cursor = limit - v_16; + cursor = limit_backward; // do, line 243 + v_18 = cursor; + lab18: do { // (, line 243 - // goto, line 243 - golab21: while(true) + // Boolean test Y_found, line 243 + if (!(B_Y_found)) { - v_20 = cursor; - lab22: do { + break lab18; + } + // repeat, line 243 + replab19: while(true) + { + v_19 = cursor; + lab20: do { // (, line 243 - // [, line 243 - bra = cursor; - // literal, line 243 - if (!(eq_s(1, "Y"))) + // goto, line 243 + golab21: while(true) { - break lab22; + v_20 = cursor; + lab22: do { + // (, line 243 + // [, line 243 + bra = cursor; + // literal, line 243 + if (!(eq_s(1, "Y"))) + { + break lab22; + } + // ], line 243 + ket = cursor; + cursor = v_20; + break golab21; + } while (false); + cursor = v_20; + if (cursor >= limit) + { + break lab20; + } + cursor++; } - // ], line 243 - ket = cursor; - cursor = v_20; - break golab21; + // <-, line 243 + slice_from("y"); + continue replab19; } while (false); - cursor = v_20; - if (cursor >= limit) - { - break lab20; - } - cursor++; + cursor = v_19; + break replab19; } - // <-, line 243 - slice_from("y"); - continue replab19; } while (false); - cursor = v_19; - break replab19; + cursor = v_18; + return true; } - } while (false); - cursor = v_18; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof KpStemmer; } + @Override + public int hashCode() { + return KpStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/LovinsStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/LovinsStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/LovinsStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 
3rdParty_sources/lucene/org/tartarus/snowball/ext/LovinsStemmer.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,1567 +1,1574 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class LovinsStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "d", -1, -1, "", this), - new Among ( "f", -1, -1, "", this), - new Among ( "ph", -1, -1, "", this), - new Among ( "th", -1, -1, "", this), - new Among ( "l", -1, -1, "", this), - new Among ( "er", -1, -1, "", this), - new Among ( "or", -1, -1, "", this), - new Among ( "es", -1, -1, "", this), - new Among ( "t", -1, -1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "s'", -1, 1, "r_A", this), - new Among ( "a", -1, 1, "r_A", this), - new Among ( "ia", 1, 1, "r_A", this), - new Among ( "ata", 1, 1, "r_A", this), - new Among ( "ic", -1, 1, "r_A", this), - new Among ( "aic", 4, 1, "r_A", this), - new Among ( "allic", 4, 1, "r_BB", this), - new Among ( "aric", 4, 1, "r_A", this), - new Among ( "atic", 4, 1, "r_B", this), - new Among ( "itic", 4, 1, "r_H", this), - new Among ( "antic", 4, 1, "r_C", this), - new Among ( "istic", 4, 1, "r_A", this), - new Among ( "alistic", 11, 1, "r_B", this), - new Among ( "aristic", 11, 1, "r_A", this), - new Among ( "ivistic", 11, 1, "r_A", this), - new Among ( "ed", -1, 1, "r_E", this), - new Among ( "anced", 15, 1, "r_B", this), - new Among ( "enced", 15, 1, "r_A", this), - new Among ( "ished", 15, 1, "r_A", this), - new Among ( "ied", 15, 1, "r_A", this), - new Among ( "ened", 15, 1, "r_E", this), - new Among ( "ioned", 15, 1, "r_A", this), - new Among ( "ated", 15, 1, "r_I", this), - new Among ( "ented", 15, 1, "r_C", this), - new Among ( "ized", 15, 1, "r_F", this), - new Among ( "arized", 24, 1, "r_A", this), - new Among ( "oid", -1, 1, "r_A", this), - new Among ( "aroid", 26, 1, "r_A", this), - new Among ( "hood", -1, 1, "r_A", this), - new Among ( "ehood", 28, 1, "r_A", this), - new Among ( "ihood", 28, 1, "r_A", this), - new Among ( "elihood", 30, 1, "r_E", this), - new Among ( "ward", -1, 1, "r_A", this), - new Among ( "e", -1, 1, "r_A", this), - new Among ( "ae", 33, 1, "r_A", this), - new Among ( "ance", 33, 1, "r_B", this), - new Among ( "icance", 35, 1, "r_A", this), - new Among ( "ence", 33, 1, "r_A", this), - new Among ( "ide", 33, 1, "r_L", this), - new Among ( "icide", 38, 1, "r_A", this), - new Among ( "otide", 38, 1, "r_A", this), - new Among ( "age", 33, 1, "r_B", this), - new Among ( "able", 33, 1, "r_A", this), - new Among ( "atable", 42, 1, "r_A", this), - new Among ( "izable", 42, 1, "r_E", this), - new Among ( "arizable", 44, 1, "r_A", this), - new Among ( "ible", 33, 1, "r_A", this), - new Among ( "encible", 46, 1, "r_A", this), - new Among ( "ene", 33, 1, "r_E", this), - new Among ( "ine", 33, 1, "r_M", this), - new Among ( "idine", 49, 1, "r_I", this), - new Among ( "one", 33, 1, "r_R", this), - new Among ( "ature", 33, 1, "r_E", this), - new Among ( "eature", 52, 1, "r_Z", this), - new Among ( "ese", 33, 1, "r_A", this), - new Among ( "wise", 33, 1, "r_A", this), - new Among ( "ate", 33, 1, 
"r_A", this), - new Among ( "entiate", 56, 1, "r_A", this), - new Among ( "inate", 56, 1, "r_A", this), - new Among ( "ionate", 56, 1, "r_D", this), - new Among ( "ite", 33, 1, "r_AA", this), - new Among ( "ive", 33, 1, "r_A", this), - new Among ( "ative", 61, 1, "r_A", this), - new Among ( "ize", 33, 1, "r_F", this), - new Among ( "alize", 63, 1, "r_A", this), - new Among ( "icalize", 64, 1, "r_A", this), - new Among ( "ialize", 64, 1, "r_A", this), - new Among ( "entialize", 66, 1, "r_A", this), - new Among ( "ionalize", 64, 1, "r_A", this), - new Among ( "arize", 63, 1, "r_A", this), - new Among ( "ing", -1, 1, "r_N", this), - new Among ( "ancing", 70, 1, "r_B", this), - new Among ( "encing", 70, 1, "r_A", this), - new Among ( "aging", 70, 1, "r_B", this), - new Among ( "ening", 70, 1, "r_E", this), - new Among ( "ioning", 70, 1, "r_A", this), - new Among ( "ating", 70, 1, "r_I", this), - new Among ( "enting", 70, 1, "r_C", this), - new Among ( "ying", 70, 1, "r_B", this), - new Among ( "izing", 70, 1, "r_F", this), - new Among ( "arizing", 79, 1, "r_A", this), - new Among ( "ish", -1, 1, "r_C", this), - new Among ( "yish", 81, 1, "r_A", this), - new Among ( "i", -1, 1, "r_A", this), - new Among ( "al", -1, 1, "r_BB", this), - new Among ( "ical", 84, 1, "r_A", this), - new Among ( "aical", 85, 1, "r_A", this), - new Among ( "istical", 85, 1, "r_A", this), - new Among ( "oidal", 84, 1, "r_A", this), - new Among ( "eal", 84, 1, "r_Y", this), - new Among ( "ial", 84, 1, "r_A", this), - new Among ( "ancial", 90, 1, "r_A", this), - new Among ( "arial", 90, 1, "r_A", this), - new Among ( "ential", 90, 1, "r_A", this), - new Among ( "ional", 84, 1, "r_A", this), - new Among ( "ational", 94, 1, "r_B", this), - new Among ( "izational", 95, 1, "r_A", this), - new Among ( "ental", 84, 1, "r_A", this), - new Among ( "ful", -1, 1, "r_A", this), - new Among ( "eful", 98, 1, "r_A", this), - new Among ( "iful", 98, 1, "r_A", this), - new Among ( "yl", -1, 1, "r_R", this), - new Among ( "ism", -1, 1, "r_B", this), - new Among ( "icism", 102, 1, "r_A", this), - new Among ( "oidism", 102, 1, "r_A", this), - new Among ( "alism", 102, 1, "r_B", this), - new Among ( "icalism", 105, 1, "r_A", this), - new Among ( "ionalism", 105, 1, "r_A", this), - new Among ( "inism", 102, 1, "r_J", this), - new Among ( "ativism", 102, 1, "r_A", this), - new Among ( "um", -1, 1, "r_U", this), - new Among ( "ium", 110, 1, "r_A", this), - new Among ( "ian", -1, 1, "r_A", this), - new Among ( "ician", 112, 1, "r_A", this), - new Among ( "en", -1, 1, "r_F", this), - new Among ( "ogen", 114, 1, "r_A", this), - new Among ( "on", -1, 1, "r_S", this), - new Among ( "ion", 116, 1, "r_Q", this), - new Among ( "ation", 117, 1, "r_B", this), - new Among ( "ication", 118, 1, "r_G", this), - new Among ( "entiation", 118, 1, "r_A", this), - new Among ( "ination", 118, 1, "r_A", this), - new Among ( "isation", 118, 1, "r_A", this), - new Among ( "arisation", 122, 1, "r_A", this), - new Among ( "entation", 118, 1, "r_A", this), - new Among ( "ization", 118, 1, "r_F", this), - new Among ( "arization", 125, 1, "r_A", this), - new Among ( "action", 117, 1, "r_G", this), - new Among ( "o", -1, 1, "r_A", this), - new Among ( "ar", -1, 1, "r_X", this), - new Among ( "ear", 129, 1, "r_Y", this), - new Among ( "ier", -1, 1, "r_A", this), - new Among ( "ariser", -1, 1, "r_A", this), - new Among ( "izer", -1, 1, "r_F", this), - new Among ( "arizer", 133, 1, "r_A", this), - new Among ( "or", -1, 1, "r_T", this), - new Among ( "ator", 135, 1, "r_A", 
this), - new Among ( "s", -1, 1, "r_W", this), - new Among ( "'s", 137, 1, "r_A", this), - new Among ( "as", 137, 1, "r_B", this), - new Among ( "ics", 137, 1, "r_A", this), - new Among ( "istics", 140, 1, "r_A", this), - new Among ( "es", 137, 1, "r_E", this), - new Among ( "ances", 142, 1, "r_B", this), - new Among ( "ences", 142, 1, "r_A", this), - new Among ( "ides", 142, 1, "r_L", this), - new Among ( "oides", 145, 1, "r_A", this), - new Among ( "ages", 142, 1, "r_B", this), - new Among ( "ies", 142, 1, "r_P", this), - new Among ( "acies", 148, 1, "r_A", this), - new Among ( "ancies", 148, 1, "r_A", this), - new Among ( "encies", 148, 1, "r_A", this), - new Among ( "aries", 148, 1, "r_A", this), - new Among ( "ities", 148, 1, "r_A", this), - new Among ( "alities", 153, 1, "r_A", this), - new Among ( "ivities", 153, 1, "r_A", this), - new Among ( "ines", 142, 1, "r_M", this), - new Among ( "nesses", 142, 1, "r_A", this), - new Among ( "ates", 142, 1, "r_A", this), - new Among ( "atives", 142, 1, "r_A", this), - new Among ( "ings", 137, 1, "r_N", this), - new Among ( "is", 137, 1, "r_A", this), - new Among ( "als", 137, 1, "r_BB", this), - new Among ( "ials", 162, 1, "r_A", this), - new Among ( "entials", 163, 1, "r_A", this), - new Among ( "ionals", 162, 1, "r_A", this), - new Among ( "isms", 137, 1, "r_B", this), - new Among ( "ians", 137, 1, "r_A", this), - new Among ( "icians", 167, 1, "r_A", this), - new Among ( "ions", 137, 1, "r_B", this), - new Among ( "ations", 169, 1, "r_B", this), - new Among ( "arisations", 170, 1, "r_A", this), - new Among ( "entations", 170, 1, "r_A", this), - new Among ( "izations", 170, 1, "r_A", this), - new Among ( "arizations", 173, 1, "r_A", this), - new Among ( "ars", 137, 1, "r_O", this), - new Among ( "iers", 137, 1, "r_A", this), - new Among ( "izers", 137, 1, "r_F", this), - new Among ( "ators", 137, 1, "r_A", this), - new Among ( "less", 137, 1, "r_A", this), - new Among ( "eless", 179, 1, "r_A", this), - new Among ( "ness", 137, 1, "r_A", this), - new Among ( "eness", 181, 1, "r_E", this), - new Among ( "ableness", 182, 1, "r_A", this), - new Among ( "eableness", 183, 1, "r_E", this), - new Among ( "ibleness", 182, 1, "r_A", this), - new Among ( "ateness", 182, 1, "r_A", this), - new Among ( "iteness", 182, 1, "r_A", this), - new Among ( "iveness", 182, 1, "r_A", this), - new Among ( "ativeness", 188, 1, "r_A", this), - new Among ( "ingness", 181, 1, "r_A", this), - new Among ( "ishness", 181, 1, "r_A", this), - new Among ( "iness", 181, 1, "r_A", this), - new Among ( "ariness", 192, 1, "r_E", this), - new Among ( "alness", 181, 1, "r_A", this), - new Among ( "icalness", 194, 1, "r_A", this), - new Among ( "antialness", 194, 1, "r_A", this), - new Among ( "entialness", 194, 1, "r_A", this), - new Among ( "ionalness", 194, 1, "r_A", this), - new Among ( "fulness", 181, 1, "r_A", this), - new Among ( "lessness", 181, 1, "r_A", this), - new Among ( "ousness", 181, 1, "r_A", this), - new Among ( "eousness", 201, 1, "r_A", this), - new Among ( "iousness", 201, 1, "r_A", this), - new Among ( "itousness", 201, 1, "r_A", this), - new Among ( "entness", 181, 1, "r_A", this), - new Among ( "ants", 137, 1, "r_B", this), - new Among ( "ists", 137, 1, "r_A", this), - new Among ( "icists", 207, 1, "r_A", this), - new Among ( "us", 137, 1, "r_V", this), - new Among ( "ous", 209, 1, "r_A", this), - new Among ( "eous", 210, 1, "r_A", this), - new Among ( "aceous", 211, 1, "r_A", this), - new Among ( "antaneous", 211, 1, "r_A", this), - new Among ( "ious", 210, 
1, "r_A", this), - new Among ( "acious", 214, 1, "r_B", this), - new Among ( "itous", 210, 1, "r_A", this), - new Among ( "ant", -1, 1, "r_B", this), - new Among ( "icant", 217, 1, "r_A", this), - new Among ( "ent", -1, 1, "r_C", this), - new Among ( "ement", 219, 1, "r_A", this), - new Among ( "izement", 220, 1, "r_A", this), - new Among ( "ist", -1, 1, "r_A", this), - new Among ( "icist", 222, 1, "r_A", this), - new Among ( "alist", 222, 1, "r_A", this), - new Among ( "icalist", 224, 1, "r_A", this), - new Among ( "ialist", 224, 1, "r_A", this), - new Among ( "ionist", 222, 1, "r_A", this), - new Among ( "entist", 222, 1, "r_A", this), - new Among ( "y", -1, 1, "r_B", this), - new Among ( "acy", 229, 1, "r_A", this), - new Among ( "ancy", 229, 1, "r_B", this), - new Among ( "ency", 229, 1, "r_A", this), - new Among ( "ly", 229, 1, "r_B", this), - new Among ( "ealy", 233, 1, "r_Y", this), - new Among ( "ably", 233, 1, "r_A", this), - new Among ( "ibly", 233, 1, "r_A", this), - new Among ( "edly", 233, 1, "r_E", this), - new Among ( "iedly", 237, 1, "r_A", this), - new Among ( "ely", 233, 1, "r_E", this), - new Among ( "ately", 239, 1, "r_A", this), - new Among ( "ively", 239, 1, "r_A", this), - new Among ( "atively", 241, 1, "r_A", this), - new Among ( "ingly", 233, 1, "r_B", this), - new Among ( "atingly", 243, 1, "r_A", this), - new Among ( "ily", 233, 1, "r_A", this), - new Among ( "lily", 245, 1, "r_A", this), - new Among ( "arily", 245, 1, "r_A", this), - new Among ( "ally", 233, 1, "r_B", this), - new Among ( "ically", 248, 1, "r_A", this), - new Among ( "aically", 249, 1, "r_A", this), - new Among ( "allically", 249, 1, "r_C", this), - new Among ( "istically", 249, 1, "r_A", this), - new Among ( "alistically", 252, 1, "r_B", this), - new Among ( "oidally", 248, 1, "r_A", this), - new Among ( "ially", 248, 1, "r_A", this), - new Among ( "entially", 255, 1, "r_A", this), - new Among ( "ionally", 248, 1, "r_A", this), - new Among ( "ationally", 257, 1, "r_B", this), - new Among ( "izationally", 258, 1, "r_B", this), - new Among ( "entally", 248, 1, "r_A", this), - new Among ( "fully", 233, 1, "r_A", this), - new Among ( "efully", 261, 1, "r_A", this), - new Among ( "ifully", 261, 1, "r_A", this), - new Among ( "enly", 233, 1, "r_E", this), - new Among ( "arly", 233, 1, "r_K", this), - new Among ( "early", 265, 1, "r_Y", this), - new Among ( "lessly", 233, 1, "r_A", this), - new Among ( "ously", 233, 1, "r_A", this), - new Among ( "eously", 268, 1, "r_A", this), - new Among ( "iously", 268, 1, "r_A", this), - new Among ( "ently", 233, 1, "r_A", this), - new Among ( "ary", 229, 1, "r_F", this), - new Among ( "ery", 229, 1, "r_E", this), - new Among ( "icianry", 229, 1, "r_A", this), - new Among ( "atory", 229, 1, "r_A", this), - new Among ( "ity", 229, 1, "r_A", this), - new Among ( "acity", 276, 1, "r_A", this), - new Among ( "icity", 276, 1, "r_A", this), - new Among ( "eity", 276, 1, "r_A", this), - new Among ( "ality", 276, 1, "r_A", this), - new Among ( "icality", 280, 1, "r_A", this), - new Among ( "iality", 280, 1, "r_A", this), - new Among ( "antiality", 282, 1, "r_A", this), - new Among ( "entiality", 282, 1, "r_A", this), - new Among ( "ionality", 280, 1, "r_A", this), - new Among ( "elity", 276, 1, "r_A", this), - new Among ( "ability", 276, 1, "r_A", this), - new Among ( "izability", 287, 1, "r_A", this), - new Among ( "arizability", 288, 1, "r_A", this), - new Among ( "ibility", 276, 1, "r_A", this), - new Among ( "inity", 276, 1, "r_CC", this), - new Among ( "arity", 276, 
1, "r_B", this), - new Among ( "ivity", 276, 1, "r_A", this) - }; + private final static LovinsStemmer methodObject = new LovinsStemmer (); - private Among a_2[] = { - new Among ( "bb", -1, -1, "", this), - new Among ( "dd", -1, -1, "", this), - new Among ( "gg", -1, -1, "", this), - new Among ( "ll", -1, -1, "", this), - new Among ( "mm", -1, -1, "", this), - new Among ( "nn", -1, -1, "", this), - new Among ( "pp", -1, -1, "", this), - new Among ( "rr", -1, -1, "", this), - new Among ( "ss", -1, -1, "", this), - new Among ( "tt", -1, -1, "", this) - }; + private final static Among a_0[] = { + new Among ( "d", -1, -1, "", methodObject ), + new Among ( "f", -1, -1, "", methodObject ), + new Among ( "ph", -1, -1, "", methodObject ), + new Among ( "th", -1, -1, "", methodObject ), + new Among ( "l", -1, -1, "", methodObject ), + new Among ( "er", -1, -1, "", methodObject ), + new Among ( "or", -1, -1, "", methodObject ), + new Among ( "es", -1, -1, "", methodObject ), + new Among ( "t", -1, -1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "uad", -1, 18, "", this), - new Among ( "vad", -1, 19, "", this), - new Among ( "cid", -1, 20, "", this), - new Among ( "lid", -1, 21, "", this), - new Among ( "erid", -1, 22, "", this), - new Among ( "pand", -1, 23, "", this), - new Among ( "end", -1, 24, "", this), - new Among ( "ond", -1, 25, "", this), - new Among ( "lud", -1, 26, "", this), - new Among ( "rud", -1, 27, "", this), - new Among ( "ul", -1, 9, "", this), - new Among ( "her", -1, 28, "", this), - new Among ( "metr", -1, 7, "", this), - new Among ( "istr", -1, 6, "", this), - new Among ( "urs", -1, 5, "", this), - new Among ( "uct", -1, 2, "", this), - new Among ( "et", -1, 32, "", this), - new Among ( "mit", -1, 29, "", this), - new Among ( "ent", -1, 30, "", this), - new Among ( "umpt", -1, 3, "", this), - new Among ( "rpt", -1, 4, "", this), - new Among ( "ert", -1, 31, "", this), - new Among ( "yt", -1, 33, "", this), - new Among ( "iev", -1, 1, "", this), - new Among ( "olv", -1, 8, "", this), - new Among ( "ax", -1, 14, "", this), - new Among ( "ex", -1, 15, "", this), - new Among ( "bex", 26, 10, "", this), - new Among ( "dex", 26, 11, "", this), - new Among ( "pex", 26, 12, "", this), - new Among ( "tex", 26, 13, "", this), - new Among ( "ix", -1, 16, "", this), - new Among ( "lux", -1, 17, "", this), - new Among ( "yz", -1, 34, "", this) - }; + private final static Among a_1[] = { + new Among ( "s'", -1, 1, "r_A", methodObject ), + new Among ( "a", -1, 1, "r_A", methodObject ), + new Among ( "ia", 1, 1, "r_A", methodObject ), + new Among ( "ata", 1, 1, "r_A", methodObject ), + new Among ( "ic", -1, 1, "r_A", methodObject ), + new Among ( "aic", 4, 1, "r_A", methodObject ), + new Among ( "allic", 4, 1, "r_BB", methodObject ), + new Among ( "aric", 4, 1, "r_A", methodObject ), + new Among ( "atic", 4, 1, "r_B", methodObject ), + new Among ( "itic", 4, 1, "r_H", methodObject ), + new Among ( "antic", 4, 1, "r_C", methodObject ), + new Among ( "istic", 4, 1, "r_A", methodObject ), + new Among ( "alistic", 11, 1, "r_B", methodObject ), + new Among ( "aristic", 11, 1, "r_A", methodObject ), + new Among ( "ivistic", 11, 1, "r_A", methodObject ), + new Among ( "ed", -1, 1, "r_E", methodObject ), + new Among ( "anced", 15, 1, "r_B", methodObject ), + new Among ( "enced", 15, 1, "r_A", methodObject ), + new Among ( "ished", 15, 1, "r_A", methodObject ), + new Among ( "ied", 15, 1, "r_A", methodObject ), + new Among ( "ened", 15, 1, "r_E", methodObject ), + new Among ( 
"ioned", 15, 1, "r_A", methodObject ), + new Among ( "ated", 15, 1, "r_I", methodObject ), + new Among ( "ented", 15, 1, "r_C", methodObject ), + new Among ( "ized", 15, 1, "r_F", methodObject ), + new Among ( "arized", 24, 1, "r_A", methodObject ), + new Among ( "oid", -1, 1, "r_A", methodObject ), + new Among ( "aroid", 26, 1, "r_A", methodObject ), + new Among ( "hood", -1, 1, "r_A", methodObject ), + new Among ( "ehood", 28, 1, "r_A", methodObject ), + new Among ( "ihood", 28, 1, "r_A", methodObject ), + new Among ( "elihood", 30, 1, "r_E", methodObject ), + new Among ( "ward", -1, 1, "r_A", methodObject ), + new Among ( "e", -1, 1, "r_A", methodObject ), + new Among ( "ae", 33, 1, "r_A", methodObject ), + new Among ( "ance", 33, 1, "r_B", methodObject ), + new Among ( "icance", 35, 1, "r_A", methodObject ), + new Among ( "ence", 33, 1, "r_A", methodObject ), + new Among ( "ide", 33, 1, "r_L", methodObject ), + new Among ( "icide", 38, 1, "r_A", methodObject ), + new Among ( "otide", 38, 1, "r_A", methodObject ), + new Among ( "age", 33, 1, "r_B", methodObject ), + new Among ( "able", 33, 1, "r_A", methodObject ), + new Among ( "atable", 42, 1, "r_A", methodObject ), + new Among ( "izable", 42, 1, "r_E", methodObject ), + new Among ( "arizable", 44, 1, "r_A", methodObject ), + new Among ( "ible", 33, 1, "r_A", methodObject ), + new Among ( "encible", 46, 1, "r_A", methodObject ), + new Among ( "ene", 33, 1, "r_E", methodObject ), + new Among ( "ine", 33, 1, "r_M", methodObject ), + new Among ( "idine", 49, 1, "r_I", methodObject ), + new Among ( "one", 33, 1, "r_R", methodObject ), + new Among ( "ature", 33, 1, "r_E", methodObject ), + new Among ( "eature", 52, 1, "r_Z", methodObject ), + new Among ( "ese", 33, 1, "r_A", methodObject ), + new Among ( "wise", 33, 1, "r_A", methodObject ), + new Among ( "ate", 33, 1, "r_A", methodObject ), + new Among ( "entiate", 56, 1, "r_A", methodObject ), + new Among ( "inate", 56, 1, "r_A", methodObject ), + new Among ( "ionate", 56, 1, "r_D", methodObject ), + new Among ( "ite", 33, 1, "r_AA", methodObject ), + new Among ( "ive", 33, 1, "r_A", methodObject ), + new Among ( "ative", 61, 1, "r_A", methodObject ), + new Among ( "ize", 33, 1, "r_F", methodObject ), + new Among ( "alize", 63, 1, "r_A", methodObject ), + new Among ( "icalize", 64, 1, "r_A", methodObject ), + new Among ( "ialize", 64, 1, "r_A", methodObject ), + new Among ( "entialize", 66, 1, "r_A", methodObject ), + new Among ( "ionalize", 64, 1, "r_A", methodObject ), + new Among ( "arize", 63, 1, "r_A", methodObject ), + new Among ( "ing", -1, 1, "r_N", methodObject ), + new Among ( "ancing", 70, 1, "r_B", methodObject ), + new Among ( "encing", 70, 1, "r_A", methodObject ), + new Among ( "aging", 70, 1, "r_B", methodObject ), + new Among ( "ening", 70, 1, "r_E", methodObject ), + new Among ( "ioning", 70, 1, "r_A", methodObject ), + new Among ( "ating", 70, 1, "r_I", methodObject ), + new Among ( "enting", 70, 1, "r_C", methodObject ), + new Among ( "ying", 70, 1, "r_B", methodObject ), + new Among ( "izing", 70, 1, "r_F", methodObject ), + new Among ( "arizing", 79, 1, "r_A", methodObject ), + new Among ( "ish", -1, 1, "r_C", methodObject ), + new Among ( "yish", 81, 1, "r_A", methodObject ), + new Among ( "i", -1, 1, "r_A", methodObject ), + new Among ( "al", -1, 1, "r_BB", methodObject ), + new Among ( "ical", 84, 1, "r_A", methodObject ), + new Among ( "aical", 85, 1, "r_A", methodObject ), + new Among ( "istical", 85, 1, "r_A", methodObject ), + new Among ( "oidal", 84, 1, 
"r_A", methodObject ), + new Among ( "eal", 84, 1, "r_Y", methodObject ), + new Among ( "ial", 84, 1, "r_A", methodObject ), + new Among ( "ancial", 90, 1, "r_A", methodObject ), + new Among ( "arial", 90, 1, "r_A", methodObject ), + new Among ( "ential", 90, 1, "r_A", methodObject ), + new Among ( "ional", 84, 1, "r_A", methodObject ), + new Among ( "ational", 94, 1, "r_B", methodObject ), + new Among ( "izational", 95, 1, "r_A", methodObject ), + new Among ( "ental", 84, 1, "r_A", methodObject ), + new Among ( "ful", -1, 1, "r_A", methodObject ), + new Among ( "eful", 98, 1, "r_A", methodObject ), + new Among ( "iful", 98, 1, "r_A", methodObject ), + new Among ( "yl", -1, 1, "r_R", methodObject ), + new Among ( "ism", -1, 1, "r_B", methodObject ), + new Among ( "icism", 102, 1, "r_A", methodObject ), + new Among ( "oidism", 102, 1, "r_A", methodObject ), + new Among ( "alism", 102, 1, "r_B", methodObject ), + new Among ( "icalism", 105, 1, "r_A", methodObject ), + new Among ( "ionalism", 105, 1, "r_A", methodObject ), + new Among ( "inism", 102, 1, "r_J", methodObject ), + new Among ( "ativism", 102, 1, "r_A", methodObject ), + new Among ( "um", -1, 1, "r_U", methodObject ), + new Among ( "ium", 110, 1, "r_A", methodObject ), + new Among ( "ian", -1, 1, "r_A", methodObject ), + new Among ( "ician", 112, 1, "r_A", methodObject ), + new Among ( "en", -1, 1, "r_F", methodObject ), + new Among ( "ogen", 114, 1, "r_A", methodObject ), + new Among ( "on", -1, 1, "r_S", methodObject ), + new Among ( "ion", 116, 1, "r_Q", methodObject ), + new Among ( "ation", 117, 1, "r_B", methodObject ), + new Among ( "ication", 118, 1, "r_G", methodObject ), + new Among ( "entiation", 118, 1, "r_A", methodObject ), + new Among ( "ination", 118, 1, "r_A", methodObject ), + new Among ( "isation", 118, 1, "r_A", methodObject ), + new Among ( "arisation", 122, 1, "r_A", methodObject ), + new Among ( "entation", 118, 1, "r_A", methodObject ), + new Among ( "ization", 118, 1, "r_F", methodObject ), + new Among ( "arization", 125, 1, "r_A", methodObject ), + new Among ( "action", 117, 1, "r_G", methodObject ), + new Among ( "o", -1, 1, "r_A", methodObject ), + new Among ( "ar", -1, 1, "r_X", methodObject ), + new Among ( "ear", 129, 1, "r_Y", methodObject ), + new Among ( "ier", -1, 1, "r_A", methodObject ), + new Among ( "ariser", -1, 1, "r_A", methodObject ), + new Among ( "izer", -1, 1, "r_F", methodObject ), + new Among ( "arizer", 133, 1, "r_A", methodObject ), + new Among ( "or", -1, 1, "r_T", methodObject ), + new Among ( "ator", 135, 1, "r_A", methodObject ), + new Among ( "s", -1, 1, "r_W", methodObject ), + new Among ( "'s", 137, 1, "r_A", methodObject ), + new Among ( "as", 137, 1, "r_B", methodObject ), + new Among ( "ics", 137, 1, "r_A", methodObject ), + new Among ( "istics", 140, 1, "r_A", methodObject ), + new Among ( "es", 137, 1, "r_E", methodObject ), + new Among ( "ances", 142, 1, "r_B", methodObject ), + new Among ( "ences", 142, 1, "r_A", methodObject ), + new Among ( "ides", 142, 1, "r_L", methodObject ), + new Among ( "oides", 145, 1, "r_A", methodObject ), + new Among ( "ages", 142, 1, "r_B", methodObject ), + new Among ( "ies", 142, 1, "r_P", methodObject ), + new Among ( "acies", 148, 1, "r_A", methodObject ), + new Among ( "ancies", 148, 1, "r_A", methodObject ), + new Among ( "encies", 148, 1, "r_A", methodObject ), + new Among ( "aries", 148, 1, "r_A", methodObject ), + new Among ( "ities", 148, 1, "r_A", methodObject ), + new Among ( "alities", 153, 1, "r_A", methodObject ), + new 
Among ( "ivities", 153, 1, "r_A", methodObject ), + new Among ( "ines", 142, 1, "r_M", methodObject ), + new Among ( "nesses", 142, 1, "r_A", methodObject ), + new Among ( "ates", 142, 1, "r_A", methodObject ), + new Among ( "atives", 142, 1, "r_A", methodObject ), + new Among ( "ings", 137, 1, "r_N", methodObject ), + new Among ( "is", 137, 1, "r_A", methodObject ), + new Among ( "als", 137, 1, "r_BB", methodObject ), + new Among ( "ials", 162, 1, "r_A", methodObject ), + new Among ( "entials", 163, 1, "r_A", methodObject ), + new Among ( "ionals", 162, 1, "r_A", methodObject ), + new Among ( "isms", 137, 1, "r_B", methodObject ), + new Among ( "ians", 137, 1, "r_A", methodObject ), + new Among ( "icians", 167, 1, "r_A", methodObject ), + new Among ( "ions", 137, 1, "r_B", methodObject ), + new Among ( "ations", 169, 1, "r_B", methodObject ), + new Among ( "arisations", 170, 1, "r_A", methodObject ), + new Among ( "entations", 170, 1, "r_A", methodObject ), + new Among ( "izations", 170, 1, "r_A", methodObject ), + new Among ( "arizations", 173, 1, "r_A", methodObject ), + new Among ( "ars", 137, 1, "r_O", methodObject ), + new Among ( "iers", 137, 1, "r_A", methodObject ), + new Among ( "izers", 137, 1, "r_F", methodObject ), + new Among ( "ators", 137, 1, "r_A", methodObject ), + new Among ( "less", 137, 1, "r_A", methodObject ), + new Among ( "eless", 179, 1, "r_A", methodObject ), + new Among ( "ness", 137, 1, "r_A", methodObject ), + new Among ( "eness", 181, 1, "r_E", methodObject ), + new Among ( "ableness", 182, 1, "r_A", methodObject ), + new Among ( "eableness", 183, 1, "r_E", methodObject ), + new Among ( "ibleness", 182, 1, "r_A", methodObject ), + new Among ( "ateness", 182, 1, "r_A", methodObject ), + new Among ( "iteness", 182, 1, "r_A", methodObject ), + new Among ( "iveness", 182, 1, "r_A", methodObject ), + new Among ( "ativeness", 188, 1, "r_A", methodObject ), + new Among ( "ingness", 181, 1, "r_A", methodObject ), + new Among ( "ishness", 181, 1, "r_A", methodObject ), + new Among ( "iness", 181, 1, "r_A", methodObject ), + new Among ( "ariness", 192, 1, "r_E", methodObject ), + new Among ( "alness", 181, 1, "r_A", methodObject ), + new Among ( "icalness", 194, 1, "r_A", methodObject ), + new Among ( "antialness", 194, 1, "r_A", methodObject ), + new Among ( "entialness", 194, 1, "r_A", methodObject ), + new Among ( "ionalness", 194, 1, "r_A", methodObject ), + new Among ( "fulness", 181, 1, "r_A", methodObject ), + new Among ( "lessness", 181, 1, "r_A", methodObject ), + new Among ( "ousness", 181, 1, "r_A", methodObject ), + new Among ( "eousness", 201, 1, "r_A", methodObject ), + new Among ( "iousness", 201, 1, "r_A", methodObject ), + new Among ( "itousness", 201, 1, "r_A", methodObject ), + new Among ( "entness", 181, 1, "r_A", methodObject ), + new Among ( "ants", 137, 1, "r_B", methodObject ), + new Among ( "ists", 137, 1, "r_A", methodObject ), + new Among ( "icists", 207, 1, "r_A", methodObject ), + new Among ( "us", 137, 1, "r_V", methodObject ), + new Among ( "ous", 209, 1, "r_A", methodObject ), + new Among ( "eous", 210, 1, "r_A", methodObject ), + new Among ( "aceous", 211, 1, "r_A", methodObject ), + new Among ( "antaneous", 211, 1, "r_A", methodObject ), + new Among ( "ious", 210, 1, "r_A", methodObject ), + new Among ( "acious", 214, 1, "r_B", methodObject ), + new Among ( "itous", 210, 1, "r_A", methodObject ), + new Among ( "ant", -1, 1, "r_B", methodObject ), + new Among ( "icant", 217, 1, "r_A", methodObject ), + new Among ( "ent", -1, 1, "r_C", 
methodObject ), + new Among ( "ement", 219, 1, "r_A", methodObject ), + new Among ( "izement", 220, 1, "r_A", methodObject ), + new Among ( "ist", -1, 1, "r_A", methodObject ), + new Among ( "icist", 222, 1, "r_A", methodObject ), + new Among ( "alist", 222, 1, "r_A", methodObject ), + new Among ( "icalist", 224, 1, "r_A", methodObject ), + new Among ( "ialist", 224, 1, "r_A", methodObject ), + new Among ( "ionist", 222, 1, "r_A", methodObject ), + new Among ( "entist", 222, 1, "r_A", methodObject ), + new Among ( "y", -1, 1, "r_B", methodObject ), + new Among ( "acy", 229, 1, "r_A", methodObject ), + new Among ( "ancy", 229, 1, "r_B", methodObject ), + new Among ( "ency", 229, 1, "r_A", methodObject ), + new Among ( "ly", 229, 1, "r_B", methodObject ), + new Among ( "ealy", 233, 1, "r_Y", methodObject ), + new Among ( "ably", 233, 1, "r_A", methodObject ), + new Among ( "ibly", 233, 1, "r_A", methodObject ), + new Among ( "edly", 233, 1, "r_E", methodObject ), + new Among ( "iedly", 237, 1, "r_A", methodObject ), + new Among ( "ely", 233, 1, "r_E", methodObject ), + new Among ( "ately", 239, 1, "r_A", methodObject ), + new Among ( "ively", 239, 1, "r_A", methodObject ), + new Among ( "atively", 241, 1, "r_A", methodObject ), + new Among ( "ingly", 233, 1, "r_B", methodObject ), + new Among ( "atingly", 243, 1, "r_A", methodObject ), + new Among ( "ily", 233, 1, "r_A", methodObject ), + new Among ( "lily", 245, 1, "r_A", methodObject ), + new Among ( "arily", 245, 1, "r_A", methodObject ), + new Among ( "ally", 233, 1, "r_B", methodObject ), + new Among ( "ically", 248, 1, "r_A", methodObject ), + new Among ( "aically", 249, 1, "r_A", methodObject ), + new Among ( "allically", 249, 1, "r_C", methodObject ), + new Among ( "istically", 249, 1, "r_A", methodObject ), + new Among ( "alistically", 252, 1, "r_B", methodObject ), + new Among ( "oidally", 248, 1, "r_A", methodObject ), + new Among ( "ially", 248, 1, "r_A", methodObject ), + new Among ( "entially", 255, 1, "r_A", methodObject ), + new Among ( "ionally", 248, 1, "r_A", methodObject ), + new Among ( "ationally", 257, 1, "r_B", methodObject ), + new Among ( "izationally", 258, 1, "r_B", methodObject ), + new Among ( "entally", 248, 1, "r_A", methodObject ), + new Among ( "fully", 233, 1, "r_A", methodObject ), + new Among ( "efully", 261, 1, "r_A", methodObject ), + new Among ( "ifully", 261, 1, "r_A", methodObject ), + new Among ( "enly", 233, 1, "r_E", methodObject ), + new Among ( "arly", 233, 1, "r_K", methodObject ), + new Among ( "early", 265, 1, "r_Y", methodObject ), + new Among ( "lessly", 233, 1, "r_A", methodObject ), + new Among ( "ously", 233, 1, "r_A", methodObject ), + new Among ( "eously", 268, 1, "r_A", methodObject ), + new Among ( "iously", 268, 1, "r_A", methodObject ), + new Among ( "ently", 233, 1, "r_A", methodObject ), + new Among ( "ary", 229, 1, "r_F", methodObject ), + new Among ( "ery", 229, 1, "r_E", methodObject ), + new Among ( "icianry", 229, 1, "r_A", methodObject ), + new Among ( "atory", 229, 1, "r_A", methodObject ), + new Among ( "ity", 229, 1, "r_A", methodObject ), + new Among ( "acity", 276, 1, "r_A", methodObject ), + new Among ( "icity", 276, 1, "r_A", methodObject ), + new Among ( "eity", 276, 1, "r_A", methodObject ), + new Among ( "ality", 276, 1, "r_A", methodObject ), + new Among ( "icality", 280, 1, "r_A", methodObject ), + new Among ( "iality", 280, 1, "r_A", methodObject ), + new Among ( "antiality", 282, 1, "r_A", methodObject ), + new Among ( "entiality", 282, 1, "r_A", methodObject 
), + new Among ( "ionality", 280, 1, "r_A", methodObject ), + new Among ( "elity", 276, 1, "r_A", methodObject ), + new Among ( "ability", 276, 1, "r_A", methodObject ), + new Among ( "izability", 287, 1, "r_A", methodObject ), + new Among ( "arizability", 288, 1, "r_A", methodObject ), + new Among ( "ibility", 276, 1, "r_A", methodObject ), + new Among ( "inity", 276, 1, "r_CC", methodObject ), + new Among ( "arity", 276, 1, "r_B", methodObject ), + new Among ( "ivity", 276, 1, "r_A", methodObject ) + }; + private final static Among a_2[] = { + new Among ( "bb", -1, -1, "", methodObject ), + new Among ( "dd", -1, -1, "", methodObject ), + new Among ( "gg", -1, -1, "", methodObject ), + new Among ( "ll", -1, -1, "", methodObject ), + new Among ( "mm", -1, -1, "", methodObject ), + new Among ( "nn", -1, -1, "", methodObject ), + new Among ( "pp", -1, -1, "", methodObject ), + new Among ( "rr", -1, -1, "", methodObject ), + new Among ( "ss", -1, -1, "", methodObject ), + new Among ( "tt", -1, -1, "", methodObject ) + }; - private void copy_from(LovinsStemmer other) { - super.copy_from(other); - } + private final static Among a_3[] = { + new Among ( "uad", -1, 18, "", methodObject ), + new Among ( "vad", -1, 19, "", methodObject ), + new Among ( "cid", -1, 20, "", methodObject ), + new Among ( "lid", -1, 21, "", methodObject ), + new Among ( "erid", -1, 22, "", methodObject ), + new Among ( "pand", -1, 23, "", methodObject ), + new Among ( "end", -1, 24, "", methodObject ), + new Among ( "ond", -1, 25, "", methodObject ), + new Among ( "lud", -1, 26, "", methodObject ), + new Among ( "rud", -1, 27, "", methodObject ), + new Among ( "ul", -1, 9, "", methodObject ), + new Among ( "her", -1, 28, "", methodObject ), + new Among ( "metr", -1, 7, "", methodObject ), + new Among ( "istr", -1, 6, "", methodObject ), + new Among ( "urs", -1, 5, "", methodObject ), + new Among ( "uct", -1, 2, "", methodObject ), + new Among ( "et", -1, 32, "", methodObject ), + new Among ( "mit", -1, 29, "", methodObject ), + new Among ( "ent", -1, 30, "", methodObject ), + new Among ( "umpt", -1, 3, "", methodObject ), + new Among ( "rpt", -1, 4, "", methodObject ), + new Among ( "ert", -1, 31, "", methodObject ), + new Among ( "yt", -1, 33, "", methodObject ), + new Among ( "iev", -1, 1, "", methodObject ), + new Among ( "olv", -1, 8, "", methodObject ), + new Among ( "ax", -1, 14, "", methodObject ), + new Among ( "ex", -1, 15, "", methodObject ), + new Among ( "bex", 26, 10, "", methodObject ), + new Among ( "dex", 26, 11, "", methodObject ), + new Among ( "pex", 26, 12, "", methodObject ), + new Among ( "tex", 26, 13, "", methodObject ), + new Among ( "ix", -1, 16, "", methodObject ), + new Among ( "lux", -1, 17, "", methodObject ), + new Among ( "yz", -1, 34, "", methodObject ) + }; - private boolean r_A() { - // (, line 21 - // hop, line 21 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; + + private void copy_from(LovinsStemmer other) { + super.copy_from(other); } - cursor = c; - } - return true; - } - private boolean r_B() { - // (, line 22 - // hop, line 22 - { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - return false; + private boolean r_A() { + // (, line 21 + // hop, line 21 + { + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + return true; } - cursor = c; - } - return true; - } - private boolean r_C() { - // (, line 23 - // hop, line 23 - { - int c = cursor - 4; - if (limit_backward > c || c > 
limit) - { - return false; + private boolean r_B() { + // (, line 22 + // hop, line 22 + { + int c = cursor - 3; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + return true; } - cursor = c; - } - return true; - } - private boolean r_D() { - // (, line 24 - // hop, line 24 - { - int c = cursor - 5; - if (limit_backward > c || c > limit) - { - return false; + private boolean r_C() { + // (, line 23 + // hop, line 23 + { + int c = cursor - 4; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + return true; } - cursor = c; - } - return true; - } - private boolean r_E() { + private boolean r_D() { + // (, line 24 + // hop, line 24 + { + int c = cursor - 5; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + return true; + } + + private boolean r_E() { int v_1; int v_2; - // (, line 25 - // test, line 25 - v_1 = limit - cursor; - // hop, line 25 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 25 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 25 - if (!(eq_s_b(1, "e"))) + // (, line 25 + // test, line 25 + v_1 = limit - cursor; + // hop, line 25 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - return true; - } + cursor = limit - v_1; + // not, line 25 + { + v_2 = limit - cursor; + lab0: do { + // literal, line 25 + if (!(eq_s_b(1, "e"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; + } + return true; + } - private boolean r_F() { + private boolean r_F() { int v_1; int v_2; - // (, line 26 - // test, line 26 - v_1 = limit - cursor; - // hop, line 26 - { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 26 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 26 - if (!(eq_s_b(1, "e"))) + // (, line 26 + // test, line 26 + v_1 = limit - cursor; + // hop, line 26 { - break lab0; + int c = cursor - 3; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - return true; - } + cursor = limit - v_1; + // not, line 26 + { + v_2 = limit - cursor; + lab0: do { + // literal, line 26 + if (!(eq_s_b(1, "e"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; + } + return true; + } - private boolean r_G() { + private boolean r_G() { int v_1; - // (, line 27 - // test, line 27 - v_1 = limit - cursor; - // hop, line 27 - { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - return false; + // (, line 27 + // test, line 27 + v_1 = limit - cursor; + // hop, line 27 + { + int c = cursor - 3; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + cursor = limit - v_1; + // literal, line 27 + if (!(eq_s_b(1, "f"))) + { + return false; + } + return true; } - cursor = c; - } - cursor = limit - v_1; - // literal, line 27 - if (!(eq_s_b(1, "f"))) - { - return false; - } - return true; - } - private boolean r_H() { + private boolean r_H() { int v_1; int v_2; - // (, line 28 - // test, line 28 - v_1 = limit - cursor; - // hop, line 28 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // or, line 28 - lab0: do { - v_2 
= limit - cursor; - lab1: do { - // literal, line 28 - if (!(eq_s_b(1, "t"))) + // (, line 28 + // test, line 28 + v_1 = limit - cursor; + // hop, line 28 { - break lab1; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - // literal, line 28 - if (!(eq_s_b(2, "ll"))) - { - return false; + cursor = limit - v_1; + // or, line 28 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 28 + if (!(eq_s_b(1, "t"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + // literal, line 28 + if (!(eq_s_b(2, "ll"))) + { + return false; + } + } while (false); + return true; } - } while (false); - return true; - } - private boolean r_I() { + private boolean r_I() { int v_1; int v_2; int v_3; - // (, line 29 - // test, line 29 - v_1 = limit - cursor; - // hop, line 29 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 29 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 29 - if (!(eq_s_b(1, "o"))) + // (, line 29 + // test, line 29 + v_1 = limit - cursor; + // hop, line 29 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - // not, line 29 - { - v_3 = limit - cursor; - lab1: do { - // literal, line 29 - if (!(eq_s_b(1, "e"))) + cursor = limit - v_1; + // not, line 29 { - break lab1; + v_2 = limit - cursor; + lab0: do { + // literal, line 29 + if (!(eq_s_b(1, "o"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_3; - } - return true; - } + // not, line 29 + { + v_3 = limit - cursor; + lab1: do { + // literal, line 29 + if (!(eq_s_b(1, "e"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_3; + } + return true; + } - private boolean r_J() { + private boolean r_J() { int v_1; int v_2; int v_3; - // (, line 30 - // test, line 30 - v_1 = limit - cursor; - // hop, line 30 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 30 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 30 - if (!(eq_s_b(1, "a"))) + // (, line 30 + // test, line 30 + v_1 = limit - cursor; + // hop, line 30 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - // not, line 30 - { - v_3 = limit - cursor; - lab1: do { - // literal, line 30 - if (!(eq_s_b(1, "e"))) + cursor = limit - v_1; + // not, line 30 { - break lab1; + v_2 = limit - cursor; + lab0: do { + // literal, line 30 + if (!(eq_s_b(1, "a"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_3; - } - return true; - } + // not, line 30 + { + v_3 = limit - cursor; + lab1: do { + // literal, line 30 + if (!(eq_s_b(1, "e"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_3; + } + return true; + } - private boolean r_K() { + private boolean r_K() { int v_1; int v_2; - // (, line 31 - // test, line 31 - v_1 = limit - cursor; - // hop, line 31 - { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = 
c; - } - cursor = limit - v_1; - // or, line 31 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 31 - if (!(eq_s_b(1, "l"))) + // (, line 31 + // test, line 31 + v_1 = limit - cursor; + // hop, line 31 { - break lab1; + int c = cursor - 3; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - lab2: do { - // literal, line 31 - if (!(eq_s_b(1, "i"))) - { - break lab2; - } - break lab0; - } while (false); - cursor = limit - v_2; - // (, line 31 - // literal, line 31 - if (!(eq_s_b(1, "e"))) - { - return false; + cursor = limit - v_1; + // or, line 31 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 31 + if (!(eq_s_b(1, "l"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + lab2: do { + // literal, line 31 + if (!(eq_s_b(1, "i"))) + { + break lab2; + } + break lab0; + } while (false); + cursor = limit - v_2; + // (, line 31 + // literal, line 31 + if (!(eq_s_b(1, "e"))) + { + return false; + } + // next, line 31 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // literal, line 31 + if (!(eq_s_b(1, "u"))) + { + return false; + } + } while (false); + return true; } - // next, line 31 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // literal, line 31 - if (!(eq_s_b(1, "u"))) - { - return false; - } - } while (false); - return true; - } - private boolean r_L() { + private boolean r_L() { int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 32 - // test, line 32 - v_1 = limit - cursor; - // hop, line 32 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 32 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 32 - if (!(eq_s_b(1, "u"))) + // (, line 32 + // test, line 32 + v_1 = limit - cursor; + // hop, line 32 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - // not, line 32 - { - v_3 = limit - cursor; - lab1: do { - // literal, line 32 - if (!(eq_s_b(1, "x"))) + cursor = limit - v_1; + // not, line 32 { - break lab1; + v_2 = limit - cursor; + lab0: do { + // literal, line 32 + if (!(eq_s_b(1, "u"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_3; - } - // not, line 32 - { - v_4 = limit - cursor; - lab2: do { - // (, line 32 - // literal, line 32 - if (!(eq_s_b(1, "s"))) + // not, line 32 { - break lab2; + v_3 = limit - cursor; + lab1: do { + // literal, line 32 + if (!(eq_s_b(1, "x"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_3; } // not, line 32 { - v_5 = limit - cursor; - lab3: do { + v_4 = limit - cursor; + lab2: do { + // (, line 32 // literal, line 32 - if (!(eq_s_b(1, "o"))) + if (!(eq_s_b(1, "s"))) { - break lab3; + break lab2; } - break lab2; + // not, line 32 + { + v_5 = limit - cursor; + lab3: do { + // literal, line 32 + if (!(eq_s_b(1, "o"))) + { + break lab3; + } + break lab2; + } while (false); + cursor = limit - v_5; + } + return false; } while (false); - cursor = limit - v_5; + cursor = limit - v_4; } - return false; - } while (false); - cursor = limit - v_4; - } - return true; - } + return true; + } - private boolean r_M() { + private boolean r_M() { int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 33 - // 
test, line 33 - v_1 = limit - cursor; - // hop, line 33 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 33 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 33 - if (!(eq_s_b(1, "a"))) + // (, line 33 + // test, line 33 + v_1 = limit - cursor; + // hop, line 33 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - // not, line 33 - { - v_3 = limit - cursor; - lab1: do { - // literal, line 33 - if (!(eq_s_b(1, "c"))) + cursor = limit - v_1; + // not, line 33 { - break lab1; + v_2 = limit - cursor; + lab0: do { + // literal, line 33 + if (!(eq_s_b(1, "a"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_3; - } - // not, line 33 - { - v_4 = limit - cursor; - lab2: do { - // literal, line 33 - if (!(eq_s_b(1, "e"))) + // not, line 33 { - break lab2; + v_3 = limit - cursor; + lab1: do { + // literal, line 33 + if (!(eq_s_b(1, "c"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_3; } - return false; - } while (false); - cursor = limit - v_4; - } - // not, line 33 - { - v_5 = limit - cursor; - lab3: do { - // literal, line 33 - if (!(eq_s_b(1, "m"))) + // not, line 33 { - break lab3; - } - return false; - } while (false); - cursor = limit - v_5; - } - return true; - } - - private boolean r_N() { - int v_1; - int v_2; - int v_3; - // (, line 34 - // test, line 34 - v_1 = limit - cursor; - // hop, line 34 - { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // (, line 34 - // hop, line 34 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - // or, line 34 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // not, line 34 - { - v_3 = limit - cursor; + v_4 = limit - cursor; lab2: do { - // literal, line 34 - if (!(eq_s_b(1, "s"))) + // literal, line 33 + if (!(eq_s_b(1, "e"))) { break lab2; } - break lab1; + return false; } while (false); - cursor = limit - v_3; + cursor = limit - v_4; } - break lab0; - } while (false); - cursor = limit - v_2; - // hop, line 34 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) + // not, line 33 { - return false; + v_5 = limit - cursor; + lab3: do { + // literal, line 33 + if (!(eq_s_b(1, "m"))) + { + break lab3; + } + return false; + } while (false); + cursor = limit - v_5; } - cursor = c; + return true; } - } while (false); - return true; - } - private boolean r_O() { + private boolean r_N() { int v_1; int v_2; - // (, line 35 - // test, line 35 - v_1 = limit - cursor; - // hop, line 35 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // or, line 35 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 35 - if (!(eq_s_b(1, "l"))) + int v_3; + // (, line 34 + // test, line 34 + v_1 = limit - cursor; + // hop, line 34 { - break lab1; + int c = cursor - 3; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - // literal, line 35 - if (!(eq_s_b(1, "i"))) - { - return false; + cursor = limit - v_1; + // (, line 34 + // hop, line 34 + { + int c = cursor - 2; + if (limit_backward > c || c > 
limit) + { + return false; + } + cursor = c; + } + // or, line 34 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // not, line 34 + { + v_3 = limit - cursor; + lab2: do { + // literal, line 34 + if (!(eq_s_b(1, "s"))) + { + break lab2; + } + break lab1; + } while (false); + cursor = limit - v_3; + } + break lab0; + } while (false); + cursor = limit - v_2; + // hop, line 34 + { + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + } while (false); + return true; } - } while (false); - return true; - } - private boolean r_P() { + private boolean r_O() { int v_1; int v_2; - // (, line 36 - // test, line 36 - v_1 = limit - cursor; - // hop, line 36 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; + // (, line 35 + // test, line 35 + v_1 = limit - cursor; + // hop, line 35 + { + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + cursor = limit - v_1; + // or, line 35 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 35 + if (!(eq_s_b(1, "l"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + // literal, line 35 + if (!(eq_s_b(1, "i"))) + { + return false; + } + } while (false); + return true; } - cursor = c; - } - cursor = limit - v_1; - // not, line 36 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 36 - if (!(eq_s_b(1, "c"))) + + private boolean r_P() { + int v_1; + int v_2; + // (, line 36 + // test, line 36 + v_1 = limit - cursor; + // hop, line 36 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - return true; - } + cursor = limit - v_1; + // not, line 36 + { + v_2 = limit - cursor; + lab0: do { + // literal, line 36 + if (!(eq_s_b(1, "c"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; + } + return true; + } - private boolean r_Q() { + private boolean r_Q() { int v_1; int v_2; int v_3; int v_4; - // (, line 37 - // test, line 37 - v_1 = limit - cursor; - // hop, line 37 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // test, line 37 - v_2 = limit - cursor; - // hop, line 37 - { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_2; - // not, line 37 - { - v_3 = limit - cursor; - lab0: do { - // literal, line 37 - if (!(eq_s_b(1, "l"))) + // (, line 37 + // test, line 37 + v_1 = limit - cursor; + // hop, line 37 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_3; - } - // not, line 37 - { - v_4 = limit - cursor; - lab1: do { - // literal, line 37 - if (!(eq_s_b(1, "n"))) + cursor = limit - v_1; + // test, line 37 + v_2 = limit - cursor; + // hop, line 37 { - break lab1; + int c = cursor - 3; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_4; - } - return true; - } + cursor = limit - v_2; + // not, line 37 + { + v_3 = limit - cursor; + lab0: do { + // literal, line 37 + if (!(eq_s_b(1, "l"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_3; + } + // not, line 37 + { + v_4 = limit - cursor; + lab1: do { + // literal, line 37 + if 
(!(eq_s_b(1, "n"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_4; + } + return true; + } - private boolean r_R() { + private boolean r_R() { int v_1; int v_2; - // (, line 38 - // test, line 38 - v_1 = limit - cursor; - // hop, line 38 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // or, line 38 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 38 - if (!(eq_s_b(1, "n"))) + // (, line 38 + // test, line 38 + v_1 = limit - cursor; + // hop, line 38 { - break lab1; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - // literal, line 38 - if (!(eq_s_b(1, "r"))) - { - return false; + cursor = limit - v_1; + // or, line 38 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 38 + if (!(eq_s_b(1, "n"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + // literal, line 38 + if (!(eq_s_b(1, "r"))) + { + return false; + } + } while (false); + return true; } - } while (false); - return true; - } - private boolean r_S() { + private boolean r_S() { int v_1; int v_2; int v_3; - // (, line 39 - // test, line 39 - v_1 = limit - cursor; - // hop, line 39 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // or, line 39 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 39 - if (!(eq_s_b(2, "dr"))) + // (, line 39 + // test, line 39 + v_1 = limit - cursor; + // hop, line 39 { - break lab1; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - // (, line 39 - // literal, line 39 - if (!(eq_s_b(1, "t"))) - { - return false; - } - // not, line 39 - { - v_3 = limit - cursor; - lab2: do { + cursor = limit - v_1; + // or, line 39 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 39 + if (!(eq_s_b(2, "dr"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + // (, line 39 // literal, line 39 if (!(eq_s_b(1, "t"))) { - break lab2; + return false; } - return false; + // not, line 39 + { + v_3 = limit - cursor; + lab2: do { + // literal, line 39 + if (!(eq_s_b(1, "t"))) + { + break lab2; + } + return false; + } while (false); + cursor = limit - v_3; + } } while (false); - cursor = limit - v_3; + return true; } - } while (false); - return true; - } - private boolean r_T() { + private boolean r_T() { int v_1; int v_2; int v_3; - // (, line 40 - // test, line 40 - v_1 = limit - cursor; - // hop, line 40 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // or, line 40 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 40 - if (!(eq_s_b(1, "s"))) + // (, line 40 + // test, line 40 + v_1 = limit - cursor; + // hop, line 40 { - break lab1; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - // (, line 40 - // literal, line 40 - if (!(eq_s_b(1, "t"))) - { - return false; - } - // not, line 40 - { - v_3 = limit - cursor; - lab2: do { + cursor = limit - v_1; + // or, line 40 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 40 + if (!(eq_s_b(1, "s"))) + { + 
break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + // (, line 40 // literal, line 40 - if (!(eq_s_b(1, "o"))) + if (!(eq_s_b(1, "t"))) { - break lab2; + return false; } - return false; + // not, line 40 + { + v_3 = limit - cursor; + lab2: do { + // literal, line 40 + if (!(eq_s_b(1, "o"))) + { + break lab2; + } + return false; + } while (false); + cursor = limit - v_3; + } } while (false); - cursor = limit - v_3; + return true; } - } while (false); - return true; - } - private boolean r_U() { + private boolean r_U() { int v_1; int v_2; - // (, line 41 - // test, line 41 - v_1 = limit - cursor; - // hop, line 41 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // or, line 41 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 41 - if (!(eq_s_b(1, "l"))) + // (, line 41 + // test, line 41 + v_1 = limit - cursor; + // hop, line 41 { - break lab1; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - lab2: do { - // literal, line 41 - if (!(eq_s_b(1, "m"))) + cursor = limit - v_1; + // or, line 41 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 41 + if (!(eq_s_b(1, "l"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + lab2: do { + // literal, line 41 + if (!(eq_s_b(1, "m"))) + { + break lab2; + } + break lab0; + } while (false); + cursor = limit - v_2; + lab3: do { + // literal, line 41 + if (!(eq_s_b(1, "n"))) + { + break lab3; + } + break lab0; + } while (false); + cursor = limit - v_2; + // literal, line 41 + if (!(eq_s_b(1, "r"))) + { + return false; + } + } while (false); + return true; + } + + private boolean r_V() { + int v_1; + // (, line 42 + // test, line 42 + v_1 = limit - cursor; + // hop, line 42 { - break lab2; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - lab3: do { - // literal, line 41 - if (!(eq_s_b(1, "n"))) + cursor = limit - v_1; + // literal, line 42 + if (!(eq_s_b(1, "c"))) { - break lab3; + return false; } - break lab0; - } while (false); - cursor = limit - v_2; - // literal, line 41 - if (!(eq_s_b(1, "r"))) - { - return false; + return true; } - } while (false); - return true; - } - private boolean r_V() { + private boolean r_W() { int v_1; - // (, line 42 - // test, line 42 - v_1 = limit - cursor; - // hop, line 42 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // literal, line 42 - if (!(eq_s_b(1, "c"))) - { - return false; - } - return true; - } - - private boolean r_W() { - int v_1; int v_2; int v_3; - // (, line 43 - // test, line 43 - v_1 = limit - cursor; - // hop, line 43 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 43 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 43 - if (!(eq_s_b(1, "s"))) + // (, line 43 + // test, line 43 + v_1 = limit - cursor; + // hop, line 43 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - // not, line 43 - { - v_3 = limit - cursor; - lab1: do { - // literal, line 43 - if (!(eq_s_b(1, "u"))) + cursor = limit - v_1; 
+ // not, line 43 { - break lab1; + v_2 = limit - cursor; + lab0: do { + // literal, line 43 + if (!(eq_s_b(1, "s"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_3; - } - return true; - } + // not, line 43 + { + v_3 = limit - cursor; + lab1: do { + // literal, line 43 + if (!(eq_s_b(1, "u"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_3; + } + return true; + } - private boolean r_X() { + private boolean r_X() { int v_1; int v_2; - // (, line 44 - // test, line 44 - v_1 = limit - cursor; - // hop, line 44 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; + // (, line 44 + // test, line 44 + v_1 = limit - cursor; + // hop, line 44 + { + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + cursor = limit - v_1; + // or, line 44 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 44 + if (!(eq_s_b(1, "l"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_2; + lab2: do { + // literal, line 44 + if (!(eq_s_b(1, "i"))) + { + break lab2; + } + break lab0; + } while (false); + cursor = limit - v_2; + // (, line 44 + // literal, line 44 + if (!(eq_s_b(1, "e"))) + { + return false; + } + // next, line 44 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // literal, line 44 + if (!(eq_s_b(1, "u"))) + { + return false; + } + } while (false); + return true; } - cursor = c; - } - cursor = limit - v_1; - // or, line 44 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 44 - if (!(eq_s_b(1, "l"))) + + private boolean r_Y() { + int v_1; + // (, line 45 + // test, line 45 + v_1 = limit - cursor; + // hop, line 45 { - break lab1; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - break lab0; - } while (false); - cursor = limit - v_2; - lab2: do { - // literal, line 44 - if (!(eq_s_b(1, "i"))) + cursor = limit - v_1; + // literal, line 45 + if (!(eq_s_b(2, "in"))) { - break lab2; + return false; } - break lab0; - } while (false); - cursor = limit - v_2; - // (, line 44 - // literal, line 44 - if (!(eq_s_b(1, "e"))) - { - return false; + return true; } - // next, line 44 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // literal, line 44 - if (!(eq_s_b(1, "u"))) - { - return false; - } - } while (false); - return true; - } - private boolean r_Y() { + private boolean r_Z() { int v_1; - // (, line 45 - // test, line 45 - v_1 = limit - cursor; - // hop, line 45 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // literal, line 45 - if (!(eq_s_b(2, "in"))) - { - return false; - } - return true; - } - - private boolean r_Z() { - int v_1; int v_2; - // (, line 46 - // test, line 46 - v_1 = limit - cursor; - // hop, line 46 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 46 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 46 - if (!(eq_s_b(1, "f"))) + // (, line 46 + // test, line 46 + v_1 = limit - cursor; + // hop, line 46 { - break lab0; + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - return true; - } + cursor = limit - v_1; + // not, line 46 + { 
+ v_2 = limit - cursor; + lab0: do { + // literal, line 46 + if (!(eq_s_b(1, "f"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; + } + return true; + } - private boolean r_AA() { + private boolean r_AA() { int v_1; - // (, line 47 - // test, line 47 - v_1 = limit - cursor; - // hop, line 47 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; + // (, line 47 + // test, line 47 + v_1 = limit - cursor; + // hop, line 47 + { + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + cursor = limit - v_1; + // among, line 47 + if (find_among_b(a_0, 9) == 0) + { + return false; + } + return true; } - cursor = c; - } - cursor = limit - v_1; - // among, line 47 - if (find_among_b(a_0, 9) == 0) - { - return false; - } - return true; - } - private boolean r_BB() { + private boolean r_BB() { int v_1; int v_2; int v_3; - // (, line 49 - // test, line 49 - v_1 = limit - cursor; - // hop, line 49 - { - int c = cursor - 3; - if (limit_backward > c || c > limit) - { - return false; - } - cursor = c; - } - cursor = limit - v_1; - // not, line 49 - { - v_2 = limit - cursor; - lab0: do { - // literal, line 49 - if (!(eq_s_b(3, "met"))) + // (, line 49 + // test, line 49 + v_1 = limit - cursor; + // hop, line 49 { - break lab0; + int c = cursor - 3; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; } - return false; - } while (false); - cursor = limit - v_2; - } - // not, line 49 - { - v_3 = limit - cursor; - lab1: do { - // literal, line 49 - if (!(eq_s_b(4, "ryst"))) + cursor = limit - v_1; + // not, line 49 { - break lab1; + v_2 = limit - cursor; + lab0: do { + // literal, line 49 + if (!(eq_s_b(3, "met"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_3; - } - return true; - } + // not, line 49 + { + v_3 = limit - cursor; + lab1: do { + // literal, line 49 + if (!(eq_s_b(4, "ryst"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_3; + } + return true; + } - private boolean r_CC() { + private boolean r_CC() { int v_1; - // (, line 50 - // test, line 50 - v_1 = limit - cursor; - // hop, line 50 - { - int c = cursor - 2; - if (limit_backward > c || c > limit) - { - return false; + // (, line 50 + // test, line 50 + v_1 = limit - cursor; + // hop, line 50 + { + int c = cursor - 2; + if (limit_backward > c || c > limit) + { + return false; + } + cursor = c; + } + cursor = limit - v_1; + // literal, line 50 + if (!(eq_s_b(1, "l"))) + { + return false; + } + return true; } - cursor = c; - } - cursor = limit - v_1; - // literal, line 50 - if (!(eq_s_b(1, "l"))) - { - return false; - } - return true; - } - private boolean r_endings() { + private boolean r_endings() { int among_var; - // (, line 55 - // [, line 56 - ket = cursor; - // substring, line 56 - among_var = find_among_b(a_1, 294); - if (among_var == 0) - { - return false; - } - // ], line 56 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 145 - // delete, line 145 - slice_del(); - break; - } - return true; - } + // (, line 55 + // [, line 56 + ket = cursor; + // substring, line 56 + among_var = find_among_b(a_1, 294); + if (among_var == 0) + { + return false; + } + // ], line 56 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 145 + // delete, line 145 + slice_del(); + break; + } + return true; + } - private boolean 
r_undouble() { + private boolean r_undouble() { int v_1; - // (, line 151 - // test, line 152 - v_1 = limit - cursor; - // substring, line 152 - if (find_among_b(a_2, 10) == 0) - { - return false; - } - cursor = limit - v_1; - // [, line 154 - ket = cursor; - // next, line 154 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 154 - bra = cursor; - // delete, line 154 - slice_del(); - return true; - } + // (, line 151 + // test, line 152 + v_1 = limit - cursor; + // substring, line 152 + if (find_among_b(a_2, 10) == 0) + { + return false; + } + cursor = limit - v_1; + // [, line 154 + ket = cursor; + // next, line 154 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // ], line 154 + bra = cursor; + // delete, line 154 + slice_del(); + return true; + } - private boolean r_respell() { + private boolean r_respell() { int among_var; int v_1; int v_2; @@ -1571,338 +1578,351 @@ int v_6; int v_7; int v_8; - // (, line 159 - // [, line 160 - ket = cursor; - // substring, line 160 - among_var = find_among_b(a_3, 34); - if (among_var == 0) - { - return false; - } - // ], line 160 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 161 - // <-, line 161 - slice_from("ief"); - break; - case 2: - // (, line 162 - // <-, line 162 - slice_from("uc"); - break; - case 3: - // (, line 163 - // <-, line 163 - slice_from("um"); - break; - case 4: - // (, line 164 - // <-, line 164 - slice_from("rb"); - break; - case 5: - // (, line 165 - // <-, line 165 - slice_from("ur"); - break; - case 6: - // (, line 166 - // <-, line 166 - slice_from("ister"); - break; - case 7: - // (, line 167 - // <-, line 167 - slice_from("meter"); - break; - case 8: - // (, line 168 - // <-, line 168 - slice_from("olut"); - break; - case 9: - // (, line 169 - // not, line 169 + // (, line 159 + // [, line 160 + ket = cursor; + // substring, line 160 + among_var = find_among_b(a_3, 34); + if (among_var == 0) { - v_1 = limit - cursor; - lab0: do { - // literal, line 169 - if (!(eq_s_b(1, "a"))) + return false; + } + // ], line 160 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 161 + // <-, line 161 + slice_from("ief"); + break; + case 2: + // (, line 162 + // <-, line 162 + slice_from("uc"); + break; + case 3: + // (, line 163 + // <-, line 163 + slice_from("um"); + break; + case 4: + // (, line 164 + // <-, line 164 + slice_from("rb"); + break; + case 5: + // (, line 165 + // <-, line 165 + slice_from("ur"); + break; + case 6: + // (, line 166 + // <-, line 166 + slice_from("ister"); + break; + case 7: + // (, line 167 + // <-, line 167 + slice_from("meter"); + break; + case 8: + // (, line 168 + // <-, line 168 + slice_from("olut"); + break; + case 9: + // (, line 169 + // not, line 169 { - break lab0; + v_1 = limit - cursor; + lab0: do { + // literal, line 169 + if (!(eq_s_b(1, "a"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_1; } - return false; - } while (false); - cursor = limit - v_1; - } - // not, line 169 - { - v_2 = limit - cursor; - lab1: do { - // literal, line 169 - if (!(eq_s_b(1, "i"))) + // not, line 169 { - break lab1; + v_2 = limit - cursor; + lab1: do { + // literal, line 169 + if (!(eq_s_b(1, "i"))) + { + break lab1; + } + return false; + } while (false); + cursor = limit - v_2; } - return false; - } while (false); - cursor = limit - v_2; - } - // not, line 169 - { - v_3 = limit - cursor; - lab2: do { - // literal, line 169 - if (!(eq_s_b(1, "o"))) + // 
not, line 169 { - break lab2; + v_3 = limit - cursor; + lab2: do { + // literal, line 169 + if (!(eq_s_b(1, "o"))) + { + break lab2; + } + return false; + } while (false); + cursor = limit - v_3; } - return false; - } while (false); - cursor = limit - v_3; - } - // <-, line 169 - slice_from("l"); - break; - case 10: - // (, line 170 - // <-, line 170 - slice_from("bic"); - break; - case 11: - // (, line 171 - // <-, line 171 - slice_from("dic"); - break; - case 12: - // (, line 172 - // <-, line 172 - slice_from("pic"); - break; - case 13: - // (, line 173 - // <-, line 173 - slice_from("tic"); - break; - case 14: - // (, line 174 - // <-, line 174 - slice_from("ac"); - break; - case 15: - // (, line 175 - // <-, line 175 - slice_from("ec"); - break; - case 16: - // (, line 176 - // <-, line 176 - slice_from("ic"); - break; - case 17: - // (, line 177 - // <-, line 177 - slice_from("luc"); - break; - case 18: - // (, line 178 - // <-, line 178 - slice_from("uas"); - break; - case 19: - // (, line 179 - // <-, line 179 - slice_from("vas"); - break; - case 20: - // (, line 180 - // <-, line 180 - slice_from("cis"); - break; - case 21: - // (, line 181 - // <-, line 181 - slice_from("lis"); - break; - case 22: - // (, line 182 - // <-, line 182 - slice_from("eris"); - break; - case 23: - // (, line 183 - // <-, line 183 - slice_from("pans"); - break; - case 24: - // (, line 184 - // not, line 184 - { - v_4 = limit - cursor; - lab3: do { - // literal, line 184 - if (!(eq_s_b(1, "s"))) + // <-, line 169 + slice_from("l"); + break; + case 10: + // (, line 170 + // <-, line 170 + slice_from("bic"); + break; + case 11: + // (, line 171 + // <-, line 171 + slice_from("dic"); + break; + case 12: + // (, line 172 + // <-, line 172 + slice_from("pic"); + break; + case 13: + // (, line 173 + // <-, line 173 + slice_from("tic"); + break; + case 14: + // (, line 174 + // <-, line 174 + slice_from("ac"); + break; + case 15: + // (, line 175 + // <-, line 175 + slice_from("ec"); + break; + case 16: + // (, line 176 + // <-, line 176 + slice_from("ic"); + break; + case 17: + // (, line 177 + // <-, line 177 + slice_from("luc"); + break; + case 18: + // (, line 178 + // <-, line 178 + slice_from("uas"); + break; + case 19: + // (, line 179 + // <-, line 179 + slice_from("vas"); + break; + case 20: + // (, line 180 + // <-, line 180 + slice_from("cis"); + break; + case 21: + // (, line 181 + // <-, line 181 + slice_from("lis"); + break; + case 22: + // (, line 182 + // <-, line 182 + slice_from("eris"); + break; + case 23: + // (, line 183 + // <-, line 183 + slice_from("pans"); + break; + case 24: + // (, line 184 + // not, line 184 { - break lab3; + v_4 = limit - cursor; + lab3: do { + // literal, line 184 + if (!(eq_s_b(1, "s"))) + { + break lab3; + } + return false; + } while (false); + cursor = limit - v_4; } - return false; - } while (false); - cursor = limit - v_4; - } - // <-, line 184 - slice_from("ens"); - break; - case 25: - // (, line 185 - // <-, line 185 - slice_from("ons"); - break; - case 26: - // (, line 186 - // <-, line 186 - slice_from("lus"); - break; - case 27: - // (, line 187 - // <-, line 187 - slice_from("rus"); - break; - case 28: - // (, line 188 - // not, line 188 - { - v_5 = limit - cursor; - lab4: do { - // literal, line 188 - if (!(eq_s_b(1, "p"))) + // <-, line 184 + slice_from("ens"); + break; + case 25: + // (, line 185 + // <-, line 185 + slice_from("ons"); + break; + case 26: + // (, line 186 + // <-, line 186 + slice_from("lus"); + break; + case 27: + // (, line 187 + // 
<-, line 187 + slice_from("rus"); + break; + case 28: + // (, line 188 + // not, line 188 { - break lab4; + v_5 = limit - cursor; + lab4: do { + // literal, line 188 + if (!(eq_s_b(1, "p"))) + { + break lab4; + } + return false; + } while (false); + cursor = limit - v_5; } - return false; - } while (false); - cursor = limit - v_5; - } - // not, line 188 - { - v_6 = limit - cursor; - lab5: do { - // literal, line 188 - if (!(eq_s_b(1, "t"))) + // not, line 188 { - break lab5; + v_6 = limit - cursor; + lab5: do { + // literal, line 188 + if (!(eq_s_b(1, "t"))) + { + break lab5; + } + return false; + } while (false); + cursor = limit - v_6; } - return false; - } while (false); - cursor = limit - v_6; - } - // <-, line 188 - slice_from("hes"); - break; - case 29: - // (, line 189 - // <-, line 189 - slice_from("mis"); - break; - case 30: - // (, line 190 - // not, line 190 - { - v_7 = limit - cursor; - lab6: do { - // literal, line 190 - if (!(eq_s_b(1, "m"))) + // <-, line 188 + slice_from("hes"); + break; + case 29: + // (, line 189 + // <-, line 189 + slice_from("mis"); + break; + case 30: + // (, line 190 + // not, line 190 { - break lab6; + v_7 = limit - cursor; + lab6: do { + // literal, line 190 + if (!(eq_s_b(1, "m"))) + { + break lab6; + } + return false; + } while (false); + cursor = limit - v_7; } - return false; - } while (false); - cursor = limit - v_7; - } - // <-, line 190 - slice_from("ens"); - break; - case 31: - // (, line 192 - // <-, line 192 - slice_from("ers"); - break; - case 32: - // (, line 193 - // not, line 193 - { - v_8 = limit - cursor; - lab7: do { - // literal, line 193 - if (!(eq_s_b(1, "n"))) + // <-, line 190 + slice_from("ens"); + break; + case 31: + // (, line 192 + // <-, line 192 + slice_from("ers"); + break; + case 32: + // (, line 193 + // not, line 193 { - break lab7; + v_8 = limit - cursor; + lab7: do { + // literal, line 193 + if (!(eq_s_b(1, "n"))) + { + break lab7; + } + return false; + } while (false); + cursor = limit - v_8; } - return false; - } while (false); - cursor = limit - v_8; + // <-, line 193 + slice_from("es"); + break; + case 33: + // (, line 194 + // <-, line 194 + slice_from("ys"); + break; + case 34: + // (, line 195 + // <-, line 195 + slice_from("ys"); + break; } - // <-, line 193 - slice_from("es"); - break; - case 33: - // (, line 194 - // <-, line 194 - slice_from("ys"); - break; - case 34: - // (, line 195 - // <-, line 195 - slice_from("ys"); - break; - } - return true; - } + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; - // (, line 200 - // backwards, line 202 - limit_backward = cursor; cursor = limit; - // (, line 202 - // do, line 203 - v_1 = limit - cursor; - lab0: do { - // call endings, line 203 - if (!r_endings()) - { - break lab0; + // (, line 200 + // backwards, line 202 + limit_backward = cursor; cursor = limit; + // (, line 202 + // do, line 203 + v_1 = limit - cursor; + lab0: do { + // call endings, line 203 + if (!r_endings()) + { + break lab0; + } + } while (false); + cursor = limit - v_1; + // do, line 204 + v_2 = limit - cursor; + lab1: do { + // call undouble, line 204 + if (!r_undouble()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 205 + v_3 = limit - cursor; + lab2: do { + // call respell, line 205 + if (!r_respell()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + cursor = limit_backward; return true; } - } while (false); - cursor = limit - v_1; - // do, line 204 - v_2 = limit - cursor; - 
lab1: do { - // call undouble, line 204 - if (!r_undouble()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 205 - v_3 = limit - cursor; - lab2: do { - // call respell, line 205 - if (!r_respell()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - cursor = limit_backward; return true; + + @Override + public boolean equals( Object o ) { + return o instanceof LovinsStemmer; } + @Override + public int hashCode() { + return LovinsStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/NorwegianStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/NorwegianStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/NorwegianStemmer.java 17 Aug 2012 14:55:08 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/NorwegianStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,358 +1,378 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class NorwegianStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "a", -1, 1, "", this), - new Among ( "e", -1, 1, "", this), - new Among ( "ede", 1, 1, "", this), - new Among ( "ande", 1, 1, "", this), - new Among ( "ende", 1, 1, "", this), - new Among ( "ane", 1, 1, "", this), - new Among ( "ene", 1, 1, "", this), - new Among ( "hetene", 6, 1, "", this), - new Among ( "erte", 1, 3, "", this), - new Among ( "en", -1, 1, "", this), - new Among ( "heten", 9, 1, "", this), - new Among ( "ar", -1, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "heter", 12, 1, "", this), - new Among ( "s", -1, 2, "", this), - new Among ( "as", 14, 1, "", this), - new Among ( "es", 14, 1, "", this), - new Among ( "edes", 16, 1, "", this), - new Among ( "endes", 16, 1, "", this), - new Among ( "enes", 16, 1, "", this), - new Among ( "hetenes", 19, 1, "", this), - new Among ( "ens", 14, 1, "", this), - new Among ( "hetens", 21, 1, "", this), - new Among ( "ers", 14, 1, "", this), - new Among ( "ets", 14, 1, "", this), - new Among ( "et", -1, 1, "", this), - new Among ( "het", 25, 1, "", this), - new Among ( "ert", -1, 3, "", this), - new Among ( "ast", -1, 1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "dt", -1, -1, "", this), - new Among ( "vt", -1, -1, "", this) - }; + private final static NorwegianStemmer methodObject = new NorwegianStemmer (); - private Among a_2[] = { - new Among ( "leg", -1, 1, "", this), - new Among ( "eleg", 0, 1, "", this), - new Among ( "ig", -1, 1, "", this), - new Among ( "eig", 2, 1, "", this), - new Among ( "lig", 2, 1, "", this), - new Among ( "elig", 4, 1, "", this), - new Among ( "els", -1, 1, "", this), - new Among ( "lov", -1, 1, "", this), - new Among ( "elov", 7, 1, "", this), - new Among ( "slov", 7, 1, "", this), - new Among ( "hetslov", 9, 1, "", this) - }; + private final static Among a_0[] = { + new Among ( "a", -1, 1, "", methodObject ), + new Among ( "e", -1, 1, "", 
methodObject ), + new Among ( "ede", 1, 1, "", methodObject ), + new Among ( "ande", 1, 1, "", methodObject ), + new Among ( "ende", 1, 1, "", methodObject ), + new Among ( "ane", 1, 1, "", methodObject ), + new Among ( "ene", 1, 1, "", methodObject ), + new Among ( "hetene", 6, 1, "", methodObject ), + new Among ( "erte", 1, 3, "", methodObject ), + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "heten", 9, 1, "", methodObject ), + new Among ( "ar", -1, 1, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "heter", 12, 1, "", methodObject ), + new Among ( "s", -1, 2, "", methodObject ), + new Among ( "as", 14, 1, "", methodObject ), + new Among ( "es", 14, 1, "", methodObject ), + new Among ( "edes", 16, 1, "", methodObject ), + new Among ( "endes", 16, 1, "", methodObject ), + new Among ( "enes", 16, 1, "", methodObject ), + new Among ( "hetenes", 19, 1, "", methodObject ), + new Among ( "ens", 14, 1, "", methodObject ), + new Among ( "hetens", 21, 1, "", methodObject ), + new Among ( "ers", 14, 1, "", methodObject ), + new Among ( "ets", 14, 1, "", methodObject ), + new Among ( "et", -1, 1, "", methodObject ), + new Among ( "het", 25, 1, "", methodObject ), + new Among ( "ert", -1, 3, "", methodObject ), + new Among ( "ast", -1, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + private final static Among a_1[] = { + new Among ( "dt", -1, -1, "", methodObject ), + new Among ( "vt", -1, -1, "", methodObject ) + }; - private static final char g_s_ending[] = {119, 125, 149, 1 }; + private final static Among a_2[] = { + new Among ( "leg", -1, 1, "", methodObject ), + new Among ( "eleg", 0, 1, "", methodObject ), + new Among ( "ig", -1, 1, "", methodObject ), + new Among ( "eig", 2, 1, "", methodObject ), + new Among ( "lig", 2, 1, "", methodObject ), + new Among ( "elig", 4, 1, "", methodObject ), + new Among ( "els", -1, 1, "", methodObject ), + new Among ( "lov", -1, 1, "", methodObject ), + new Among ( "elov", 7, 1, "", methodObject ), + new Among ( "slov", 7, 1, "", methodObject ), + new Among ( "hetslov", 9, 1, "", methodObject ) + }; + private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 48, 0, 128 }; + + private static final char g_s_ending[] = {119, 125, 149, 1 }; + private int I_x; private int I_p1; - private void copy_from(NorwegianStemmer other) { - I_x = other.I_x; - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(NorwegianStemmer other) { + I_x = other.I_x; + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; - // (, line 26 - I_p1 = limit; - // test, line 30 - v_1 = cursor; - // (, line 30 - // hop, line 30 - { - int c = cursor + 3; - if (0 > c || c > limit) - { - return false; - } - cursor = c; - } - // setmark x, line 30 - I_x = cursor; - cursor = v_1; - // goto, line 31 - golab0: while(true) - { - v_2 = cursor; - lab1: do { - if (!(in_grouping(g_v, 97, 248))) + // (, line 26 + I_p1 = limit; + // test, line 30 + v_1 = cursor; + // (, line 30 + // hop, line 30 { - break lab1; + int c = cursor + 3; + if (0 > c || c > limit) + { + return false; + } + cursor = c; } - cursor = v_2; - break golab0; - } while (false); - cursor = v_2; - if (cursor >= limit) - { - return false; - } - cursor++; - } - // gopast, line 31 - golab2: while(true) - { - lab3: do { - if (!(out_grouping(g_v, 97, 248))) + // setmark x, line 30 + 
I_x = cursor; + cursor = v_1; + // goto, line 31 + golab0: while(true) { - break lab3; - } - break golab2; - } while (false); - if (cursor >= limit) - { - return false; - } - cursor++; - } - // setmark p1, line 31 - I_p1 = cursor; - // try, line 32 - lab4: do { - // (, line 32 - if (!(I_p1 < I_x)) - { - break lab4; - } - I_p1 = I_x; - } while (false); - return true; - } - - private boolean r_main_suffix() { - int among_var; - int v_1; - int v_2; - int v_3; - // (, line 37 - // setlimit, line 38 - v_1 = limit - cursor; - // tomark, line 38 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 38 - // [, line 38 - ket = cursor; - // substring, line 38 - among_var = find_among_b(a_0, 29); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 38 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 44 - // delete, line 44 - slice_del(); - break; - case 2: - // (, line 46 - // or, line 46 - lab0: do { - v_3 = limit - cursor; + v_2 = cursor; lab1: do { - if (!(in_grouping_b(g_s_ending, 98, 122))) + if (!(in_grouping(g_v, 97, 248))) { break lab1; } - break lab0; + cursor = v_2; + break golab0; } while (false); - cursor = limit - v_3; - // (, line 46 - // literal, line 46 - if (!(eq_s_b(1, "k"))) + cursor = v_2; + if (cursor >= limit) { return false; } - if (!(out_grouping_b(g_v, 97, 248))) + cursor++; + } + // gopast, line 31 + golab2: while(true) + { + lab3: do { + if (!(out_grouping(g_v, 97, 248))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) { return false; } + cursor++; + } + // setmark p1, line 31 + I_p1 = cursor; + // try, line 32 + lab4: do { + // (, line 32 + if (!(I_p1 < I_x)) + { + break lab4; + } + I_p1 = I_x; } while (false); - // delete, line 46 - slice_del(); - break; - case 3: - // (, line 48 - // <-, line 48 - slice_from("er"); - break; - } - return true; - } + return true; + } - private boolean r_consonant_pair() { + private boolean r_main_suffix() { + int among_var; int v_1; int v_2; int v_3; - // (, line 52 - // test, line 53 - v_1 = limit - cursor; - // (, line 53 - // setlimit, line 54 - v_2 = limit - cursor; - // tomark, line 54 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_3 = limit_backward; - limit_backward = cursor; - cursor = limit - v_2; - // (, line 54 - // [, line 54 - ket = cursor; - // substring, line 54 - if (find_among_b(a_1, 2) == 0) - { - limit_backward = v_3; - return false; - } - // ], line 54 - bra = cursor; - limit_backward = v_3; - cursor = limit - v_1; - // next, line 59 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 59 - bra = cursor; - // delete, line 59 - slice_del(); - return true; - } + // (, line 37 + // setlimit, line 38 + v_1 = limit - cursor; + // tomark, line 38 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 38 + // [, line 38 + ket = cursor; + // substring, line 38 + among_var = find_among_b(a_0, 29); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 38 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 44 + // delete, line 44 + slice_del(); + break; + case 2: + // (, line 46 + // or, line 46 + lab0: do { + v_3 = limit - cursor; + lab1: do { + if (!(in_grouping_b(g_s_ending, 98, 122))) 
+ { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_3; + // (, line 46 + // literal, line 46 + if (!(eq_s_b(1, "k"))) + { + return false; + } + if (!(out_grouping_b(g_v, 97, 248))) + { + return false; + } + } while (false); + // delete, line 46 + slice_del(); + break; + case 3: + // (, line 48 + // <-, line 48 + slice_from("er"); + break; + } + return true; + } - private boolean r_other_suffix() { - int among_var; + private boolean r_consonant_pair() { int v_1; int v_2; - // (, line 62 - // setlimit, line 63 - v_1 = limit - cursor; - // tomark, line 63 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 63 - // [, line 63 - ket = cursor; - // substring, line 63 - among_var = find_among_b(a_2, 11); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 63 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 67 - // delete, line 67 + int v_3; + // (, line 52 + // test, line 53 + v_1 = limit - cursor; + // (, line 53 + // setlimit, line 54 + v_2 = limit - cursor; + // tomark, line 54 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_3 = limit_backward; + limit_backward = cursor; + cursor = limit - v_2; + // (, line 54 + // [, line 54 + ket = cursor; + // substring, line 54 + if (find_among_b(a_1, 2) == 0) + { + limit_backward = v_3; + return false; + } + // ], line 54 + bra = cursor; + limit_backward = v_3; + cursor = limit - v_1; + // next, line 59 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // ], line 59 + bra = cursor; + // delete, line 59 slice_del(); - break; - } - return true; - } + return true; + } - public boolean stem() { + private boolean r_other_suffix() { + int among_var; int v_1; int v_2; + // (, line 62 + // setlimit, line 63 + v_1 = limit - cursor; + // tomark, line 63 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 63 + // [, line 63 + ket = cursor; + // substring, line 63 + among_var = find_among_b(a_2, 11); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 63 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 67 + // delete, line 67 + slice_del(); + break; + } + return true; + } + + @Override + public boolean stem() { + int v_1; + int v_2; int v_3; int v_4; - // (, line 72 - // do, line 74 - v_1 = cursor; - lab0: do { - // call mark_regions, line 74 - if (!r_mark_regions()) - { - break lab0; + // (, line 72 + // do, line 74 + v_1 = cursor; + lab0: do { + // call mark_regions, line 74 + if (!r_mark_regions()) + { + break lab0; + } + } while (false); + cursor = v_1; + // backwards, line 75 + limit_backward = cursor; cursor = limit; + // (, line 75 + // do, line 76 + v_2 = limit - cursor; + lab1: do { + // call main_suffix, line 76 + if (!r_main_suffix()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 77 + v_3 = limit - cursor; + lab2: do { + // call consonant_pair, line 77 + if (!r_consonant_pair()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 78 + v_4 = limit - cursor; + lab3: do { + // call other_suffix, line 78 + if (!r_other_suffix()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + cursor = limit_backward; return true; } - } while (false); - cursor = 
v_1; - // backwards, line 75 - limit_backward = cursor; cursor = limit; - // (, line 75 - // do, line 76 - v_2 = limit - cursor; - lab1: do { - // call main_suffix, line 76 - if (!r_main_suffix()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 77 - v_3 = limit - cursor; - lab2: do { - // call consonant_pair, line 77 - if (!r_consonant_pair()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 78 - v_4 = limit - cursor; - lab3: do { - // call other_suffix, line 78 - if (!r_other_suffix()) - { - break lab3; - } - } while (false); - cursor = limit - v_4; - cursor = limit_backward; return true; + + @Override + public boolean equals( Object o ) { + return o instanceof NorwegianStemmer; } + @Override + public int hashCode() { + return NorwegianStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/PorterStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/PorterStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/PorterStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/PorterStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,609 +1,617 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class PorterStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "s", -1, 3, "", this), - new Among ( "ies", 0, 2, "", this), - new Among ( "sses", 0, 1, "", this), - new Among ( "ss", 0, -1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "", -1, 3, "", this), - new Among ( "bb", 0, 2, "", this), - new Among ( "dd", 0, 2, "", this), - new Among ( "ff", 0, 2, "", this), - new Among ( "gg", 0, 2, "", this), - new Among ( "bl", 0, 1, "", this), - new Among ( "mm", 0, 2, "", this), - new Among ( "nn", 0, 2, "", this), - new Among ( "pp", 0, 2, "", this), - new Among ( "rr", 0, 2, "", this), - new Among ( "at", 0, 1, "", this), - new Among ( "tt", 0, 2, "", this), - new Among ( "iz", 0, 1, "", this) - }; + private final static PorterStemmer methodObject = new PorterStemmer (); - private Among a_2[] = { - new Among ( "ed", -1, 2, "", this), - new Among ( "eed", 0, 1, "", this), - new Among ( "ing", -1, 2, "", this) - }; + private final static Among a_0[] = { + new Among ( "s", -1, 3, "", methodObject ), + new Among ( "ies", 0, 2, "", methodObject ), + new Among ( "sses", 0, 1, "", methodObject ), + new Among ( "ss", 0, -1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "anci", -1, 3, "", this), - new Among ( "enci", -1, 2, "", this), - new Among ( "abli", -1, 4, "", this), - new Among ( "eli", -1, 6, "", this), - new Among ( "alli", -1, 9, "", this), - new Among ( "ousli", -1, 12, "", this), - new Among ( "entli", -1, 5, "", this), - new Among ( "aliti", -1, 10, "", this), - new Among ( "biliti", -1, 14, "", this), - new Among ( "iviti", -1, 13, "", this), - new Among ( "tional", -1, 1, "", this), - new Among ( "ational", 10, 8, "", this), - new Among ( "alism", -1, 10, "", this), - new Among ( "ation", -1, 8, "", this), - new Among ( "ization", 13, 7, "", this), - new Among ( "izer", -1, 7, "", this), - new Among ( "ator", -1, 8, "", this), - new Among ( "iveness", -1, 13, "", this), - new Among ( "fulness", -1, 11, "", this), - new Among ( "ousness", -1, 12, "", this) - }; + private final static Among a_1[] = { + new Among ( "", -1, 3, "", methodObject ), + new Among ( "bb", 0, 2, "", methodObject ), + new Among ( "dd", 0, 2, "", methodObject ), + new Among ( "ff", 0, 2, "", methodObject ), + new Among ( "gg", 0, 2, "", methodObject ), + new Among ( "bl", 0, 1, "", methodObject ), + new Among ( "mm", 0, 2, "", methodObject ), + new Among ( "nn", 0, 2, "", methodObject ), + new Among ( "pp", 0, 2, "", methodObject ), + new Among ( "rr", 0, 2, "", methodObject ), + new Among ( "at", 0, 1, "", methodObject ), + new Among ( "tt", 0, 2, "", methodObject ), + new Among ( "iz", 0, 1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "icate", -1, 2, "", this), - new Among ( "ative", -1, 3, "", this), - new Among ( "alize", -1, 1, "", this), - new Among ( "iciti", -1, 2, "", this), - new Among ( "ical", -1, 2, "", this), - new Among ( "ful", -1, 3, "", this), - new Among ( "ness", -1, 3, "", this) - }; + private final static Among a_2[] = { + new Among ( "ed", -1, 2, "", methodObject ), + new Among ( "eed", 0, 1, "", methodObject ), + new Among ( "ing", -1, 2, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "ic", -1, 1, "", this), - new Among ( "ance", -1, 1, "", this), - new Among ( "ence", -1, 1, "", this), - new Among ( "able", -1, 1, "", this), - new Among ( "ible", -1, 1, "", this), - new Among ( "ate", -1, 1, "", this), - new Among ( "ive", -1, 1, "", this), 
- new Among ( "ize", -1, 1, "", this), - new Among ( "iti", -1, 1, "", this), - new Among ( "al", -1, 1, "", this), - new Among ( "ism", -1, 1, "", this), - new Among ( "ion", -1, 2, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "ous", -1, 1, "", this), - new Among ( "ant", -1, 1, "", this), - new Among ( "ent", -1, 1, "", this), - new Among ( "ment", 15, 1, "", this), - new Among ( "ement", 16, 1, "", this), - new Among ( "ou", -1, 1, "", this) - }; + private final static Among a_3[] = { + new Among ( "anci", -1, 3, "", methodObject ), + new Among ( "enci", -1, 2, "", methodObject ), + new Among ( "abli", -1, 4, "", methodObject ), + new Among ( "eli", -1, 6, "", methodObject ), + new Among ( "alli", -1, 9, "", methodObject ), + new Among ( "ousli", -1, 12, "", methodObject ), + new Among ( "entli", -1, 5, "", methodObject ), + new Among ( "aliti", -1, 10, "", methodObject ), + new Among ( "biliti", -1, 14, "", methodObject ), + new Among ( "iviti", -1, 13, "", methodObject ), + new Among ( "tional", -1, 1, "", methodObject ), + new Among ( "ational", 10, 8, "", methodObject ), + new Among ( "alism", -1, 10, "", methodObject ), + new Among ( "ation", -1, 8, "", methodObject ), + new Among ( "ization", 13, 7, "", methodObject ), + new Among ( "izer", -1, 7, "", methodObject ), + new Among ( "ator", -1, 8, "", methodObject ), + new Among ( "iveness", -1, 13, "", methodObject ), + new Among ( "fulness", -1, 11, "", methodObject ), + new Among ( "ousness", -1, 12, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1 }; + private final static Among a_4[] = { + new Among ( "icate", -1, 2, "", methodObject ), + new Among ( "ative", -1, 3, "", methodObject ), + new Among ( "alize", -1, 1, "", methodObject ), + new Among ( "iciti", -1, 2, "", methodObject ), + new Among ( "ical", -1, 2, "", methodObject ), + new Among ( "ful", -1, 3, "", methodObject ), + new Among ( "ness", -1, 3, "", methodObject ) + }; - private static final char g_v_WXY[] = {1, 17, 65, 208, 1 }; + private final static Among a_5[] = { + new Among ( "ic", -1, 1, "", methodObject ), + new Among ( "ance", -1, 1, "", methodObject ), + new Among ( "ence", -1, 1, "", methodObject ), + new Among ( "able", -1, 1, "", methodObject ), + new Among ( "ible", -1, 1, "", methodObject ), + new Among ( "ate", -1, 1, "", methodObject ), + new Among ( "ive", -1, 1, "", methodObject ), + new Among ( "ize", -1, 1, "", methodObject ), + new Among ( "iti", -1, 1, "", methodObject ), + new Among ( "al", -1, 1, "", methodObject ), + new Among ( "ism", -1, 1, "", methodObject ), + new Among ( "ion", -1, 2, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "ous", -1, 1, "", methodObject ), + new Among ( "ant", -1, 1, "", methodObject ), + new Among ( "ent", -1, 1, "", methodObject ), + new Among ( "ment", 15, 1, "", methodObject ), + new Among ( "ement", 16, 1, "", methodObject ), + new Among ( "ou", -1, 1, "", methodObject ) + }; + private static final char g_v[] = {17, 65, 16, 1 }; + + private static final char g_v_WXY[] = {1, 17, 65, 208, 1 }; + private boolean B_Y_found; private int I_p2; private int I_p1; - private void copy_from(PorterStemmer other) { - B_Y_found = other.B_Y_found; - I_p2 = other.I_p2; - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(PorterStemmer other) { + B_Y_found = other.B_Y_found; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_shortv() { - // (, line 19 - if (!(out_grouping_b(g_v_WXY, 
89, 121))) - { - return false; - } - if (!(in_grouping_b(g_v, 97, 121))) - { - return false; - } - if (!(out_grouping_b(g_v, 97, 121))) - { - return false; - } - return true; - } + private boolean r_shortv() { + // (, line 19 + if (!(out_grouping_b(g_v_WXY, 89, 121))) + { + return false; + } + if (!(in_grouping_b(g_v, 97, 121))) + { + return false; + } + if (!(out_grouping_b(g_v, 97, 121))) + { + return false; + } + return true; + } - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } + private boolean r_R1() { + if (!(I_p1 <= cursor)) + { + return false; + } + return true; + } - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } + private boolean r_R2() { + if (!(I_p2 <= cursor)) + { + return false; + } + return true; + } - private boolean r_Step_1a() { + private boolean r_Step_1a() { int among_var; - // (, line 24 - // [, line 25 - ket = cursor; - // substring, line 25 - among_var = find_among_b(a_0, 4); - if (among_var == 0) - { - return false; - } - // ], line 25 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 26 - // <-, line 26 - slice_from("ss"); - break; - case 2: - // (, line 27 - // <-, line 27 - slice_from("i"); - break; - case 3: - // (, line 29 - // delete, line 29 - slice_del(); - break; - } - return true; - } + // (, line 24 + // [, line 25 + ket = cursor; + // substring, line 25 + among_var = find_among_b(a_0, 4); + if (among_var == 0) + { + return false; + } + // ], line 25 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 26 + // <-, line 26 + slice_from("ss"); + break; + case 2: + // (, line 27 + // <-, line 27 + slice_from("i"); + break; + case 3: + // (, line 29 + // delete, line 29 + slice_del(); + break; + } + return true; + } - private boolean r_Step_1b() { + private boolean r_Step_1b() { int among_var; int v_1; int v_3; int v_4; - // (, line 33 - // [, line 34 - ket = cursor; - // substring, line 34 - among_var = find_among_b(a_2, 3); - if (among_var == 0) - { - return false; - } - // ], line 34 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 35 - // call R1, line 35 - if (!r_R1()) + // (, line 33 + // [, line 34 + ket = cursor; + // substring, line 34 + among_var = find_among_b(a_2, 3); + if (among_var == 0) { return false; } - // <-, line 35 - slice_from("ee"); - break; - case 2: - // (, line 37 - // test, line 38 - v_1 = limit - cursor; - // gopast, line 38 - golab0: while(true) - { + // ], line 34 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 35 + // call R1, line 35 + if (!r_R1()) + { + return false; + } + // <-, line 35 + slice_from("ee"); + break; + case 2: + // (, line 37 + // test, line 38 + v_1 = limit - cursor; + // gopast, line 38 + golab0: while(true) + { + lab1: do { + if (!(in_grouping_b(g_v, 97, 121))) + { + break lab1; + } + break golab0; + } while (false); + if (cursor <= limit_backward) + { + return false; + } + cursor--; + } + cursor = limit - v_1; + // delete, line 38 + slice_del(); + // test, line 39 + v_3 = limit - cursor; + // substring, line 39 + among_var = find_among_b(a_1, 13); + if (among_var == 0) + { + return false; + } + cursor = limit - v_3; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 41 + // <+, line 41 + { + int c = cursor; + insert(cursor, cursor, "e"); + cursor = c; + } + break; + case 2: + // (, line 44 + // [, line 44 + ket = cursor; + // next, line 44 + if (cursor 
<= limit_backward) + { + return false; + } + cursor--; + // ], line 44 + bra = cursor; + // delete, line 44 + slice_del(); + break; + case 3: + // (, line 45 + // atmark, line 45 + if (cursor != I_p1) + { + return false; + } + // test, line 45 + v_4 = limit - cursor; + // call shortv, line 45 + if (!r_shortv()) + { + return false; + } + cursor = limit - v_4; + // <+, line 45 + { + int c = cursor; + insert(cursor, cursor, "e"); + cursor = c; + } + break; + } + break; + } + return true; + } + + private boolean r_Step_1c() { + int v_1; + // (, line 51 + // [, line 52 + ket = cursor; + // or, line 52 + lab0: do { + v_1 = limit - cursor; lab1: do { - if (!(in_grouping_b(g_v, 97, 121))) + // literal, line 52 + if (!(eq_s_b(1, "y"))) { break lab1; } - break golab0; + break lab0; } while (false); + cursor = limit - v_1; + // literal, line 52 + if (!(eq_s_b(1, "Y"))) + { + return false; + } + } while (false); + // ], line 52 + bra = cursor; + // gopast, line 53 + golab2: while(true) + { + lab3: do { + if (!(in_grouping_b(g_v, 97, 121))) + { + break lab3; + } + break golab2; + } while (false); if (cursor <= limit_backward) { return false; } cursor--; } - cursor = limit - v_1; - // delete, line 38 - slice_del(); - // test, line 39 - v_3 = limit - cursor; - // substring, line 39 - among_var = find_among_b(a_1, 13); + // <-, line 54 + slice_from("i"); + return true; + } + + private boolean r_Step_2() { + int among_var; + // (, line 57 + // [, line 58 + ket = cursor; + // substring, line 58 + among_var = find_among_b(a_3, 20); if (among_var == 0) { return false; } - cursor = limit - v_3; + // ], line 58 + bra = cursor; + // call R1, line 58 + if (!r_R1()) + { + return false; + } switch(among_var) { case 0: return false; case 1: - // (, line 41 - // <+, line 41 - { - int c = cursor; - insert(cursor, cursor, "e"); - cursor = c; - } + // (, line 59 + // <-, line 59 + slice_from("tion"); break; case 2: - // (, line 44 - // [, line 44 - ket = cursor; - // next, line 44 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // ], line 44 - bra = cursor; - // delete, line 44 - slice_del(); + // (, line 60 + // <-, line 60 + slice_from("ence"); break; case 3: - // (, line 45 - // atmark, line 45 - if (cursor != I_p1) - { - return false; - } - // test, line 45 - v_4 = limit - cursor; - // call shortv, line 45 - if (!r_shortv()) - { - return false; - } - cursor = limit - v_4; - // <+, line 45 - { - int c = cursor; - insert(cursor, cursor, "e"); - cursor = c; - } + // (, line 61 + // <-, line 61 + slice_from("ance"); break; + case 4: + // (, line 62 + // <-, line 62 + slice_from("able"); + break; + case 5: + // (, line 63 + // <-, line 63 + slice_from("ent"); + break; + case 6: + // (, line 64 + // <-, line 64 + slice_from("e"); + break; + case 7: + // (, line 66 + // <-, line 66 + slice_from("ize"); + break; + case 8: + // (, line 68 + // <-, line 68 + slice_from("ate"); + break; + case 9: + // (, line 69 + // <-, line 69 + slice_from("al"); + break; + case 10: + // (, line 71 + // <-, line 71 + slice_from("al"); + break; + case 11: + // (, line 72 + // <-, line 72 + slice_from("ful"); + break; + case 12: + // (, line 74 + // <-, line 74 + slice_from("ous"); + break; + case 13: + // (, line 76 + // <-, line 76 + slice_from("ive"); + break; + case 14: + // (, line 77 + // <-, line 77 + slice_from("ble"); + break; } - break; - } - return true; - } + return true; + } - private boolean r_Step_1c() { - int v_1; - // (, line 51 - // [, line 52 - ket = cursor; - // or, line 52 - lab0: do { - v_1 = limit - 
cursor; - lab1: do { - // literal, line 52 - if (!(eq_s_b(1, "y"))) + private boolean r_Step_3() { + int among_var; + // (, line 81 + // [, line 82 + ket = cursor; + // substring, line 82 + among_var = find_among_b(a_4, 7); + if (among_var == 0) { - break lab1; + return false; } - break lab0; - } while (false); - cursor = limit - v_1; - // literal, line 52 - if (!(eq_s_b(1, "Y"))) - { - return false; - } - } while (false); - // ], line 52 - bra = cursor; - // gopast, line 53 - golab2: while(true) - { - lab3: do { - if (!(in_grouping_b(g_v, 97, 121))) + // ], line 82 + bra = cursor; + // call R1, line 82 + if (!r_R1()) { - break lab3; + return false; } - break golab2; - } while (false); - if (cursor <= limit_backward) - { - return false; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 83 + // <-, line 83 + slice_from("al"); + break; + case 2: + // (, line 85 + // <-, line 85 + slice_from("ic"); + break; + case 3: + // (, line 87 + // delete, line 87 + slice_del(); + break; + } + return true; } - cursor--; - } - // <-, line 54 - slice_from("i"); - return true; - } - private boolean r_Step_2() { + private boolean r_Step_4() { int among_var; - // (, line 57 - // [, line 58 - ket = cursor; - // substring, line 58 - among_var = find_among_b(a_3, 20); - if (among_var == 0) - { - return false; - } - // ], line 58 - bra = cursor; - // call R1, line 58 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 59 - // <-, line 59 - slice_from("tion"); - break; - case 2: - // (, line 60 - // <-, line 60 - slice_from("ence"); - break; - case 3: - // (, line 61 - // <-, line 61 - slice_from("ance"); - break; - case 4: - // (, line 62 - // <-, line 62 - slice_from("able"); - break; - case 5: - // (, line 63 - // <-, line 63 - slice_from("ent"); - break; - case 6: - // (, line 64 - // <-, line 64 - slice_from("e"); - break; - case 7: - // (, line 66 - // <-, line 66 - slice_from("ize"); - break; - case 8: - // (, line 68 - // <-, line 68 - slice_from("ate"); - break; - case 9: - // (, line 69 - // <-, line 69 - slice_from("al"); - break; - case 10: - // (, line 71 - // <-, line 71 - slice_from("al"); - break; - case 11: - // (, line 72 - // <-, line 72 - slice_from("ful"); - break; - case 12: - // (, line 74 - // <-, line 74 - slice_from("ous"); - break; - case 13: - // (, line 76 - // <-, line 76 - slice_from("ive"); - break; - case 14: - // (, line 77 - // <-, line 77 - slice_from("ble"); - break; - } - return true; - } + int v_1; + // (, line 91 + // [, line 92 + ket = cursor; + // substring, line 92 + among_var = find_among_b(a_5, 19); + if (among_var == 0) + { + return false; + } + // ], line 92 + bra = cursor; + // call R2, line 92 + if (!r_R2()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 95 + // delete, line 95 + slice_del(); + break; + case 2: + // (, line 96 + // or, line 96 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // literal, line 96 + if (!(eq_s_b(1, "s"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_1; + // literal, line 96 + if (!(eq_s_b(1, "t"))) + { + return false; + } + } while (false); + // delete, line 96 + slice_del(); + break; + } + return true; + } - private boolean r_Step_3() { - int among_var; - // (, line 81 - // [, line 82 - ket = cursor; - // substring, line 82 - among_var = find_among_b(a_4, 7); - if (among_var == 0) - { - return false; - } - // ], line 82 - bra = cursor; - // call R1, line 82 - if (!r_R1()) - { 
- return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 83 - // <-, line 83 - slice_from("al"); - break; - case 2: - // (, line 85 - // <-, line 85 - slice_from("ic"); - break; - case 3: - // (, line 87 - // delete, line 87 - slice_del(); - break; - } - return true; - } - - private boolean r_Step_4() { - int among_var; + private boolean r_Step_5a() { int v_1; - // (, line 91 - // [, line 92 - ket = cursor; - // substring, line 92 - among_var = find_among_b(a_5, 19); - if (among_var == 0) - { - return false; - } - // ], line 92 - bra = cursor; - // call R2, line 92 - if (!r_R2()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 95 - // delete, line 95 - slice_del(); - break; - case 2: - // (, line 96 - // or, line 96 + int v_2; + // (, line 100 + // [, line 101 + ket = cursor; + // literal, line 101 + if (!(eq_s_b(1, "e"))) + { + return false; + } + // ], line 101 + bra = cursor; + // or, line 102 lab0: do { v_1 = limit - cursor; lab1: do { - // literal, line 96 - if (!(eq_s_b(1, "s"))) + // call R2, line 102 + if (!r_R2()) { break lab1; } break lab0; } while (false); cursor = limit - v_1; - // literal, line 96 - if (!(eq_s_b(1, "t"))) + // (, line 102 + // call R1, line 102 + if (!r_R1()) { return false; } + // not, line 102 + { + v_2 = limit - cursor; + lab2: do { + // call shortv, line 102 + if (!r_shortv()) + { + break lab2; + } + return false; + } while (false); + cursor = limit - v_2; + } } while (false); - // delete, line 96 + // delete, line 103 slice_del(); - break; - } - return true; - } + return true; + } - private boolean r_Step_5a() { - int v_1; - int v_2; - // (, line 100 - // [, line 101 - ket = cursor; - // literal, line 101 - if (!(eq_s_b(1, "e"))) - { - return false; - } - // ], line 101 - bra = cursor; - // or, line 102 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // call R2, line 102 + private boolean r_Step_5b() { + // (, line 106 + // [, line 107 + ket = cursor; + // literal, line 107 + if (!(eq_s_b(1, "l"))) + { + return false; + } + // ], line 107 + bra = cursor; + // call R2, line 108 if (!r_R2()) { - break lab1; + return false; } - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 102 - // call R1, line 102 - if (!r_R1()) - { - return false; - } - // not, line 102 - { - v_2 = limit - cursor; - lab2: do { - // call shortv, line 102 - if (!r_shortv()) - { - break lab2; - } + // literal, line 108 + if (!(eq_s_b(1, "l"))) + { return false; - } while (false); - cursor = limit - v_2; + } + // delete, line 109 + slice_del(); + return true; } - } while (false); - // delete, line 103 - slice_del(); - return true; - } - private boolean r_Step_5b() { - // (, line 106 - // [, line 107 - ket = cursor; - // literal, line 107 - if (!(eq_s_b(1, "l"))) - { - return false; - } - // ], line 107 - bra = cursor; - // call R2, line 108 - if (!r_R2()) - { - return false; - } - // literal, line 108 - if (!(eq_s_b(1, "l"))) - { - return false; - } - // delete, line 109 - slice_del(); - return true; - } - - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -620,287 +628,299 @@ int v_18; int v_19; int v_20; - // (, line 113 - // unset Y_found, line 115 - B_Y_found = false; - // do, line 116 - v_1 = cursor; - lab0: do { - // (, line 116 - // [, line 116 - bra = cursor; - // literal, line 116 - if (!(eq_s(1, "y"))) - { - break lab0; - } - // ], line 116 - ket = cursor; - // <-, line 116 - slice_from("Y"); - // set Y_found, line 116 - B_Y_found = 
true; - } while (false); - cursor = v_1; - // do, line 117 - v_2 = cursor; - lab1: do { - // repeat, line 117 - replab2: while(true) - { - v_3 = cursor; - lab3: do { - // (, line 117 - // goto, line 117 - golab4: while(true) + // (, line 113 + // unset Y_found, line 115 + B_Y_found = false; + // do, line 116 + v_1 = cursor; + lab0: do { + // (, line 116 + // [, line 116 + bra = cursor; + // literal, line 116 + if (!(eq_s(1, "y"))) { - v_4 = cursor; - lab5: do { + break lab0; + } + // ], line 116 + ket = cursor; + // <-, line 116 + slice_from("Y"); + // set Y_found, line 116 + B_Y_found = true; + } while (false); + cursor = v_1; + // do, line 117 + v_2 = cursor; + lab1: do { + // repeat, line 117 + replab2: while(true) + { + v_3 = cursor; + lab3: do { // (, line 117 + // goto, line 117 + golab4: while(true) + { + v_4 = cursor; + lab5: do { + // (, line 117 + if (!(in_grouping(g_v, 97, 121))) + { + break lab5; + } + // [, line 117 + bra = cursor; + // literal, line 117 + if (!(eq_s(1, "y"))) + { + break lab5; + } + // ], line 117 + ket = cursor; + cursor = v_4; + break golab4; + } while (false); + cursor = v_4; + if (cursor >= limit) + { + break lab3; + } + cursor++; + } + // <-, line 117 + slice_from("Y"); + // set Y_found, line 117 + B_Y_found = true; + continue replab2; + } while (false); + cursor = v_3; + break replab2; + } + } while (false); + cursor = v_2; + I_p1 = limit; + I_p2 = limit; + // do, line 121 + v_5 = cursor; + lab6: do { + // (, line 121 + // gopast, line 122 + golab7: while(true) + { + lab8: do { if (!(in_grouping(g_v, 97, 121))) { - break lab5; + break lab8; } - // [, line 117 - bra = cursor; - // literal, line 117 - if (!(eq_s(1, "y"))) + break golab7; + } while (false); + if (cursor >= limit) + { + break lab6; + } + cursor++; + } + // gopast, line 122 + golab9: while(true) + { + lab10: do { + if (!(out_grouping(g_v, 97, 121))) { - break lab5; + break lab10; } - // ], line 117 - ket = cursor; - cursor = v_4; - break golab4; + break golab9; } while (false); - cursor = v_4; if (cursor >= limit) { - break lab3; + break lab6; } cursor++; } - // <-, line 117 - slice_from("Y"); - // set Y_found, line 117 - B_Y_found = true; - continue replab2; + // setmark p1, line 122 + I_p1 = cursor; + // gopast, line 123 + golab11: while(true) + { + lab12: do { + if (!(in_grouping(g_v, 97, 121))) + { + break lab12; + } + break golab11; + } while (false); + if (cursor >= limit) + { + break lab6; + } + cursor++; + } + // gopast, line 123 + golab13: while(true) + { + lab14: do { + if (!(out_grouping(g_v, 97, 121))) + { + break lab14; + } + break golab13; + } while (false); + if (cursor >= limit) + { + break lab6; + } + cursor++; + } + // setmark p2, line 123 + I_p2 = cursor; } while (false); - cursor = v_3; - break replab2; - } - } while (false); - cursor = v_2; - I_p1 = limit; - I_p2 = limit; - // do, line 121 - v_5 = cursor; - lab6: do { - // (, line 121 - // gopast, line 122 - golab7: while(true) - { - lab8: do { - if (!(in_grouping(g_v, 97, 121))) + cursor = v_5; + // backwards, line 126 + limit_backward = cursor; cursor = limit; + // (, line 126 + // do, line 127 + v_10 = limit - cursor; + lab15: do { + // call Step_1a, line 127 + if (!r_Step_1a()) { - break lab8; + break lab15; } - break golab7; } while (false); - if (cursor >= limit) - { - break lab6; - } - cursor++; - } - // gopast, line 122 - golab9: while(true) - { - lab10: do { - if (!(out_grouping(g_v, 97, 121))) + cursor = limit - v_10; + // do, line 128 + v_11 = limit - cursor; + lab16: do { + // call Step_1b, line 128 + if 
(!r_Step_1b()) { - break lab10; + break lab16; } - break golab9; } while (false); - if (cursor >= limit) - { - break lab6; - } - cursor++; - } - // setmark p1, line 122 - I_p1 = cursor; - // gopast, line 123 - golab11: while(true) - { - lab12: do { - if (!(in_grouping(g_v, 97, 121))) + cursor = limit - v_11; + // do, line 129 + v_12 = limit - cursor; + lab17: do { + // call Step_1c, line 129 + if (!r_Step_1c()) { - break lab12; + break lab17; } - break golab11; } while (false); - if (cursor >= limit) - { - break lab6; - } - cursor++; - } - // gopast, line 123 - golab13: while(true) - { - lab14: do { - if (!(out_grouping(g_v, 97, 121))) + cursor = limit - v_12; + // do, line 130 + v_13 = limit - cursor; + lab18: do { + // call Step_2, line 130 + if (!r_Step_2()) { - break lab14; + break lab18; } - break golab13; } while (false); - if (cursor >= limit) - { - break lab6; - } - cursor++; - } - // setmark p2, line 123 - I_p2 = cursor; - } while (false); - cursor = v_5; - // backwards, line 126 - limit_backward = cursor; cursor = limit; - // (, line 126 - // do, line 127 - v_10 = limit - cursor; - lab15: do { - // call Step_1a, line 127 - if (!r_Step_1a()) - { - break lab15; - } - } while (false); - cursor = limit - v_10; - // do, line 128 - v_11 = limit - cursor; - lab16: do { - // call Step_1b, line 128 - if (!r_Step_1b()) - { - break lab16; - } - } while (false); - cursor = limit - v_11; - // do, line 129 - v_12 = limit - cursor; - lab17: do { - // call Step_1c, line 129 - if (!r_Step_1c()) - { - break lab17; - } - } while (false); - cursor = limit - v_12; - // do, line 130 - v_13 = limit - cursor; - lab18: do { - // call Step_2, line 130 - if (!r_Step_2()) - { - break lab18; - } - } while (false); - cursor = limit - v_13; - // do, line 131 - v_14 = limit - cursor; - lab19: do { - // call Step_3, line 131 - if (!r_Step_3()) - { - break lab19; - } - } while (false); - cursor = limit - v_14; - // do, line 132 - v_15 = limit - cursor; - lab20: do { - // call Step_4, line 132 - if (!r_Step_4()) - { - break lab20; - } - } while (false); - cursor = limit - v_15; - // do, line 133 - v_16 = limit - cursor; - lab21: do { - // call Step_5a, line 133 - if (!r_Step_5a()) - { - break lab21; - } - } while (false); - cursor = limit - v_16; - // do, line 134 - v_17 = limit - cursor; - lab22: do { - // call Step_5b, line 134 - if (!r_Step_5b()) - { - break lab22; - } - } while (false); - cursor = limit - v_17; - cursor = limit_backward; // do, line 137 - v_18 = cursor; - lab23: do { - // (, line 137 - // Boolean test Y_found, line 137 - if (!(B_Y_found)) - { - break lab23; - } - // repeat, line 137 - replab24: while(true) - { - v_19 = cursor; - lab25: do { + cursor = limit - v_13; + // do, line 131 + v_14 = limit - cursor; + lab19: do { + // call Step_3, line 131 + if (!r_Step_3()) + { + break lab19; + } + } while (false); + cursor = limit - v_14; + // do, line 132 + v_15 = limit - cursor; + lab20: do { + // call Step_4, line 132 + if (!r_Step_4()) + { + break lab20; + } + } while (false); + cursor = limit - v_15; + // do, line 133 + v_16 = limit - cursor; + lab21: do { + // call Step_5a, line 133 + if (!r_Step_5a()) + { + break lab21; + } + } while (false); + cursor = limit - v_16; + // do, line 134 + v_17 = limit - cursor; + lab22: do { + // call Step_5b, line 134 + if (!r_Step_5b()) + { + break lab22; + } + } while (false); + cursor = limit - v_17; + cursor = limit_backward; // do, line 137 + v_18 = cursor; + lab23: do { // (, line 137 - // goto, line 137 - golab26: while(true) + // Boolean test Y_found, 
line 137 + if (!(B_Y_found)) { - v_20 = cursor; - lab27: do { + break lab23; + } + // repeat, line 137 + replab24: while(true) + { + v_19 = cursor; + lab25: do { // (, line 137 - // [, line 137 - bra = cursor; - // literal, line 137 - if (!(eq_s(1, "Y"))) + // goto, line 137 + golab26: while(true) { - break lab27; + v_20 = cursor; + lab27: do { + // (, line 137 + // [, line 137 + bra = cursor; + // literal, line 137 + if (!(eq_s(1, "Y"))) + { + break lab27; + } + // ], line 137 + ket = cursor; + cursor = v_20; + break golab26; + } while (false); + cursor = v_20; + if (cursor >= limit) + { + break lab25; + } + cursor++; } - // ], line 137 - ket = cursor; - cursor = v_20; - break golab26; + // <-, line 137 + slice_from("y"); + continue replab24; } while (false); - cursor = v_20; - if (cursor >= limit) - { - break lab25; - } - cursor++; + cursor = v_19; + break replab24; } - // <-, line 137 - slice_from("y"); - continue replab24; } while (false); - cursor = v_19; - break replab24; + cursor = v_18; + return true; } - } while (false); - cursor = v_18; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof PorterStemmer; } + @Override + public int hashCode() { + return PorterStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/PortugueseStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/PortugueseStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/PortugueseStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/PortugueseStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,988 +1,996 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class PortugueseStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "", -1, 3, "", this), - new Among ( "\u00E3", 0, 1, "", this), - new Among ( "\u00F5", 0, 2, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "", -1, 3, "", this), - new Among ( "a~", 0, 1, "", this), - new Among ( "o~", 0, 2, "", this) - }; + private final static PortugueseStemmer methodObject = new PortugueseStemmer (); - private Among a_2[] = { - new Among ( "ic", -1, -1, "", this), - new Among ( "ad", -1, -1, "", this), - new Among ( "os", -1, -1, "", this), - new Among ( "iv", -1, 1, "", this) - }; + private final static Among a_0[] = { + new Among ( "", -1, 3, "", methodObject ), + new Among ( "\u00E3", 0, 1, "", methodObject ), + new Among ( "\u00F5", 0, 2, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "ante", -1, 1, "", this), - new Among ( "avel", -1, 1, "", this), - new Among ( "\u00EDvel", -1, 1, "", this) - }; + private final static Among a_1[] = { + new Among ( "", -1, 3, "", methodObject ), + new Among ( "a~", 0, 1, "", methodObject ), + new Among ( "o~", 0, 2, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "ic", -1, 1, "", this), - new Among ( "abil", -1, 1, "", this), - new Among ( "iv", -1, 1, "", this) - }; + private final static Among a_2[] = { + new Among ( "ic", -1, -1, "", methodObject ), + new Among ( "ad", -1, -1, "", methodObject ), + new Among ( "os", -1, -1, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "ica", -1, 1, "", this), - new Among ( "\u00E2ncia", -1, 1, "", this), - new Among ( "\u00EAncia", -1, 4, "", this), - new Among ( "ira", -1, 9, "", this), - new Among ( "adora", -1, 1, "", this), - new Among ( "osa", -1, 1, "", this), - new Among ( "ista", -1, 1, "", this), - new Among ( "iva", -1, 8, "", this), - new Among ( "eza", -1, 1, "", this), - new Among ( "log\u00EDa", -1, 2, "", this), - new Among ( "idade", -1, 7, "", this), - new Among ( "ante", -1, 1, "", this), - new Among ( "mente", -1, 6, "", this), - new Among ( "amente", 12, 5, "", this), - new Among ( "\u00E1vel", -1, 1, "", this), - new Among ( "\u00EDvel", -1, 1, "", this), - new Among ( "uci\u00F3n", -1, 3, "", this), - new Among ( "ico", -1, 1, "", this), - new Among ( "ismo", -1, 1, "", this), - new Among ( "oso", -1, 1, "", this), - new Among ( "amento", -1, 1, "", this), - new Among ( "imento", -1, 1, "", this), - new Among ( "ivo", -1, 8, "", this), - new Among ( "a\u00E7a~o", -1, 1, "", this), - new Among ( "ador", -1, 1, "", this), - new Among ( "icas", -1, 1, "", this), - new Among ( "\u00EAncias", -1, 4, "", this), - new Among ( "iras", -1, 9, "", this), - new Among ( "adoras", -1, 1, "", this), - new Among ( "osas", -1, 1, "", this), - new Among ( "istas", -1, 1, "", this), - new Among ( "ivas", -1, 8, "", this), - new Among ( "ezas", -1, 1, "", this), - new Among ( "log\u00EDas", -1, 2, "", this), - new Among ( "idades", -1, 7, "", this), - new Among ( "uciones", -1, 3, "", this), - new Among ( "adores", -1, 1, "", this), - new Among ( "antes", -1, 1, "", this), - new Among ( "a\u00E7o~es", -1, 1, "", this), - new Among ( "icos", -1, 1, "", this), - new Among ( "ismos", -1, 1, "", this), - new Among ( "osos", -1, 1, "", this), - new Among ( "amentos", -1, 1, "", this), - new Among ( "imentos", -1, 1, "", this), - new Among ( "ivos", -1, 8, "", this) - }; + private final static Among a_3[] = { + new Among ( "ante", -1, 1, "", 
methodObject ), + new Among ( "avel", -1, 1, "", methodObject ), + new Among ( "\u00EDvel", -1, 1, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "ada", -1, 1, "", this), - new Among ( "ida", -1, 1, "", this), - new Among ( "ia", -1, 1, "", this), - new Among ( "aria", 2, 1, "", this), - new Among ( "eria", 2, 1, "", this), - new Among ( "iria", 2, 1, "", this), - new Among ( "ara", -1, 1, "", this), - new Among ( "era", -1, 1, "", this), - new Among ( "ira", -1, 1, "", this), - new Among ( "ava", -1, 1, "", this), - new Among ( "asse", -1, 1, "", this), - new Among ( "esse", -1, 1, "", this), - new Among ( "isse", -1, 1, "", this), - new Among ( "aste", -1, 1, "", this), - new Among ( "este", -1, 1, "", this), - new Among ( "iste", -1, 1, "", this), - new Among ( "ei", -1, 1, "", this), - new Among ( "arei", 16, 1, "", this), - new Among ( "erei", 16, 1, "", this), - new Among ( "irei", 16, 1, "", this), - new Among ( "am", -1, 1, "", this), - new Among ( "iam", 20, 1, "", this), - new Among ( "ariam", 21, 1, "", this), - new Among ( "eriam", 21, 1, "", this), - new Among ( "iriam", 21, 1, "", this), - new Among ( "aram", 20, 1, "", this), - new Among ( "eram", 20, 1, "", this), - new Among ( "iram", 20, 1, "", this), - new Among ( "avam", 20, 1, "", this), - new Among ( "em", -1, 1, "", this), - new Among ( "arem", 29, 1, "", this), - new Among ( "erem", 29, 1, "", this), - new Among ( "irem", 29, 1, "", this), - new Among ( "assem", 29, 1, "", this), - new Among ( "essem", 29, 1, "", this), - new Among ( "issem", 29, 1, "", this), - new Among ( "ado", -1, 1, "", this), - new Among ( "ido", -1, 1, "", this), - new Among ( "ando", -1, 1, "", this), - new Among ( "endo", -1, 1, "", this), - new Among ( "indo", -1, 1, "", this), - new Among ( "ara~o", -1, 1, "", this), - new Among ( "era~o", -1, 1, "", this), - new Among ( "ira~o", -1, 1, "", this), - new Among ( "ar", -1, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "ir", -1, 1, "", this), - new Among ( "as", -1, 1, "", this), - new Among ( "adas", 47, 1, "", this), - new Among ( "idas", 47, 1, "", this), - new Among ( "ias", 47, 1, "", this), - new Among ( "arias", 50, 1, "", this), - new Among ( "erias", 50, 1, "", this), - new Among ( "irias", 50, 1, "", this), - new Among ( "aras", 47, 1, "", this), - new Among ( "eras", 47, 1, "", this), - new Among ( "iras", 47, 1, "", this), - new Among ( "avas", 47, 1, "", this), - new Among ( "es", -1, 1, "", this), - new Among ( "ardes", 58, 1, "", this), - new Among ( "erdes", 58, 1, "", this), - new Among ( "irdes", 58, 1, "", this), - new Among ( "ares", 58, 1, "", this), - new Among ( "eres", 58, 1, "", this), - new Among ( "ires", 58, 1, "", this), - new Among ( "asses", 58, 1, "", this), - new Among ( "esses", 58, 1, "", this), - new Among ( "isses", 58, 1, "", this), - new Among ( "astes", 58, 1, "", this), - new Among ( "estes", 58, 1, "", this), - new Among ( "istes", 58, 1, "", this), - new Among ( "is", -1, 1, "", this), - new Among ( "ais", 71, 1, "", this), - new Among ( "eis", 71, 1, "", this), - new Among ( "areis", 73, 1, "", this), - new Among ( "ereis", 73, 1, "", this), - new Among ( "ireis", 73, 1, "", this), - new Among ( "\u00E1reis", 73, 1, "", this), - new Among ( "\u00E9reis", 73, 1, "", this), - new Among ( "\u00EDreis", 73, 1, "", this), - new Among ( "\u00E1sseis", 73, 1, "", this), - new Among ( "\u00E9sseis", 73, 1, "", this), - new Among ( "\u00EDsseis", 73, 1, "", this), - new Among ( "\u00E1veis", 73, 1, "", this), - new Among 
( "\u00EDeis", 73, 1, "", this), - new Among ( "ar\u00EDeis", 84, 1, "", this), - new Among ( "er\u00EDeis", 84, 1, "", this), - new Among ( "ir\u00EDeis", 84, 1, "", this), - new Among ( "ados", -1, 1, "", this), - new Among ( "idos", -1, 1, "", this), - new Among ( "amos", -1, 1, "", this), - new Among ( "\u00E1ramos", 90, 1, "", this), - new Among ( "\u00E9ramos", 90, 1, "", this), - new Among ( "\u00EDramos", 90, 1, "", this), - new Among ( "\u00E1vamos", 90, 1, "", this), - new Among ( "\u00EDamos", 90, 1, "", this), - new Among ( "ar\u00EDamos", 95, 1, "", this), - new Among ( "er\u00EDamos", 95, 1, "", this), - new Among ( "ir\u00EDamos", 95, 1, "", this), - new Among ( "emos", -1, 1, "", this), - new Among ( "aremos", 99, 1, "", this), - new Among ( "eremos", 99, 1, "", this), - new Among ( "iremos", 99, 1, "", this), - new Among ( "\u00E1ssemos", 99, 1, "", this), - new Among ( "\u00EAssemos", 99, 1, "", this), - new Among ( "\u00EDssemos", 99, 1, "", this), - new Among ( "imos", -1, 1, "", this), - new Among ( "armos", -1, 1, "", this), - new Among ( "ermos", -1, 1, "", this), - new Among ( "irmos", -1, 1, "", this), - new Among ( "\u00E1mos", -1, 1, "", this), - new Among ( "ar\u00E1s", -1, 1, "", this), - new Among ( "er\u00E1s", -1, 1, "", this), - new Among ( "ir\u00E1s", -1, 1, "", this), - new Among ( "eu", -1, 1, "", this), - new Among ( "iu", -1, 1, "", this), - new Among ( "ou", -1, 1, "", this), - new Among ( "ar\u00E1", -1, 1, "", this), - new Among ( "er\u00E1", -1, 1, "", this), - new Among ( "ir\u00E1", -1, 1, "", this) - }; + private final static Among a_4[] = { + new Among ( "ic", -1, 1, "", methodObject ), + new Among ( "abil", -1, 1, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "a", -1, 1, "", this), - new Among ( "i", -1, 1, "", this), - new Among ( "o", -1, 1, "", this), - new Among ( "os", -1, 1, "", this), - new Among ( "\u00E1", -1, 1, "", this), - new Among ( "\u00ED", -1, 1, "", this), - new Among ( "\u00F3", -1, 1, "", this) - }; + private final static Among a_5[] = { + new Among ( "ica", -1, 1, "", methodObject ), + new Among ( "\u00E2ncia", -1, 1, "", methodObject ), + new Among ( "\u00EAncia", -1, 4, "", methodObject ), + new Among ( "ira", -1, 9, "", methodObject ), + new Among ( "adora", -1, 1, "", methodObject ), + new Among ( "osa", -1, 1, "", methodObject ), + new Among ( "ista", -1, 1, "", methodObject ), + new Among ( "iva", -1, 8, "", methodObject ), + new Among ( "eza", -1, 1, "", methodObject ), + new Among ( "log\u00EDa", -1, 2, "", methodObject ), + new Among ( "idade", -1, 7, "", methodObject ), + new Among ( "ante", -1, 1, "", methodObject ), + new Among ( "mente", -1, 6, "", methodObject ), + new Among ( "amente", 12, 5, "", methodObject ), + new Among ( "\u00E1vel", -1, 1, "", methodObject ), + new Among ( "\u00EDvel", -1, 1, "", methodObject ), + new Among ( "uci\u00F3n", -1, 3, "", methodObject ), + new Among ( "ico", -1, 1, "", methodObject ), + new Among ( "ismo", -1, 1, "", methodObject ), + new Among ( "oso", -1, 1, "", methodObject ), + new Among ( "amento", -1, 1, "", methodObject ), + new Among ( "imento", -1, 1, "", methodObject ), + new Among ( "ivo", -1, 8, "", methodObject ), + new Among ( "a\u00E7a~o", -1, 1, "", methodObject ), + new Among ( "ador", -1, 1, "", methodObject ), + new Among ( "icas", -1, 1, "", methodObject ), + new Among ( "\u00EAncias", -1, 4, "", methodObject ), + new Among ( "iras", -1, 9, "", methodObject ), + new Among ( "adoras", -1, 
1, "", methodObject ), + new Among ( "osas", -1, 1, "", methodObject ), + new Among ( "istas", -1, 1, "", methodObject ), + new Among ( "ivas", -1, 8, "", methodObject ), + new Among ( "ezas", -1, 1, "", methodObject ), + new Among ( "log\u00EDas", -1, 2, "", methodObject ), + new Among ( "idades", -1, 7, "", methodObject ), + new Among ( "uciones", -1, 3, "", methodObject ), + new Among ( "adores", -1, 1, "", methodObject ), + new Among ( "antes", -1, 1, "", methodObject ), + new Among ( "a\u00E7o~es", -1, 1, "", methodObject ), + new Among ( "icos", -1, 1, "", methodObject ), + new Among ( "ismos", -1, 1, "", methodObject ), + new Among ( "osos", -1, 1, "", methodObject ), + new Among ( "amentos", -1, 1, "", methodObject ), + new Among ( "imentos", -1, 1, "", methodObject ), + new Among ( "ivos", -1, 8, "", methodObject ) + }; - private Among a_8[] = { - new Among ( "e", -1, 1, "", this), - new Among ( "\u00E7", -1, 2, "", this), - new Among ( "\u00E9", -1, 1, "", this), - new Among ( "\u00EA", -1, 1, "", this) - }; + private final static Among a_6[] = { + new Among ( "ada", -1, 1, "", methodObject ), + new Among ( "ida", -1, 1, "", methodObject ), + new Among ( "ia", -1, 1, "", methodObject ), + new Among ( "aria", 2, 1, "", methodObject ), + new Among ( "eria", 2, 1, "", methodObject ), + new Among ( "iria", 2, 1, "", methodObject ), + new Among ( "ara", -1, 1, "", methodObject ), + new Among ( "era", -1, 1, "", methodObject ), + new Among ( "ira", -1, 1, "", methodObject ), + new Among ( "ava", -1, 1, "", methodObject ), + new Among ( "asse", -1, 1, "", methodObject ), + new Among ( "esse", -1, 1, "", methodObject ), + new Among ( "isse", -1, 1, "", methodObject ), + new Among ( "aste", -1, 1, "", methodObject ), + new Among ( "este", -1, 1, "", methodObject ), + new Among ( "iste", -1, 1, "", methodObject ), + new Among ( "ei", -1, 1, "", methodObject ), + new Among ( "arei", 16, 1, "", methodObject ), + new Among ( "erei", 16, 1, "", methodObject ), + new Among ( "irei", 16, 1, "", methodObject ), + new Among ( "am", -1, 1, "", methodObject ), + new Among ( "iam", 20, 1, "", methodObject ), + new Among ( "ariam", 21, 1, "", methodObject ), + new Among ( "eriam", 21, 1, "", methodObject ), + new Among ( "iriam", 21, 1, "", methodObject ), + new Among ( "aram", 20, 1, "", methodObject ), + new Among ( "eram", 20, 1, "", methodObject ), + new Among ( "iram", 20, 1, "", methodObject ), + new Among ( "avam", 20, 1, "", methodObject ), + new Among ( "em", -1, 1, "", methodObject ), + new Among ( "arem", 29, 1, "", methodObject ), + new Among ( "erem", 29, 1, "", methodObject ), + new Among ( "irem", 29, 1, "", methodObject ), + new Among ( "assem", 29, 1, "", methodObject ), + new Among ( "essem", 29, 1, "", methodObject ), + new Among ( "issem", 29, 1, "", methodObject ), + new Among ( "ado", -1, 1, "", methodObject ), + new Among ( "ido", -1, 1, "", methodObject ), + new Among ( "ando", -1, 1, "", methodObject ), + new Among ( "endo", -1, 1, "", methodObject ), + new Among ( "indo", -1, 1, "", methodObject ), + new Among ( "ara~o", -1, 1, "", methodObject ), + new Among ( "era~o", -1, 1, "", methodObject ), + new Among ( "ira~o", -1, 1, "", methodObject ), + new Among ( "ar", -1, 1, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "ir", -1, 1, "", methodObject ), + new Among ( "as", -1, 1, "", methodObject ), + new Among ( "adas", 47, 1, "", methodObject ), + new Among ( "idas", 47, 1, "", methodObject ), + new Among ( "ias", 47, 1, "", methodObject ), + 
new Among ( "arias", 50, 1, "", methodObject ), + new Among ( "erias", 50, 1, "", methodObject ), + new Among ( "irias", 50, 1, "", methodObject ), + new Among ( "aras", 47, 1, "", methodObject ), + new Among ( "eras", 47, 1, "", methodObject ), + new Among ( "iras", 47, 1, "", methodObject ), + new Among ( "avas", 47, 1, "", methodObject ), + new Among ( "es", -1, 1, "", methodObject ), + new Among ( "ardes", 58, 1, "", methodObject ), + new Among ( "erdes", 58, 1, "", methodObject ), + new Among ( "irdes", 58, 1, "", methodObject ), + new Among ( "ares", 58, 1, "", methodObject ), + new Among ( "eres", 58, 1, "", methodObject ), + new Among ( "ires", 58, 1, "", methodObject ), + new Among ( "asses", 58, 1, "", methodObject ), + new Among ( "esses", 58, 1, "", methodObject ), + new Among ( "isses", 58, 1, "", methodObject ), + new Among ( "astes", 58, 1, "", methodObject ), + new Among ( "estes", 58, 1, "", methodObject ), + new Among ( "istes", 58, 1, "", methodObject ), + new Among ( "is", -1, 1, "", methodObject ), + new Among ( "ais", 71, 1, "", methodObject ), + new Among ( "eis", 71, 1, "", methodObject ), + new Among ( "areis", 73, 1, "", methodObject ), + new Among ( "ereis", 73, 1, "", methodObject ), + new Among ( "ireis", 73, 1, "", methodObject ), + new Among ( "\u00E1reis", 73, 1, "", methodObject ), + new Among ( "\u00E9reis", 73, 1, "", methodObject ), + new Among ( "\u00EDreis", 73, 1, "", methodObject ), + new Among ( "\u00E1sseis", 73, 1, "", methodObject ), + new Among ( "\u00E9sseis", 73, 1, "", methodObject ), + new Among ( "\u00EDsseis", 73, 1, "", methodObject ), + new Among ( "\u00E1veis", 73, 1, "", methodObject ), + new Among ( "\u00EDeis", 73, 1, "", methodObject ), + new Among ( "ar\u00EDeis", 84, 1, "", methodObject ), + new Among ( "er\u00EDeis", 84, 1, "", methodObject ), + new Among ( "ir\u00EDeis", 84, 1, "", methodObject ), + new Among ( "ados", -1, 1, "", methodObject ), + new Among ( "idos", -1, 1, "", methodObject ), + new Among ( "amos", -1, 1, "", methodObject ), + new Among ( "\u00E1ramos", 90, 1, "", methodObject ), + new Among ( "\u00E9ramos", 90, 1, "", methodObject ), + new Among ( "\u00EDramos", 90, 1, "", methodObject ), + new Among ( "\u00E1vamos", 90, 1, "", methodObject ), + new Among ( "\u00EDamos", 90, 1, "", methodObject ), + new Among ( "ar\u00EDamos", 95, 1, "", methodObject ), + new Among ( "er\u00EDamos", 95, 1, "", methodObject ), + new Among ( "ir\u00EDamos", 95, 1, "", methodObject ), + new Among ( "emos", -1, 1, "", methodObject ), + new Among ( "aremos", 99, 1, "", methodObject ), + new Among ( "eremos", 99, 1, "", methodObject ), + new Among ( "iremos", 99, 1, "", methodObject ), + new Among ( "\u00E1ssemos", 99, 1, "", methodObject ), + new Among ( "\u00EAssemos", 99, 1, "", methodObject ), + new Among ( "\u00EDssemos", 99, 1, "", methodObject ), + new Among ( "imos", -1, 1, "", methodObject ), + new Among ( "armos", -1, 1, "", methodObject ), + new Among ( "ermos", -1, 1, "", methodObject ), + new Among ( "irmos", -1, 1, "", methodObject ), + new Among ( "\u00E1mos", -1, 1, "", methodObject ), + new Among ( "ar\u00E1s", -1, 1, "", methodObject ), + new Among ( "er\u00E1s", -1, 1, "", methodObject ), + new Among ( "ir\u00E1s", -1, 1, "", methodObject ), + new Among ( "eu", -1, 1, "", methodObject ), + new Among ( "iu", -1, 1, "", methodObject ), + new Among ( "ou", -1, 1, "", methodObject ), + new Among ( "ar\u00E1", -1, 1, "", methodObject ), + new Among ( "er\u00E1", -1, 1, "", methodObject ), + new Among ( "ir\u00E1", -1, 
1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; + private final static Among a_7[] = { + new Among ( "a", -1, 1, "", methodObject ), + new Among ( "i", -1, 1, "", methodObject ), + new Among ( "o", -1, 1, "", methodObject ), + new Among ( "os", -1, 1, "", methodObject ), + new Among ( "\u00E1", -1, 1, "", methodObject ), + new Among ( "\u00ED", -1, 1, "", methodObject ), + new Among ( "\u00F3", -1, 1, "", methodObject ) + }; + private final static Among a_8[] = { + new Among ( "e", -1, 1, "", methodObject ), + new Among ( "\u00E7", -1, 2, "", methodObject ), + new Among ( "\u00E9", -1, 1, "", methodObject ), + new Among ( "\u00EA", -1, 1, "", methodObject ) + }; + + private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, 19, 12, 2 }; + private int I_p2; private int I_p1; private int I_pV; - private void copy_from(PortugueseStemmer other) { - I_p2 = other.I_p2; - I_p1 = other.I_p1; - I_pV = other.I_pV; - super.copy_from(other); - } + private void copy_from(PortugueseStemmer other) { + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_pV = other.I_pV; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int among_var; int v_1; - // repeat, line 36 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 36 - // [, line 37 - bra = cursor; - // substring, line 37 - among_var = find_among(a_0, 3); - if (among_var == 0) + // repeat, line 36 + replab0: while(true) { - break lab1; - } - // ], line 37 - ket = cursor; - switch(among_var) { - case 0: - break lab1; - case 1: - // (, line 38 - // <-, line 38 - slice_from("a~"); - break; - case 2: - // (, line 39 - // <-, line 39 - slice_from("o~"); - break; - case 3: - // (, line 40 - // next, line 40 - if (cursor >= limit) + v_1 = cursor; + lab1: do { + // (, line 36 + // [, line 37 + bra = cursor; + // substring, line 37 + among_var = find_among(a_0, 3); + if (among_var == 0) { break lab1; } - cursor++; - break; + // ], line 37 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 38 + // <-, line 38 + slice_from("a~"); + break; + case 2: + // (, line 39 + // <-, line 39 + slice_from("o~"); + break; + case 3: + // (, line 40 + // next, line 40 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; int v_3; int v_6; int v_8; - // (, line 44 - I_pV = limit; - I_p1 = limit; - I_p2 = limit; - // do, line 50 - v_1 = cursor; - lab0: do { - // (, line 50 - // or, line 52 - lab1: do { - v_2 = cursor; - lab2: do { - // (, line 51 - if (!(in_grouping(g_v, 97, 250))) - { - break lab2; - } - // or, line 51 - lab3: do { - v_3 = cursor; - lab4: do { + // (, line 44 + I_pV = limit; + I_p1 = limit; + I_p2 = limit; + // do, line 50 + v_1 = cursor; + lab0: do { + // (, line 50 + // or, line 52 + lab1: do { + v_2 = cursor; + lab2: do { // (, line 51 - if (!(out_grouping(g_v, 97, 250))) + if (!(in_grouping(g_v, 97, 250))) { - break lab4; + break lab2; } - // gopast, line 51 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 250))) + // or, line 51 + lab3: do { + v_3 = cursor; + lab4: do { + // (, line 51 + if (!(out_grouping(g_v, 97, 250))) { - break lab6; + break lab4; } - 
break golab5; + // gopast, line 51 + golab5: while(true) + { + lab6: do { + if (!(in_grouping(g_v, 97, 250))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + break lab4; + } + cursor++; + } + break lab3; } while (false); - if (cursor >= limit) + cursor = v_3; + // (, line 51 + if (!(in_grouping(g_v, 97, 250))) { - break lab4; + break lab2; } - cursor++; - } - break lab3; + // gopast, line 51 + golab7: while(true) + { + lab8: do { + if (!(out_grouping(g_v, 97, 250))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + break lab2; + } + cursor++; + } + } while (false); + break lab1; } while (false); - cursor = v_3; - // (, line 51 - if (!(in_grouping(g_v, 97, 250))) + cursor = v_2; + // (, line 53 + if (!(out_grouping(g_v, 97, 250))) { - break lab2; + break lab0; } - // gopast, line 51 - golab7: while(true) - { - lab8: do { + // or, line 53 + lab9: do { + v_6 = cursor; + lab10: do { + // (, line 53 if (!(out_grouping(g_v, 97, 250))) { - break lab8; + break lab10; } - break golab7; + // gopast, line 53 + golab11: while(true) + { + lab12: do { + if (!(in_grouping(g_v, 97, 250))) + { + break lab12; + } + break golab11; + } while (false); + if (cursor >= limit) + { + break lab10; + } + cursor++; + } + break lab9; } while (false); + cursor = v_6; + // (, line 53 + if (!(in_grouping(g_v, 97, 250))) + { + break lab0; + } + // next, line 53 if (cursor >= limit) { - break lab2; + break lab0; } cursor++; - } + } while (false); } while (false); - break lab1; + // setmark pV, line 54 + I_pV = cursor; } while (false); - cursor = v_2; - // (, line 53 - if (!(out_grouping(g_v, 97, 250))) - { - break lab0; - } - // or, line 53 - lab9: do { - v_6 = cursor; - lab10: do { - // (, line 53 - if (!(out_grouping(g_v, 97, 250))) + cursor = v_1; + // do, line 56 + v_8 = cursor; + lab13: do { + // (, line 56 + // gopast, line 57 + golab14: while(true) + { + lab15: do { + if (!(in_grouping(g_v, 97, 250))) + { + break lab15; + } + break golab14; + } while (false); + if (cursor >= limit) { - break lab10; + break lab13; } - // gopast, line 53 - golab11: while(true) - { - lab12: do { - if (!(in_grouping(g_v, 97, 250))) - { - break lab12; - } - break golab11; - } while (false); - if (cursor >= limit) + cursor++; + } + // gopast, line 57 + golab16: while(true) + { + lab17: do { + if (!(out_grouping(g_v, 97, 250))) { - break lab10; + break lab17; } - cursor++; + break golab16; + } while (false); + if (cursor >= limit) + { + break lab13; } - break lab9; - } while (false); - cursor = v_6; - // (, line 53 - if (!(in_grouping(g_v, 97, 250))) - { - break lab0; + cursor++; } - // next, line 53 - if (cursor >= limit) + // setmark p1, line 57 + I_p1 = cursor; + // gopast, line 58 + golab18: while(true) { - break lab0; + lab19: do { + if (!(in_grouping(g_v, 97, 250))) + { + break lab19; + } + break golab18; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - cursor++; - } while (false); - } while (false); - // setmark pV, line 54 - I_pV = cursor; - } while (false); - cursor = v_1; - // do, line 56 - v_8 = cursor; - lab13: do { - // (, line 56 - // gopast, line 57 - golab14: while(true) - { - lab15: do { - if (!(in_grouping(g_v, 97, 250))) + // gopast, line 58 + golab20: while(true) { - break lab15; + lab21: do { + if (!(out_grouping(g_v, 97, 250))) + { + break lab21; + } + break golab20; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - break golab14; + // setmark p2, line 58 + I_p2 = cursor; } while 
(false); - if (cursor >= limit) + cursor = v_8; + return true; + } + + private boolean r_postlude() { + int among_var; + int v_1; + // repeat, line 62 + replab0: while(true) { - break lab13; + v_1 = cursor; + lab1: do { + // (, line 62 + // [, line 63 + bra = cursor; + // substring, line 63 + among_var = find_among(a_1, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 63 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 64 + // <-, line 64 + slice_from("\u00E3"); + break; + case 2: + // (, line 65 + // <-, line 65 + slice_from("\u00F5"); + break; + case 3: + // (, line 66 + // next, line 66 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - cursor++; + return true; } - // gopast, line 57 - golab16: while(true) - { - lab17: do { - if (!(out_grouping(g_v, 97, 250))) - { - break lab17; - } - break golab16; - } while (false); - if (cursor >= limit) + + private boolean r_RV() { + if (!(I_pV <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p1, line 57 - I_p1 = cursor; - // gopast, line 58 - golab18: while(true) - { - lab19: do { - if (!(in_grouping(g_v, 97, 250))) - { - break lab19; - } - break golab18; - } while (false); - if (cursor >= limit) + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // gopast, line 58 - golab20: while(true) - { - lab21: do { - if (!(out_grouping(g_v, 97, 250))) - { - break lab21; - } - break golab20; - } while (false); - if (cursor >= limit) + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p2, line 58 - I_p2 = cursor; - } while (false); - cursor = v_8; - return true; - } - private boolean r_postlude() { + private boolean r_standard_suffix() { int among_var; int v_1; - // repeat, line 62 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 62 - // [, line 63 - bra = cursor; - // substring, line 63 - among_var = find_among(a_1, 3); + int v_2; + int v_3; + int v_4; + // (, line 76 + // [, line 77 + ket = cursor; + // substring, line 77 + among_var = find_among_b(a_5, 45); if (among_var == 0) { - break lab1; + return false; } - // ], line 63 - ket = cursor; + // ], line 77 + bra = cursor; switch(among_var) { case 0: - break lab1; + return false; case 1: - // (, line 64 - // <-, line 64 - slice_from("\u00E3"); + // (, line 92 + // call R2, line 93 + if (!r_R2()) + { + return false; + } + // delete, line 93 + slice_del(); break; case 2: - // (, line 65 - // <-, line 65 - slice_from("\u00F5"); + // (, line 97 + // call R2, line 98 + if (!r_R2()) + { + return false; + } + // <-, line 98 + slice_from("log"); break; case 3: - // (, line 66 - // next, line 66 - if (cursor >= limit) + // (, line 101 + // call R2, line 102 + if (!r_R2()) { - break lab1; + return false; } - cursor++; + // <-, line 102 + slice_from("u"); break; - } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } - - private boolean r_RV() { - if (!(I_pV <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_standard_suffix() { - int among_var; - int v_1; - int v_2; - int v_3; - int v_4; - // (, line 76 - // [, line 
77 - ket = cursor; - // substring, line 77 - among_var = find_among_b(a_5, 45); - if (among_var == 0) - { - return false; - } - // ], line 77 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 92 - // call R2, line 93 - if (!r_R2()) - { - return false; - } - // delete, line 93 - slice_del(); - break; - case 2: - // (, line 97 - // call R2, line 98 - if (!r_R2()) - { - return false; - } - // <-, line 98 - slice_from("log"); - break; - case 3: - // (, line 101 - // call R2, line 102 - if (!r_R2()) - { - return false; - } - // <-, line 102 - slice_from("u"); - break; - case 4: - // (, line 105 - // call R2, line 106 - if (!r_R2()) - { - return false; - } - // <-, line 106 - slice_from("ente"); - break; - case 5: - // (, line 109 - // call R1, line 110 - if (!r_R1()) - { - return false; - } - // delete, line 110 - slice_del(); - // try, line 111 - v_1 = limit - cursor; - lab0: do { - // (, line 111 - // [, line 112 - ket = cursor; - // substring, line 112 - among_var = find_among_b(a_2, 4); - if (among_var == 0) - { - cursor = limit - v_1; - break lab0; - } - // ], line 112 - bra = cursor; - // call R2, line 112 - if (!r_R2()) - { - cursor = limit - v_1; - break lab0; - } - // delete, line 112 - slice_del(); - switch(among_var) { - case 0: - cursor = limit - v_1; - break lab0; - case 1: - // (, line 113 - // [, line 113 + case 4: + // (, line 105 + // call R2, line 106 + if (!r_R2()) + { + return false; + } + // <-, line 106 + slice_from("ente"); + break; + case 5: + // (, line 109 + // call R1, line 110 + if (!r_R1()) + { + return false; + } + // delete, line 110 + slice_del(); + // try, line 111 + v_1 = limit - cursor; + lab0: do { + // (, line 111 + // [, line 112 ket = cursor; - // literal, line 113 - if (!(eq_s_b(2, "at"))) + // substring, line 112 + among_var = find_among_b(a_2, 4); + if (among_var == 0) { cursor = limit - v_1; break lab0; } - // ], line 113 + // ], line 112 bra = cursor; - // call R2, line 113 + // call R2, line 112 if (!r_R2()) { cursor = limit - v_1; break lab0; } - // delete, line 113 + // delete, line 112 slice_del(); - break; - } - } while (false); - break; - case 6: - // (, line 121 - // call R2, line 122 - if (!r_R2()) - { - return false; - } - // delete, line 122 - slice_del(); - // try, line 123 - v_2 = limit - cursor; - lab1: do { - // (, line 123 - // [, line 124 - ket = cursor; - // substring, line 124 - among_var = find_among_b(a_3, 3); - if (among_var == 0) - { - cursor = limit - v_2; - break lab1; - } - // ], line 124 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_2; - break lab1; - case 1: - // (, line 127 - // call R2, line 127 - if (!r_R2()) + switch(among_var) { + case 0: + cursor = limit - v_1; + break lab0; + case 1: + // (, line 113 + // [, line 113 + ket = cursor; + // literal, line 113 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_1; + break lab0; + } + // ], line 113 + bra = cursor; + // call R2, line 113 + if (!r_R2()) + { + cursor = limit - v_1; + break lab0; + } + // delete, line 113 + slice_del(); + break; + } + } while (false); + break; + case 6: + // (, line 121 + // call R2, line 122 + if (!r_R2()) + { + return false; + } + // delete, line 122 + slice_del(); + // try, line 123 + v_2 = limit - cursor; + lab1: do { + // (, line 123 + // [, line 124 + ket = cursor; + // substring, line 124 + among_var = find_among_b(a_3, 3); + if (among_var == 0) { cursor = limit - v_2; break lab1; } - // delete, line 127 - slice_del(); - break; - } - } while (false); - break; - case 7: - // (, 
line 133 - // call R2, line 134 - if (!r_R2()) - { - return false; - } - // delete, line 134 - slice_del(); - // try, line 135 - v_3 = limit - cursor; - lab2: do { - // (, line 135 - // [, line 136 - ket = cursor; - // substring, line 136 - among_var = find_among_b(a_4, 3); - if (among_var == 0) - { - cursor = limit - v_3; - break lab2; - } - // ], line 136 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_3; - break lab2; - case 1: - // (, line 139 - // call R2, line 139 - if (!r_R2()) + // ], line 124 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_2; + break lab1; + case 1: + // (, line 127 + // call R2, line 127 + if (!r_R2()) + { + cursor = limit - v_2; + break lab1; + } + // delete, line 127 + slice_del(); + break; + } + } while (false); + break; + case 7: + // (, line 133 + // call R2, line 134 + if (!r_R2()) + { + return false; + } + // delete, line 134 + slice_del(); + // try, line 135 + v_3 = limit - cursor; + lab2: do { + // (, line 135 + // [, line 136 + ket = cursor; + // substring, line 136 + among_var = find_among_b(a_4, 3); + if (among_var == 0) { cursor = limit - v_3; break lab2; } - // delete, line 139 + // ], line 136 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_3; + break lab2; + case 1: + // (, line 139 + // call R2, line 139 + if (!r_R2()) + { + cursor = limit - v_3; + break lab2; + } + // delete, line 139 + slice_del(); + break; + } + } while (false); + break; + case 8: + // (, line 145 + // call R2, line 146 + if (!r_R2()) + { + return false; + } + // delete, line 146 + slice_del(); + // try, line 147 + v_4 = limit - cursor; + lab3: do { + // (, line 147 + // [, line 148 + ket = cursor; + // literal, line 148 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_4; + break lab3; + } + // ], line 148 + bra = cursor; + // call R2, line 148 + if (!r_R2()) + { + cursor = limit - v_4; + break lab3; + } + // delete, line 148 slice_del(); - break; - } - } while (false); - break; - case 8: - // (, line 145 - // call R2, line 146 - if (!r_R2()) - { - return false; + } while (false); + break; + case 9: + // (, line 152 + // call RV, line 153 + if (!r_RV()) + { + return false; + } + // literal, line 153 + if (!(eq_s_b(1, "e"))) + { + return false; + } + // <-, line 154 + slice_from("ir"); + break; } - // delete, line 146 - slice_del(); - // try, line 147 - v_4 = limit - cursor; - lab3: do { - // (, line 147 - // [, line 148 - ket = cursor; - // literal, line 148 - if (!(eq_s_b(2, "at"))) - { - cursor = limit - v_4; - break lab3; - } - // ], line 148 - bra = cursor; - // call R2, line 148 - if (!r_R2()) - { - cursor = limit - v_4; - break lab3; - } - // delete, line 148 - slice_del(); - } while (false); - break; - case 9: - // (, line 152 - // call RV, line 153 - if (!r_RV()) + return true; + } + + private boolean r_verb_suffix() { + int among_var; + int v_1; + int v_2; + // setlimit, line 159 + v_1 = limit - cursor; + // tomark, line 159 + if (cursor < I_pV) { return false; } - // literal, line 153 - if (!(eq_s_b(1, "e"))) + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 159 + // [, line 160 + ket = cursor; + // substring, line 160 + among_var = find_among_b(a_6, 120); + if (among_var == 0) { + limit_backward = v_2; return false; } - // <-, line 154 - slice_from("ir"); - break; - } - return true; - } - - private boolean r_verb_suffix() { - int among_var; - int v_1; - int v_2; - // setlimit, line 159 - v_1 = limit - cursor; - // tomark, line 159 - if (cursor < 
I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 159 - // [, line 160 - ket = cursor; - // substring, line 160 - among_var = find_among_b(a_6, 120); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 160 - bra = cursor; - switch(among_var) { - case 0: + // ], line 160 + bra = cursor; + switch(among_var) { + case 0: + limit_backward = v_2; + return false; + case 1: + // (, line 179 + // delete, line 179 + slice_del(); + break; + } limit_backward = v_2; - return false; - case 1: - // (, line 179 - // delete, line 179 - slice_del(); - break; - } - limit_backward = v_2; - return true; - } + return true; + } - private boolean r_residual_suffix() { + private boolean r_residual_suffix() { int among_var; - // (, line 183 - // [, line 184 - ket = cursor; - // substring, line 184 - among_var = find_among_b(a_7, 7); - if (among_var == 0) - { - return false; - } - // ], line 184 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 187 - // call RV, line 187 - if (!r_RV()) + // (, line 183 + // [, line 184 + ket = cursor; + // substring, line 184 + among_var = find_among_b(a_7, 7); + if (among_var == 0) { return false; } - // delete, line 187 - slice_del(); - break; - } - return true; - } + // ], line 184 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 187 + // call RV, line 187 + if (!r_RV()) + { + return false; + } + // delete, line 187 + slice_del(); + break; + } + return true; + } - private boolean r_residual_form() { + private boolean r_residual_form() { int among_var; int v_1; int v_2; int v_3; - // (, line 191 - // [, line 192 - ket = cursor; - // substring, line 192 - among_var = find_among_b(a_8, 4); - if (among_var == 0) - { - return false; - } - // ], line 192 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 194 - // call RV, line 194 - if (!r_RV()) + // (, line 191 + // [, line 192 + ket = cursor; + // substring, line 192 + among_var = find_among_b(a_8, 4); + if (among_var == 0) { return false; } - // delete, line 194 - slice_del(); - // [, line 194 - ket = cursor; - // or, line 194 - lab0: do { - v_1 = limit - cursor; - lab1: do { + // ], line 192 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: // (, line 194 - // literal, line 194 - if (!(eq_s_b(1, "u"))) + // call RV, line 194 + if (!r_RV()) { - break lab1; + return false; } - // ], line 194 - bra = cursor; - // test, line 194 - v_2 = limit - cursor; - // literal, line 194 - if (!(eq_s_b(1, "g"))) + // delete, line 194 + slice_del(); + // [, line 194 + ket = cursor; + // or, line 194 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 194 + // literal, line 194 + if (!(eq_s_b(1, "u"))) + { + break lab1; + } + // ], line 194 + bra = cursor; + // test, line 194 + v_2 = limit - cursor; + // literal, line 194 + if (!(eq_s_b(1, "g"))) + { + break lab1; + } + cursor = limit - v_2; + break lab0; + } while (false); + cursor = limit - v_1; + // (, line 195 + // literal, line 195 + if (!(eq_s_b(1, "i"))) + { + return false; + } + // ], line 195 + bra = cursor; + // test, line 195 + v_3 = limit - cursor; + // literal, line 195 + if (!(eq_s_b(1, "c"))) + { + return false; + } + cursor = limit - v_3; + } while (false); + // call RV, line 195 + if (!r_RV()) { - break lab1; + return false; } - cursor = limit - v_2; - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 195 - 
// literal, line 195 - if (!(eq_s_b(1, "i"))) - { - return false; - } - // ], line 195 - bra = cursor; - // test, line 195 - v_3 = limit - cursor; - // literal, line 195 - if (!(eq_s_b(1, "c"))) - { - return false; - } - cursor = limit - v_3; - } while (false); - // call RV, line 195 - if (!r_RV()) - { - return false; + // delete, line 195 + slice_del(); + break; + case 2: + // (, line 196 + // <-, line 196 + slice_from("c"); + break; } - // delete, line 195 - slice_del(); - break; - case 2: - // (, line 196 - // <-, line 196 - slice_from("c"); - break; - } - return true; - } + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -993,124 +1001,136 @@ int v_8; int v_9; int v_10; - // (, line 201 - // do, line 202 - v_1 = cursor; - lab0: do { - // call prelude, line 202 - if (!r_prelude()) - { - break lab0; - } - } while (false); - cursor = v_1; - // do, line 203 - v_2 = cursor; - lab1: do { - // call mark_regions, line 203 - if (!r_mark_regions()) - { - break lab1; - } - } while (false); - cursor = v_2; - // backwards, line 204 - limit_backward = cursor; cursor = limit; - // (, line 204 - // do, line 205 - v_3 = limit - cursor; - lab2: do { - // (, line 205 - // or, line 209 - lab3: do { - v_4 = limit - cursor; - lab4: do { - // (, line 206 - // and, line 207 - v_5 = limit - cursor; - // (, line 206 - // or, line 206 - lab5: do { - v_6 = limit - cursor; - lab6: do { - // call standard_suffix, line 206 - if (!r_standard_suffix()) - { - break lab6; - } - break lab5; + // (, line 201 + // do, line 202 + v_1 = cursor; + lab0: do { + // call prelude, line 202 + if (!r_prelude()) + { + break lab0; + } + } while (false); + cursor = v_1; + // do, line 203 + v_2 = cursor; + lab1: do { + // call mark_regions, line 203 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 204 + limit_backward = cursor; cursor = limit; + // (, line 204 + // do, line 205 + v_3 = limit - cursor; + lab2: do { + // (, line 205 + // or, line 209 + lab3: do { + v_4 = limit - cursor; + lab4: do { + // (, line 206 + // and, line 207 + v_5 = limit - cursor; + // (, line 206 + // or, line 206 + lab5: do { + v_6 = limit - cursor; + lab6: do { + // call standard_suffix, line 206 + if (!r_standard_suffix()) + { + break lab6; + } + break lab5; + } while (false); + cursor = limit - v_6; + // call verb_suffix, line 206 + if (!r_verb_suffix()) + { + break lab4; + } + } while (false); + cursor = limit - v_5; + // do, line 207 + v_7 = limit - cursor; + lab7: do { + // (, line 207 + // [, line 207 + ket = cursor; + // literal, line 207 + if (!(eq_s_b(1, "i"))) + { + break lab7; + } + // ], line 207 + bra = cursor; + // test, line 207 + v_8 = limit - cursor; + // literal, line 207 + if (!(eq_s_b(1, "c"))) + { + break lab7; + } + cursor = limit - v_8; + // call RV, line 207 + if (!r_RV()) + { + break lab7; + } + // delete, line 207 + slice_del(); + } while (false); + cursor = limit - v_7; + break lab3; } while (false); - cursor = limit - v_6; - // call verb_suffix, line 206 - if (!r_verb_suffix()) + cursor = limit - v_4; + // call residual_suffix, line 209 + if (!r_residual_suffix()) { - break lab4; + break lab2; } } while (false); - cursor = limit - v_5; - // do, line 207 - v_7 = limit - cursor; - lab7: do { - // (, line 207 - // [, line 207 - ket = cursor; - // literal, line 207 - if (!(eq_s_b(1, "i"))) - { - break lab7; - } - // ], line 207 - bra = cursor; - // test, line 207 - v_8 = limit - cursor; - // literal, line 207 - if 
(!(eq_s_b(1, "c"))) - { - break lab7; - } - cursor = limit - v_8; - // call RV, line 207 - if (!r_RV()) - { - break lab7; - } - // delete, line 207 - slice_del(); - } while (false); - cursor = limit - v_7; - break lab3; } while (false); - cursor = limit - v_4; - // call residual_suffix, line 209 - if (!r_residual_suffix()) - { - break lab2; - } - } while (false); - } while (false); - cursor = limit - v_3; - // do, line 211 - v_9 = limit - cursor; - lab8: do { - // call residual_form, line 211 - if (!r_residual_form()) - { - break lab8; + cursor = limit - v_3; + // do, line 211 + v_9 = limit - cursor; + lab8: do { + // call residual_form, line 211 + if (!r_residual_form()) + { + break lab8; + } + } while (false); + cursor = limit - v_9; + cursor = limit_backward; // do, line 213 + v_10 = cursor; + lab9: do { + // call postlude, line 213 + if (!r_postlude()) + { + break lab9; + } + } while (false); + cursor = v_10; + return true; } - } while (false); - cursor = limit - v_9; - cursor = limit_backward; // do, line 213 - v_10 = cursor; - lab9: do { - // call postlude, line 213 - if (!r_postlude()) - { - break lab9; - } - } while (false); - cursor = v_10; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof PortugueseStemmer; } + @Override + public int hashCode() { + return PortugueseStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/RomanianStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/RomanianStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/RomanianStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/RomanianStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,926 +1,934 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class RomanianStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "", -1, 3, "", this), - new Among ( "I", 0, 1, "", this), - new Among ( "U", 0, 2, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "ea", -1, 3, "", this), - new Among ( "a\u0163ia", -1, 7, "", this), - new Among ( "aua", -1, 2, "", this), - new Among ( "iua", -1, 4, "", this), - new Among ( "a\u0163ie", -1, 7, "", this), - new Among ( "ele", -1, 3, "", this), - new Among ( "ile", -1, 5, "", this), - new Among ( "iile", 6, 4, "", this), - new Among ( "iei", -1, 4, "", this), - new Among ( "atei", -1, 6, "", this), - new Among ( "ii", -1, 4, "", this), - new Among ( "ului", -1, 1, "", this), - new Among ( "ul", -1, 1, "", this), - new Among ( "elor", -1, 3, "", this), - new Among ( "ilor", -1, 4, "", this), - new Among ( "iilor", 14, 4, "", this) - }; + private final static RomanianStemmer methodObject = new RomanianStemmer (); - private Among a_2[] = { - new Among ( "icala", -1, 4, "", this), - new Among ( "iciva", -1, 4, "", this), - new Among ( "ativa", -1, 5, "", this), - new Among ( "itiva", -1, 6, "", this), - new Among ( "icale", -1, 4, "", this), - new Among ( "a\u0163iune", -1, 5, "", this), - new Among ( "i\u0163iune", -1, 6, "", this), - new Among ( "atoare", -1, 5, "", this), - new Among ( "itoare", -1, 6, "", this), - new Among ( "\u0103toare", -1, 5, "", this), - new Among ( "icitate", -1, 4, "", this), - new Among ( "abilitate", -1, 1, "", this), - new Among ( "ibilitate", -1, 2, "", this), - new Among ( "ivitate", -1, 3, "", this), - new Among ( "icive", -1, 4, "", this), - new Among ( "ative", -1, 5, "", this), - new Among ( "itive", -1, 6, "", this), - new Among ( "icali", -1, 4, "", this), - new Among ( "atori", -1, 5, "", this), - new Among ( "icatori", 18, 4, "", this), - new Among ( "itori", -1, 6, "", this), - new Among ( "\u0103tori", -1, 5, "", this), - new Among ( "icitati", -1, 4, "", this), - new Among ( "abilitati", -1, 1, "", this), - new Among ( "ivitati", -1, 3, "", this), - new Among ( "icivi", -1, 4, "", this), - new Among ( "ativi", -1, 5, "", this), - new Among ( "itivi", -1, 6, "", this), - new Among ( "icit\u0103i", -1, 4, "", this), - new Among ( "abilit\u0103i", -1, 1, "", this), - new Among ( "ivit\u0103i", -1, 3, "", this), - new Among ( "icit\u0103\u0163i", -1, 4, "", this), - new Among ( "abilit\u0103\u0163i", -1, 1, "", this), - new Among ( "ivit\u0103\u0163i", -1, 3, "", this), - new Among ( "ical", -1, 4, "", this), - new Among ( "ator", -1, 5, "", this), - new Among ( "icator", 35, 4, "", this), - new Among ( "itor", -1, 6, "", this), - new Among ( "\u0103tor", -1, 5, "", this), - new Among ( "iciv", -1, 4, "", this), - new Among ( "ativ", -1, 5, "", this), - new Among ( "itiv", -1, 6, "", this), - new Among ( "ical\u0103", -1, 4, "", this), - new Among ( "iciv\u0103", -1, 4, "", this), - new Among ( "ativ\u0103", -1, 5, "", this), - new Among ( "itiv\u0103", -1, 6, "", this) - }; + private final static Among a_0[] = { + new Among ( "", -1, 3, "", methodObject ), + new Among ( "I", 0, 1, "", methodObject ), + new Among ( "U", 0, 2, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "ica", -1, 1, "", this), - new Among ( "abila", -1, 1, "", this), - new Among ( "ibila", -1, 1, "", this), - new Among ( "oasa", -1, 1, "", this), - new Among ( "ata", -1, 1, "", this), - new Among ( "ita", -1, 1, "", this), - new Among ( "anta", -1, 1, "", this), - new Among ( "ista", -1, 3, "", 
this), - new Among ( "uta", -1, 1, "", this), - new Among ( "iva", -1, 1, "", this), - new Among ( "ic", -1, 1, "", this), - new Among ( "ice", -1, 1, "", this), - new Among ( "abile", -1, 1, "", this), - new Among ( "ibile", -1, 1, "", this), - new Among ( "isme", -1, 3, "", this), - new Among ( "iune", -1, 2, "", this), - new Among ( "oase", -1, 1, "", this), - new Among ( "ate", -1, 1, "", this), - new Among ( "itate", 17, 1, "", this), - new Among ( "ite", -1, 1, "", this), - new Among ( "ante", -1, 1, "", this), - new Among ( "iste", -1, 3, "", this), - new Among ( "ute", -1, 1, "", this), - new Among ( "ive", -1, 1, "", this), - new Among ( "ici", -1, 1, "", this), - new Among ( "abili", -1, 1, "", this), - new Among ( "ibili", -1, 1, "", this), - new Among ( "iuni", -1, 2, "", this), - new Among ( "atori", -1, 1, "", this), - new Among ( "osi", -1, 1, "", this), - new Among ( "ati", -1, 1, "", this), - new Among ( "itati", 30, 1, "", this), - new Among ( "iti", -1, 1, "", this), - new Among ( "anti", -1, 1, "", this), - new Among ( "isti", -1, 3, "", this), - new Among ( "uti", -1, 1, "", this), - new Among ( "i\u015Fti", -1, 3, "", this), - new Among ( "ivi", -1, 1, "", this), - new Among ( "it\u0103i", -1, 1, "", this), - new Among ( "o\u015Fi", -1, 1, "", this), - new Among ( "it\u0103\u0163i", -1, 1, "", this), - new Among ( "abil", -1, 1, "", this), - new Among ( "ibil", -1, 1, "", this), - new Among ( "ism", -1, 3, "", this), - new Among ( "ator", -1, 1, "", this), - new Among ( "os", -1, 1, "", this), - new Among ( "at", -1, 1, "", this), - new Among ( "it", -1, 1, "", this), - new Among ( "ant", -1, 1, "", this), - new Among ( "ist", -1, 3, "", this), - new Among ( "ut", -1, 1, "", this), - new Among ( "iv", -1, 1, "", this), - new Among ( "ic\u0103", -1, 1, "", this), - new Among ( "abil\u0103", -1, 1, "", this), - new Among ( "ibil\u0103", -1, 1, "", this), - new Among ( "oas\u0103", -1, 1, "", this), - new Among ( "at\u0103", -1, 1, "", this), - new Among ( "it\u0103", -1, 1, "", this), - new Among ( "ant\u0103", -1, 1, "", this), - new Among ( "ist\u0103", -1, 3, "", this), - new Among ( "ut\u0103", -1, 1, "", this), - new Among ( "iv\u0103", -1, 1, "", this) - }; + private final static Among a_1[] = { + new Among ( "ea", -1, 3, "", methodObject ), + new Among ( "a\u0163ia", -1, 7, "", methodObject ), + new Among ( "aua", -1, 2, "", methodObject ), + new Among ( "iua", -1, 4, "", methodObject ), + new Among ( "a\u0163ie", -1, 7, "", methodObject ), + new Among ( "ele", -1, 3, "", methodObject ), + new Among ( "ile", -1, 5, "", methodObject ), + new Among ( "iile", 6, 4, "", methodObject ), + new Among ( "iei", -1, 4, "", methodObject ), + new Among ( "atei", -1, 6, "", methodObject ), + new Among ( "ii", -1, 4, "", methodObject ), + new Among ( "ului", -1, 1, "", methodObject ), + new Among ( "ul", -1, 1, "", methodObject ), + new Among ( "elor", -1, 3, "", methodObject ), + new Among ( "ilor", -1, 4, "", methodObject ), + new Among ( "iilor", 14, 4, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "ea", -1, 1, "", this), - new Among ( "ia", -1, 1, "", this), - new Among ( "esc", -1, 1, "", this), - new Among ( "\u0103sc", -1, 1, "", this), - new Among ( "ind", -1, 1, "", this), - new Among ( "\u00E2nd", -1, 1, "", this), - new Among ( "are", -1, 1, "", this), - new Among ( "ere", -1, 1, "", this), - new Among ( "ire", -1, 1, "", this), - new Among ( "\u00E2re", -1, 1, "", this), - new Among ( "se", -1, 2, "", this), - new Among ( "ase", 10, 1, "", this), 
- new Among ( "sese", 10, 2, "", this), - new Among ( "ise", 10, 1, "", this), - new Among ( "use", 10, 1, "", this), - new Among ( "\u00E2se", 10, 1, "", this), - new Among ( "e\u015Fte", -1, 1, "", this), - new Among ( "\u0103\u015Fte", -1, 1, "", this), - new Among ( "eze", -1, 1, "", this), - new Among ( "ai", -1, 1, "", this), - new Among ( "eai", 19, 1, "", this), - new Among ( "iai", 19, 1, "", this), - new Among ( "sei", -1, 2, "", this), - new Among ( "e\u015Fti", -1, 1, "", this), - new Among ( "\u0103\u015Fti", -1, 1, "", this), - new Among ( "ui", -1, 1, "", this), - new Among ( "ezi", -1, 1, "", this), - new Among ( "\u00E2i", -1, 1, "", this), - new Among ( "a\u015Fi", -1, 1, "", this), - new Among ( "se\u015Fi", -1, 2, "", this), - new Among ( "ase\u015Fi", 29, 1, "", this), - new Among ( "sese\u015Fi", 29, 2, "", this), - new Among ( "ise\u015Fi", 29, 1, "", this), - new Among ( "use\u015Fi", 29, 1, "", this), - new Among ( "\u00E2se\u015Fi", 29, 1, "", this), - new Among ( "i\u015Fi", -1, 1, "", this), - new Among ( "u\u015Fi", -1, 1, "", this), - new Among ( "\u00E2\u015Fi", -1, 1, "", this), - new Among ( "a\u0163i", -1, 2, "", this), - new Among ( "ea\u0163i", 38, 1, "", this), - new Among ( "ia\u0163i", 38, 1, "", this), - new Among ( "e\u0163i", -1, 2, "", this), - new Among ( "i\u0163i", -1, 2, "", this), - new Among ( "\u00E2\u0163i", -1, 2, "", this), - new Among ( "ar\u0103\u0163i", -1, 1, "", this), - new Among ( "ser\u0103\u0163i", -1, 2, "", this), - new Among ( "aser\u0103\u0163i", 45, 1, "", this), - new Among ( "seser\u0103\u0163i", 45, 2, "", this), - new Among ( "iser\u0103\u0163i", 45, 1, "", this), - new Among ( "user\u0103\u0163i", 45, 1, "", this), - new Among ( "\u00E2ser\u0103\u0163i", 45, 1, "", this), - new Among ( "ir\u0103\u0163i", -1, 1, "", this), - new Among ( "ur\u0103\u0163i", -1, 1, "", this), - new Among ( "\u00E2r\u0103\u0163i", -1, 1, "", this), - new Among ( "am", -1, 1, "", this), - new Among ( "eam", 54, 1, "", this), - new Among ( "iam", 54, 1, "", this), - new Among ( "em", -1, 2, "", this), - new Among ( "asem", 57, 1, "", this), - new Among ( "sesem", 57, 2, "", this), - new Among ( "isem", 57, 1, "", this), - new Among ( "usem", 57, 1, "", this), - new Among ( "\u00E2sem", 57, 1, "", this), - new Among ( "im", -1, 2, "", this), - new Among ( "\u00E2m", -1, 2, "", this), - new Among ( "\u0103m", -1, 2, "", this), - new Among ( "ar\u0103m", 65, 1, "", this), - new Among ( "ser\u0103m", 65, 2, "", this), - new Among ( "aser\u0103m", 67, 1, "", this), - new Among ( "seser\u0103m", 67, 2, "", this), - new Among ( "iser\u0103m", 67, 1, "", this), - new Among ( "user\u0103m", 67, 1, "", this), - new Among ( "\u00E2ser\u0103m", 67, 1, "", this), - new Among ( "ir\u0103m", 65, 1, "", this), - new Among ( "ur\u0103m", 65, 1, "", this), - new Among ( "\u00E2r\u0103m", 65, 1, "", this), - new Among ( "au", -1, 1, "", this), - new Among ( "eau", 76, 1, "", this), - new Among ( "iau", 76, 1, "", this), - new Among ( "indu", -1, 1, "", this), - new Among ( "\u00E2ndu", -1, 1, "", this), - new Among ( "ez", -1, 1, "", this), - new Among ( "easc\u0103", -1, 1, "", this), - new Among ( "ar\u0103", -1, 1, "", this), - new Among ( "ser\u0103", -1, 2, "", this), - new Among ( "aser\u0103", 84, 1, "", this), - new Among ( "seser\u0103", 84, 2, "", this), - new Among ( "iser\u0103", 84, 1, "", this), - new Among ( "user\u0103", 84, 1, "", this), - new Among ( "\u00E2ser\u0103", 84, 1, "", this), - new Among ( "ir\u0103", -1, 1, "", this), - new Among ( 
"ur\u0103", -1, 1, "", this), - new Among ( "\u00E2r\u0103", -1, 1, "", this), - new Among ( "eaz\u0103", -1, 1, "", this) - }; + private final static Among a_2[] = { + new Among ( "icala", -1, 4, "", methodObject ), + new Among ( "iciva", -1, 4, "", methodObject ), + new Among ( "ativa", -1, 5, "", methodObject ), + new Among ( "itiva", -1, 6, "", methodObject ), + new Among ( "icale", -1, 4, "", methodObject ), + new Among ( "a\u0163iune", -1, 5, "", methodObject ), + new Among ( "i\u0163iune", -1, 6, "", methodObject ), + new Among ( "atoare", -1, 5, "", methodObject ), + new Among ( "itoare", -1, 6, "", methodObject ), + new Among ( "\u0103toare", -1, 5, "", methodObject ), + new Among ( "icitate", -1, 4, "", methodObject ), + new Among ( "abilitate", -1, 1, "", methodObject ), + new Among ( "ibilitate", -1, 2, "", methodObject ), + new Among ( "ivitate", -1, 3, "", methodObject ), + new Among ( "icive", -1, 4, "", methodObject ), + new Among ( "ative", -1, 5, "", methodObject ), + new Among ( "itive", -1, 6, "", methodObject ), + new Among ( "icali", -1, 4, "", methodObject ), + new Among ( "atori", -1, 5, "", methodObject ), + new Among ( "icatori", 18, 4, "", methodObject ), + new Among ( "itori", -1, 6, "", methodObject ), + new Among ( "\u0103tori", -1, 5, "", methodObject ), + new Among ( "icitati", -1, 4, "", methodObject ), + new Among ( "abilitati", -1, 1, "", methodObject ), + new Among ( "ivitati", -1, 3, "", methodObject ), + new Among ( "icivi", -1, 4, "", methodObject ), + new Among ( "ativi", -1, 5, "", methodObject ), + new Among ( "itivi", -1, 6, "", methodObject ), + new Among ( "icit\u0103i", -1, 4, "", methodObject ), + new Among ( "abilit\u0103i", -1, 1, "", methodObject ), + new Among ( "ivit\u0103i", -1, 3, "", methodObject ), + new Among ( "icit\u0103\u0163i", -1, 4, "", methodObject ), + new Among ( "abilit\u0103\u0163i", -1, 1, "", methodObject ), + new Among ( "ivit\u0103\u0163i", -1, 3, "", methodObject ), + new Among ( "ical", -1, 4, "", methodObject ), + new Among ( "ator", -1, 5, "", methodObject ), + new Among ( "icator", 35, 4, "", methodObject ), + new Among ( "itor", -1, 6, "", methodObject ), + new Among ( "\u0103tor", -1, 5, "", methodObject ), + new Among ( "iciv", -1, 4, "", methodObject ), + new Among ( "ativ", -1, 5, "", methodObject ), + new Among ( "itiv", -1, 6, "", methodObject ), + new Among ( "ical\u0103", -1, 4, "", methodObject ), + new Among ( "iciv\u0103", -1, 4, "", methodObject ), + new Among ( "ativ\u0103", -1, 5, "", methodObject ), + new Among ( "itiv\u0103", -1, 6, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "a", -1, 1, "", this), - new Among ( "e", -1, 1, "", this), - new Among ( "ie", 1, 1, "", this), - new Among ( "i", -1, 1, "", this), - new Among ( "\u0103", -1, 1, "", this) - }; + private final static Among a_3[] = { + new Among ( "ica", -1, 1, "", methodObject ), + new Among ( "abila", -1, 1, "", methodObject ), + new Among ( "ibila", -1, 1, "", methodObject ), + new Among ( "oasa", -1, 1, "", methodObject ), + new Among ( "ata", -1, 1, "", methodObject ), + new Among ( "ita", -1, 1, "", methodObject ), + new Among ( "anta", -1, 1, "", methodObject ), + new Among ( "ista", -1, 3, "", methodObject ), + new Among ( "uta", -1, 1, "", methodObject ), + new Among ( "iva", -1, 1, "", methodObject ), + new Among ( "ic", -1, 1, "", methodObject ), + new Among ( "ice", -1, 1, "", methodObject ), + new Among ( "abile", -1, 1, "", methodObject ), + new Among ( "ibile", -1, 1, "", methodObject ), + new Among ( 
"isme", -1, 3, "", methodObject ), + new Among ( "iune", -1, 2, "", methodObject ), + new Among ( "oase", -1, 1, "", methodObject ), + new Among ( "ate", -1, 1, "", methodObject ), + new Among ( "itate", 17, 1, "", methodObject ), + new Among ( "ite", -1, 1, "", methodObject ), + new Among ( "ante", -1, 1, "", methodObject ), + new Among ( "iste", -1, 3, "", methodObject ), + new Among ( "ute", -1, 1, "", methodObject ), + new Among ( "ive", -1, 1, "", methodObject ), + new Among ( "ici", -1, 1, "", methodObject ), + new Among ( "abili", -1, 1, "", methodObject ), + new Among ( "ibili", -1, 1, "", methodObject ), + new Among ( "iuni", -1, 2, "", methodObject ), + new Among ( "atori", -1, 1, "", methodObject ), + new Among ( "osi", -1, 1, "", methodObject ), + new Among ( "ati", -1, 1, "", methodObject ), + new Among ( "itati", 30, 1, "", methodObject ), + new Among ( "iti", -1, 1, "", methodObject ), + new Among ( "anti", -1, 1, "", methodObject ), + new Among ( "isti", -1, 3, "", methodObject ), + new Among ( "uti", -1, 1, "", methodObject ), + new Among ( "i\u015Fti", -1, 3, "", methodObject ), + new Among ( "ivi", -1, 1, "", methodObject ), + new Among ( "it\u0103i", -1, 1, "", methodObject ), + new Among ( "o\u015Fi", -1, 1, "", methodObject ), + new Among ( "it\u0103\u0163i", -1, 1, "", methodObject ), + new Among ( "abil", -1, 1, "", methodObject ), + new Among ( "ibil", -1, 1, "", methodObject ), + new Among ( "ism", -1, 3, "", methodObject ), + new Among ( "ator", -1, 1, "", methodObject ), + new Among ( "os", -1, 1, "", methodObject ), + new Among ( "at", -1, 1, "", methodObject ), + new Among ( "it", -1, 1, "", methodObject ), + new Among ( "ant", -1, 1, "", methodObject ), + new Among ( "ist", -1, 3, "", methodObject ), + new Among ( "ut", -1, 1, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ), + new Among ( "ic\u0103", -1, 1, "", methodObject ), + new Among ( "abil\u0103", -1, 1, "", methodObject ), + new Among ( "ibil\u0103", -1, 1, "", methodObject ), + new Among ( "oas\u0103", -1, 1, "", methodObject ), + new Among ( "at\u0103", -1, 1, "", methodObject ), + new Among ( "it\u0103", -1, 1, "", methodObject ), + new Among ( "ant\u0103", -1, 1, "", methodObject ), + new Among ( "ist\u0103", -1, 3, "", methodObject ), + new Among ( "ut\u0103", -1, 1, "", methodObject ), + new Among ( "iv\u0103", -1, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4 }; + private final static Among a_4[] = { + new Among ( "ea", -1, 1, "", methodObject ), + new Among ( "ia", -1, 1, "", methodObject ), + new Among ( "esc", -1, 1, "", methodObject ), + new Among ( "\u0103sc", -1, 1, "", methodObject ), + new Among ( "ind", -1, 1, "", methodObject ), + new Among ( "\u00E2nd", -1, 1, "", methodObject ), + new Among ( "are", -1, 1, "", methodObject ), + new Among ( "ere", -1, 1, "", methodObject ), + new Among ( "ire", -1, 1, "", methodObject ), + new Among ( "\u00E2re", -1, 1, "", methodObject ), + new Among ( "se", -1, 2, "", methodObject ), + new Among ( "ase", 10, 1, "", methodObject ), + new Among ( "sese", 10, 2, "", methodObject ), + new Among ( "ise", 10, 1, "", methodObject ), + new Among ( "use", 10, 1, "", methodObject ), + new Among ( "\u00E2se", 10, 1, "", methodObject ), + new Among ( "e\u015Fte", -1, 1, "", methodObject ), + new Among ( "\u0103\u015Fte", -1, 1, "", methodObject ), + new Among ( "eze", -1, 1, "", methodObject ), + new Among ( "ai", -1, 1, "", methodObject ), + new Among ( 
"eai", 19, 1, "", methodObject ), + new Among ( "iai", 19, 1, "", methodObject ), + new Among ( "sei", -1, 2, "", methodObject ), + new Among ( "e\u015Fti", -1, 1, "", methodObject ), + new Among ( "\u0103\u015Fti", -1, 1, "", methodObject ), + new Among ( "ui", -1, 1, "", methodObject ), + new Among ( "ezi", -1, 1, "", methodObject ), + new Among ( "\u00E2i", -1, 1, "", methodObject ), + new Among ( "a\u015Fi", -1, 1, "", methodObject ), + new Among ( "se\u015Fi", -1, 2, "", methodObject ), + new Among ( "ase\u015Fi", 29, 1, "", methodObject ), + new Among ( "sese\u015Fi", 29, 2, "", methodObject ), + new Among ( "ise\u015Fi", 29, 1, "", methodObject ), + new Among ( "use\u015Fi", 29, 1, "", methodObject ), + new Among ( "\u00E2se\u015Fi", 29, 1, "", methodObject ), + new Among ( "i\u015Fi", -1, 1, "", methodObject ), + new Among ( "u\u015Fi", -1, 1, "", methodObject ), + new Among ( "\u00E2\u015Fi", -1, 1, "", methodObject ), + new Among ( "a\u0163i", -1, 2, "", methodObject ), + new Among ( "ea\u0163i", 38, 1, "", methodObject ), + new Among ( "ia\u0163i", 38, 1, "", methodObject ), + new Among ( "e\u0163i", -1, 2, "", methodObject ), + new Among ( "i\u0163i", -1, 2, "", methodObject ), + new Among ( "\u00E2\u0163i", -1, 2, "", methodObject ), + new Among ( "ar\u0103\u0163i", -1, 1, "", methodObject ), + new Among ( "ser\u0103\u0163i", -1, 2, "", methodObject ), + new Among ( "aser\u0103\u0163i", 45, 1, "", methodObject ), + new Among ( "seser\u0103\u0163i", 45, 2, "", methodObject ), + new Among ( "iser\u0103\u0163i", 45, 1, "", methodObject ), + new Among ( "user\u0103\u0163i", 45, 1, "", methodObject ), + new Among ( "\u00E2ser\u0103\u0163i", 45, 1, "", methodObject ), + new Among ( "ir\u0103\u0163i", -1, 1, "", methodObject ), + new Among ( "ur\u0103\u0163i", -1, 1, "", methodObject ), + new Among ( "\u00E2r\u0103\u0163i", -1, 1, "", methodObject ), + new Among ( "am", -1, 1, "", methodObject ), + new Among ( "eam", 54, 1, "", methodObject ), + new Among ( "iam", 54, 1, "", methodObject ), + new Among ( "em", -1, 2, "", methodObject ), + new Among ( "asem", 57, 1, "", methodObject ), + new Among ( "sesem", 57, 2, "", methodObject ), + new Among ( "isem", 57, 1, "", methodObject ), + new Among ( "usem", 57, 1, "", methodObject ), + new Among ( "\u00E2sem", 57, 1, "", methodObject ), + new Among ( "im", -1, 2, "", methodObject ), + new Among ( "\u00E2m", -1, 2, "", methodObject ), + new Among ( "\u0103m", -1, 2, "", methodObject ), + new Among ( "ar\u0103m", 65, 1, "", methodObject ), + new Among ( "ser\u0103m", 65, 2, "", methodObject ), + new Among ( "aser\u0103m", 67, 1, "", methodObject ), + new Among ( "seser\u0103m", 67, 2, "", methodObject ), + new Among ( "iser\u0103m", 67, 1, "", methodObject ), + new Among ( "user\u0103m", 67, 1, "", methodObject ), + new Among ( "\u00E2ser\u0103m", 67, 1, "", methodObject ), + new Among ( "ir\u0103m", 65, 1, "", methodObject ), + new Among ( "ur\u0103m", 65, 1, "", methodObject ), + new Among ( "\u00E2r\u0103m", 65, 1, "", methodObject ), + new Among ( "au", -1, 1, "", methodObject ), + new Among ( "eau", 76, 1, "", methodObject ), + new Among ( "iau", 76, 1, "", methodObject ), + new Among ( "indu", -1, 1, "", methodObject ), + new Among ( "\u00E2ndu", -1, 1, "", methodObject ), + new Among ( "ez", -1, 1, "", methodObject ), + new Among ( "easc\u0103", -1, 1, "", methodObject ), + new Among ( "ar\u0103", -1, 1, "", methodObject ), + new Among ( "ser\u0103", -1, 2, "", methodObject ), + new Among ( "aser\u0103", 84, 1, "", methodObject ), + 
new Among ( "seser\u0103", 84, 2, "", methodObject ), + new Among ( "iser\u0103", 84, 1, "", methodObject ), + new Among ( "user\u0103", 84, 1, "", methodObject ), + new Among ( "\u00E2ser\u0103", 84, 1, "", methodObject ), + new Among ( "ir\u0103", -1, 1, "", methodObject ), + new Among ( "ur\u0103", -1, 1, "", methodObject ), + new Among ( "\u00E2r\u0103", -1, 1, "", methodObject ), + new Among ( "eaz\u0103", -1, 1, "", methodObject ) + }; + private final static Among a_5[] = { + new Among ( "a", -1, 1, "", methodObject ), + new Among ( "e", -1, 1, "", methodObject ), + new Among ( "ie", 1, 1, "", methodObject ), + new Among ( "i", -1, 1, "", methodObject ), + new Among ( "\u0103", -1, 1, "", methodObject ) + }; + + private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 32, 0, 0, 4 }; + private boolean B_standard_suffix_removed; private int I_p2; private int I_p1; private int I_pV; - private void copy_from(RomanianStemmer other) { - B_standard_suffix_removed = other.B_standard_suffix_removed; - I_p2 = other.I_p2; - I_p1 = other.I_p1; - I_pV = other.I_pV; - super.copy_from(other); - } + private void copy_from(RomanianStemmer other) { + B_standard_suffix_removed = other.B_standard_suffix_removed; + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_pV = other.I_pV; + super.copy_from(other); + } - private boolean r_prelude() { + private boolean r_prelude() { int v_1; int v_2; int v_3; - // (, line 31 - // repeat, line 32 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // goto, line 32 - golab2: while(true) + // (, line 31 + // repeat, line 32 + replab0: while(true) { - v_2 = cursor; - lab3: do { - // (, line 32 - if (!(in_grouping(g_v, 97, 259))) + v_1 = cursor; + lab1: do { + // goto, line 32 + golab2: while(true) { - break lab3; - } - // [, line 33 - bra = cursor; - // or, line 33 - lab4: do { - v_3 = cursor; - lab5: do { - // (, line 33 - // literal, line 33 - if (!(eq_s(1, "u"))) - { - break lab5; - } - // ], line 33 - ket = cursor; + v_2 = cursor; + lab3: do { + // (, line 32 if (!(in_grouping(g_v, 97, 259))) { - break lab5; + break lab3; } - // <-, line 33 - slice_from("U"); - break lab4; + // [, line 33 + bra = cursor; + // or, line 33 + lab4: do { + v_3 = cursor; + lab5: do { + // (, line 33 + // literal, line 33 + if (!(eq_s(1, "u"))) + { + break lab5; + } + // ], line 33 + ket = cursor; + if (!(in_grouping(g_v, 97, 259))) + { + break lab5; + } + // <-, line 33 + slice_from("U"); + break lab4; + } while (false); + cursor = v_3; + // (, line 34 + // literal, line 34 + if (!(eq_s(1, "i"))) + { + break lab3; + } + // ], line 34 + ket = cursor; + if (!(in_grouping(g_v, 97, 259))) + { + break lab3; + } + // <-, line 34 + slice_from("I"); + } while (false); + cursor = v_2; + break golab2; } while (false); - cursor = v_3; - // (, line 34 - // literal, line 34 - if (!(eq_s(1, "i"))) + cursor = v_2; + if (cursor >= limit) { - break lab3; + break lab1; } - // ], line 34 - ket = cursor; - if (!(in_grouping(g_v, 97, 259))) - { - break lab3; - } - // <-, line 34 - slice_from("I"); - } while (false); - cursor = v_2; - break golab2; + cursor++; + } + continue replab0; } while (false); - cursor = v_2; - if (cursor >= limit) - { - break lab1; - } - cursor++; + cursor = v_1; + break replab0; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; int v_3; int v_6; int v_8; - // (, line 38 - I_pV = limit; - I_p1 = 
limit; - I_p2 = limit; - // do, line 44 - v_1 = cursor; - lab0: do { - // (, line 44 - // or, line 46 - lab1: do { - v_2 = cursor; - lab2: do { - // (, line 45 - if (!(in_grouping(g_v, 97, 259))) - { - break lab2; - } - // or, line 45 - lab3: do { - v_3 = cursor; - lab4: do { + // (, line 38 + I_pV = limit; + I_p1 = limit; + I_p2 = limit; + // do, line 44 + v_1 = cursor; + lab0: do { + // (, line 44 + // or, line 46 + lab1: do { + v_2 = cursor; + lab2: do { // (, line 45 - if (!(out_grouping(g_v, 97, 259))) + if (!(in_grouping(g_v, 97, 259))) { - break lab4; + break lab2; } - // gopast, line 45 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 259))) + // or, line 45 + lab3: do { + v_3 = cursor; + lab4: do { + // (, line 45 + if (!(out_grouping(g_v, 97, 259))) { - break lab6; + break lab4; } - break golab5; + // gopast, line 45 + golab5: while(true) + { + lab6: do { + if (!(in_grouping(g_v, 97, 259))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + break lab4; + } + cursor++; + } + break lab3; } while (false); - if (cursor >= limit) + cursor = v_3; + // (, line 45 + if (!(in_grouping(g_v, 97, 259))) { - break lab4; + break lab2; } - cursor++; - } - break lab3; + // gopast, line 45 + golab7: while(true) + { + lab8: do { + if (!(out_grouping(g_v, 97, 259))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + break lab2; + } + cursor++; + } + } while (false); + break lab1; } while (false); - cursor = v_3; - // (, line 45 - if (!(in_grouping(g_v, 97, 259))) + cursor = v_2; + // (, line 47 + if (!(out_grouping(g_v, 97, 259))) { - break lab2; + break lab0; } - // gopast, line 45 - golab7: while(true) - { - lab8: do { + // or, line 47 + lab9: do { + v_6 = cursor; + lab10: do { + // (, line 47 if (!(out_grouping(g_v, 97, 259))) { - break lab8; + break lab10; } - break golab7; + // gopast, line 47 + golab11: while(true) + { + lab12: do { + if (!(in_grouping(g_v, 97, 259))) + { + break lab12; + } + break golab11; + } while (false); + if (cursor >= limit) + { + break lab10; + } + cursor++; + } + break lab9; } while (false); + cursor = v_6; + // (, line 47 + if (!(in_grouping(g_v, 97, 259))) + { + break lab0; + } + // next, line 47 if (cursor >= limit) { - break lab2; + break lab0; } cursor++; - } + } while (false); } while (false); - break lab1; + // setmark pV, line 48 + I_pV = cursor; } while (false); - cursor = v_2; - // (, line 47 - if (!(out_grouping(g_v, 97, 259))) - { - break lab0; - } - // or, line 47 - lab9: do { - v_6 = cursor; - lab10: do { - // (, line 47 - if (!(out_grouping(g_v, 97, 259))) + cursor = v_1; + // do, line 50 + v_8 = cursor; + lab13: do { + // (, line 50 + // gopast, line 51 + golab14: while(true) + { + lab15: do { + if (!(in_grouping(g_v, 97, 259))) + { + break lab15; + } + break golab14; + } while (false); + if (cursor >= limit) { - break lab10; + break lab13; } - // gopast, line 47 - golab11: while(true) - { - lab12: do { - if (!(in_grouping(g_v, 97, 259))) - { - break lab12; - } - break golab11; - } while (false); - if (cursor >= limit) + cursor++; + } + // gopast, line 51 + golab16: while(true) + { + lab17: do { + if (!(out_grouping(g_v, 97, 259))) { - break lab10; + break lab17; } - cursor++; + break golab16; + } while (false); + if (cursor >= limit) + { + break lab13; } - break lab9; - } while (false); - cursor = v_6; - // (, line 47 - if (!(in_grouping(g_v, 97, 259))) - { - break lab0; + cursor++; } - // next, line 47 - if (cursor >= limit) + // setmark p1, line 51 + I_p1 = 
cursor; + // gopast, line 52 + golab18: while(true) { - break lab0; + lab19: do { + if (!(in_grouping(g_v, 97, 259))) + { + break lab19; + } + break golab18; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - cursor++; - } while (false); - } while (false); - // setmark pV, line 48 - I_pV = cursor; - } while (false); - cursor = v_1; - // do, line 50 - v_8 = cursor; - lab13: do { - // (, line 50 - // gopast, line 51 - golab14: while(true) - { - lab15: do { - if (!(in_grouping(g_v, 97, 259))) + // gopast, line 52 + golab20: while(true) { - break lab15; + lab21: do { + if (!(out_grouping(g_v, 97, 259))) + { + break lab21; + } + break golab20; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - break golab14; + // setmark p2, line 52 + I_p2 = cursor; } while (false); - if (cursor >= limit) + cursor = v_8; + return true; + } + + private boolean r_postlude() { + int among_var; + int v_1; + // repeat, line 56 + replab0: while(true) { - break lab13; + v_1 = cursor; + lab1: do { + // (, line 56 + // [, line 58 + bra = cursor; + // substring, line 58 + among_var = find_among(a_0, 3); + if (among_var == 0) + { + break lab1; + } + // ], line 58 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 59 + // <-, line 59 + slice_from("i"); + break; + case 2: + // (, line 60 + // <-, line 60 + slice_from("u"); + break; + case 3: + // (, line 61 + // next, line 61 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - cursor++; + return true; } - // gopast, line 51 - golab16: while(true) - { - lab17: do { - if (!(out_grouping(g_v, 97, 259))) - { - break lab17; - } - break golab16; - } while (false); - if (cursor >= limit) + + private boolean r_RV() { + if (!(I_pV <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p1, line 51 - I_p1 = cursor; - // gopast, line 52 - golab18: while(true) - { - lab19: do { - if (!(in_grouping(g_v, 97, 259))) - { - break lab19; - } - break golab18; - } while (false); - if (cursor >= limit) + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // gopast, line 52 - golab20: while(true) - { - lab21: do { - if (!(out_grouping(g_v, 97, 259))) - { - break lab21; - } - break golab20; - } while (false); - if (cursor >= limit) + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p2, line 52 - I_p2 = cursor; - } while (false); - cursor = v_8; - return true; - } - private boolean r_postlude() { + private boolean r_step_0() { int among_var; int v_1; - // repeat, line 56 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 56 - // [, line 58 - bra = cursor; - // substring, line 58 - among_var = find_among(a_0, 3); + // (, line 72 + // [, line 73 + ket = cursor; + // substring, line 73 + among_var = find_among_b(a_1, 16); if (among_var == 0) { - break lab1; + return false; } - // ], line 58 - ket = cursor; + // ], line 73 + bra = cursor; + // call R1, line 73 + if (!r_R1()) + { + return false; + } switch(among_var) { case 0: - break lab1; + return false; case 1: - // (, line 59 - // <-, line 59 - slice_from("i"); + // (, line 75 + // delete, line 75 + slice_del(); break; case 2: - // (, line 60 - // <-, line 60 - slice_from("u"); + // (, line 77 + // <-, line 77 + slice_from("a"); break; case 3: - // (, line 61 - // next, 
line 61 - if (cursor >= limit) + // (, line 79 + // <-, line 79 + slice_from("e"); + break; + case 4: + // (, line 81 + // <-, line 81 + slice_from("i"); + break; + case 5: + // (, line 83 + // not, line 83 { - break lab1; + v_1 = limit - cursor; + lab0: do { + // literal, line 83 + if (!(eq_s_b(2, "ab"))) + { + break lab0; + } + return false; + } while (false); + cursor = limit - v_1; } - cursor++; + // <-, line 83 + slice_from("i"); break; + case 6: + // (, line 85 + // <-, line 85 + slice_from("at"); + break; + case 7: + // (, line 87 + // <-, line 87 + slice_from("a\u0163i"); + break; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - private boolean r_RV() { - if (!(I_pV <= cursor)) - { - return false; - } - return true; - } + private boolean r_combo_suffix() { + int among_var; + int v_1; + // test, line 91 + v_1 = limit - cursor; + // (, line 91 + // [, line 92 + ket = cursor; + // substring, line 92 + among_var = find_among_b(a_2, 46); + if (among_var == 0) + { + return false; + } + // ], line 92 + bra = cursor; + // call R1, line 92 + if (!r_R1()) + { + return false; + } + // (, line 92 + switch(among_var) { + case 0: + return false; + case 1: + // (, line 100 + // <-, line 101 + slice_from("abil"); + break; + case 2: + // (, line 103 + // <-, line 104 + slice_from("ibil"); + break; + case 3: + // (, line 106 + // <-, line 107 + slice_from("iv"); + break; + case 4: + // (, line 112 + // <-, line 113 + slice_from("ic"); + break; + case 5: + // (, line 117 + // <-, line 118 + slice_from("at"); + break; + case 6: + // (, line 121 + // <-, line 122 + slice_from("it"); + break; + } + // set standard_suffix_removed, line 125 + B_standard_suffix_removed = true; + cursor = limit - v_1; + return true; + } - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_step_0() { + private boolean r_standard_suffix() { int among_var; int v_1; - // (, line 72 - // [, line 73 - ket = cursor; - // substring, line 73 - among_var = find_among_b(a_1, 16); - if (among_var == 0) - { - return false; - } - // ], line 73 - bra = cursor; - // call R1, line 73 - if (!r_R1()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 75 - // delete, line 75 - slice_del(); - break; - case 2: - // (, line 77 - // <-, line 77 - slice_from("a"); - break; - case 3: - // (, line 79 - // <-, line 79 - slice_from("e"); - break; - case 4: - // (, line 81 - // <-, line 81 - slice_from("i"); - break; - case 5: - // (, line 83 - // not, line 83 + // (, line 129 + // unset standard_suffix_removed, line 130 + B_standard_suffix_removed = false; + // repeat, line 131 + replab0: while(true) { v_1 = limit - cursor; - lab0: do { - // literal, line 83 - if (!(eq_s_b(2, "ab"))) + lab1: do { + // call combo_suffix, line 131 + if (!r_combo_suffix()) { - break lab0; + break lab1; } - return false; + continue replab0; } while (false); cursor = limit - v_1; + break replab0; } - // <-, line 83 - slice_from("i"); - break; - case 6: - // (, line 85 - // <-, line 85 - slice_from("at"); - break; - case 7: - // (, line 87 - // <-, line 87 - slice_from("a\u0163i"); - break; - } - return true; - } - - private boolean r_combo_suffix() { - int among_var; - int v_1; - // test, line 91 - v_1 = limit - cursor; - // (, line 91 - // [, line 92 - ket = cursor; - // substring, line 92 - among_var = 
find_among_b(a_2, 46); - if (among_var == 0) - { - return false; - } - // ], line 92 - bra = cursor; - // call R1, line 92 - if (!r_R1()) - { - return false; - } - // (, line 92 - switch(among_var) { - case 0: - return false; - case 1: - // (, line 100 - // <-, line 101 - slice_from("abil"); - break; - case 2: - // (, line 103 - // <-, line 104 - slice_from("ibil"); - break; - case 3: - // (, line 106 - // <-, line 107 - slice_from("iv"); - break; - case 4: - // (, line 112 - // <-, line 113 - slice_from("ic"); - break; - case 5: - // (, line 117 - // <-, line 118 - slice_from("at"); - break; - case 6: - // (, line 121 - // <-, line 122 - slice_from("it"); - break; - } - // set standard_suffix_removed, line 125 - B_standard_suffix_removed = true; - cursor = limit - v_1; - return true; - } - - private boolean r_standard_suffix() { - int among_var; - int v_1; - // (, line 129 - // unset standard_suffix_removed, line 130 - B_standard_suffix_removed = false; - // repeat, line 131 - replab0: while(true) - { - v_1 = limit - cursor; - lab1: do { - // call combo_suffix, line 131 - if (!r_combo_suffix()) + // [, line 132 + ket = cursor; + // substring, line 132 + among_var = find_among_b(a_3, 62); + if (among_var == 0) { - break lab1; + return false; } - continue replab0; - } while (false); - cursor = limit - v_1; - break replab0; - } - // [, line 132 - ket = cursor; - // substring, line 132 - among_var = find_among_b(a_3, 62); - if (among_var == 0) - { - return false; - } - // ], line 132 - bra = cursor; - // call R2, line 132 - if (!r_R2()) - { - return false; - } - // (, line 132 - switch(among_var) { - case 0: - return false; - case 1: - // (, line 148 - // delete, line 149 - slice_del(); - break; - case 2: - // (, line 151 - // literal, line 152 - if (!(eq_s_b(1, "\u0163"))) + // ], line 132 + bra = cursor; + // call R2, line 132 + if (!r_R2()) { return false; } - // ], line 152 - bra = cursor; - // <-, line 152 - slice_from("t"); - break; - case 3: - // (, line 155 - // <-, line 156 - slice_from("ist"); - break; - } - // set standard_suffix_removed, line 160 - B_standard_suffix_removed = true; - return true; - } + // (, line 132 + switch(among_var) { + case 0: + return false; + case 1: + // (, line 148 + // delete, line 149 + slice_del(); + break; + case 2: + // (, line 151 + // literal, line 152 + if (!(eq_s_b(1, "\u0163"))) + { + return false; + } + // ], line 152 + bra = cursor; + // <-, line 152 + slice_from("t"); + break; + case 3: + // (, line 155 + // <-, line 156 + slice_from("ist"); + break; + } + // set standard_suffix_removed, line 160 + B_standard_suffix_removed = true; + return true; + } - private boolean r_verb_suffix() { + private boolean r_verb_suffix() { int among_var; int v_1; int v_2; int v_3; - // setlimit, line 164 - v_1 = limit - cursor; - // tomark, line 164 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 164 - // [, line 165 - ket = cursor; - // substring, line 165 - among_var = find_among_b(a_4, 94); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 165 - bra = cursor; - switch(among_var) { - case 0: - limit_backward = v_2; - return false; - case 1: - // (, line 200 - // or, line 200 - lab0: do { - v_3 = limit - cursor; - lab1: do { - if (!(out_grouping_b(g_v, 97, 259))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_3; - // literal, line 200 - if (!(eq_s_b(1, "u"))) - { + // setlimit, line 164 + v_1 = 
limit - cursor; + // tomark, line 164 + if (cursor < I_pV) + { + return false; + } + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 164 + // [, line 165 + ket = cursor; + // substring, line 165 + among_var = find_among_b(a_4, 94); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 165 + bra = cursor; + switch(among_var) { + case 0: limit_backward = v_2; return false; - } - } while (false); - // delete, line 200 - slice_del(); - break; - case 2: - // (, line 214 - // delete, line 214 - slice_del(); - break; - } - limit_backward = v_2; - return true; - } + case 1: + // (, line 200 + // or, line 200 + lab0: do { + v_3 = limit - cursor; + lab1: do { + if (!(out_grouping_b(g_v, 97, 259))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_3; + // literal, line 200 + if (!(eq_s_b(1, "u"))) + { + limit_backward = v_2; + return false; + } + } while (false); + // delete, line 200 + slice_del(); + break; + case 2: + // (, line 214 + // delete, line 214 + slice_del(); + break; + } + limit_backward = v_2; + return true; + } - private boolean r_vowel_suffix() { + private boolean r_vowel_suffix() { int among_var; - // (, line 218 - // [, line 219 - ket = cursor; - // substring, line 219 - among_var = find_among_b(a_5, 5); - if (among_var == 0) - { - return false; - } - // ], line 219 - bra = cursor; - // call RV, line 219 - if (!r_RV()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 220 - // delete, line 220 - slice_del(); - break; - } - return true; - } + // (, line 218 + // [, line 219 + ket = cursor; + // substring, line 219 + among_var = find_among_b(a_5, 5); + if (among_var == 0) + { + return false; + } + // ], line 219 + bra = cursor; + // call RV, line 219 + if (!r_RV()) + { + return false; + } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 220 + // delete, line 220 + slice_del(); + break; + } + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -929,96 +937,108 @@ int v_6; int v_7; int v_8; - // (, line 225 - // do, line 226 - v_1 = cursor; - lab0: do { - // call prelude, line 226 - if (!r_prelude()) - { - break lab0; - } - } while (false); - cursor = v_1; - // do, line 227 - v_2 = cursor; - lab1: do { - // call mark_regions, line 227 - if (!r_mark_regions()) - { - break lab1; - } - } while (false); - cursor = v_2; - // backwards, line 228 - limit_backward = cursor; cursor = limit; - // (, line 228 - // do, line 229 - v_3 = limit - cursor; - lab2: do { - // call step_0, line 229 - if (!r_step_0()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 230 - v_4 = limit - cursor; - lab3: do { - // call standard_suffix, line 230 - if (!r_standard_suffix()) - { - break lab3; - } - } while (false); - cursor = limit - v_4; - // do, line 231 - v_5 = limit - cursor; - lab4: do { - // (, line 231 - // or, line 231 - lab5: do { - v_6 = limit - cursor; - lab6: do { - // Boolean test standard_suffix_removed, line 231 - if (!(B_standard_suffix_removed)) + // (, line 225 + // do, line 226 + v_1 = cursor; + lab0: do { + // call prelude, line 226 + if (!r_prelude()) { - break lab6; + break lab0; } - break lab5; } while (false); - cursor = limit - v_6; - // call verb_suffix, line 231 - if (!r_verb_suffix()) - { - break lab4; - } - } while (false); - } while (false); - cursor = limit - v_5; - // do, line 232 - v_7 = limit - cursor; - lab7: do { - 
// call vowel_suffix, line 232 - if (!r_vowel_suffix()) - { - break lab7; + cursor = v_1; + // do, line 227 + v_2 = cursor; + lab1: do { + // call mark_regions, line 227 + if (!r_mark_regions()) + { + break lab1; + } + } while (false); + cursor = v_2; + // backwards, line 228 + limit_backward = cursor; cursor = limit; + // (, line 228 + // do, line 229 + v_3 = limit - cursor; + lab2: do { + // call step_0, line 229 + if (!r_step_0()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 230 + v_4 = limit - cursor; + lab3: do { + // call standard_suffix, line 230 + if (!r_standard_suffix()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // do, line 231 + v_5 = limit - cursor; + lab4: do { + // (, line 231 + // or, line 231 + lab5: do { + v_6 = limit - cursor; + lab6: do { + // Boolean test standard_suffix_removed, line 231 + if (!(B_standard_suffix_removed)) + { + break lab6; + } + break lab5; + } while (false); + cursor = limit - v_6; + // call verb_suffix, line 231 + if (!r_verb_suffix()) + { + break lab4; + } + } while (false); + } while (false); + cursor = limit - v_5; + // do, line 232 + v_7 = limit - cursor; + lab7: do { + // call vowel_suffix, line 232 + if (!r_vowel_suffix()) + { + break lab7; + } + } while (false); + cursor = limit - v_7; + cursor = limit_backward; // do, line 234 + v_8 = cursor; + lab8: do { + // call postlude, line 234 + if (!r_postlude()) + { + break lab8; + } + } while (false); + cursor = v_8; + return true; } - } while (false); - cursor = limit - v_7; - cursor = limit_backward; // do, line 234 - v_8 = cursor; - lab8: do { - // call postlude, line 234 - if (!r_postlude()) - { - break lab8; - } - } while (false); - cursor = v_8; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof RomanianStemmer; } + @Override + public int hashCode() { + return RomanianStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/RussianStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/RussianStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/RussianStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/RussianStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,595 +1,603 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class RussianStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "\u0432", -1, 1, "", this), - new Among ( "\u0438\u0432", 0, 2, "", this), - new Among ( "\u044B\u0432", 0, 2, "", this), - new Among ( "\u0432\u0448\u0438", -1, 1, "", this), - new Among ( "\u0438\u0432\u0448\u0438", 3, 2, "", this), - new Among ( "\u044B\u0432\u0448\u0438", 3, 2, "", this), - new Among ( "\u0432\u0448\u0438\u0441\u044C", -1, 1, "", this), - new Among ( "\u0438\u0432\u0448\u0438\u0441\u044C", 6, 2, "", this), - new Among ( "\u044B\u0432\u0448\u0438\u0441\u044C", 6, 2, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "\u0435\u0435", -1, 1, "", this), - new Among ( "\u0438\u0435", -1, 1, "", this), - new Among ( "\u043E\u0435", -1, 1, "", this), - new Among ( "\u044B\u0435", -1, 1, "", this), - new Among ( "\u0438\u043C\u0438", -1, 1, "", this), - new Among ( "\u044B\u043C\u0438", -1, 1, "", this), - new Among ( "\u0435\u0439", -1, 1, "", this), - new Among ( "\u0438\u0439", -1, 1, "", this), - new Among ( "\u043E\u0439", -1, 1, "", this), - new Among ( "\u044B\u0439", -1, 1, "", this), - new Among ( "\u0435\u043C", -1, 1, "", this), - new Among ( "\u0438\u043C", -1, 1, "", this), - new Among ( "\u043E\u043C", -1, 1, "", this), - new Among ( "\u044B\u043C", -1, 1, "", this), - new Among ( "\u0435\u0433\u043E", -1, 1, "", this), - new Among ( "\u043E\u0433\u043E", -1, 1, "", this), - new Among ( "\u0435\u043C\u0443", -1, 1, "", this), - new Among ( "\u043E\u043C\u0443", -1, 1, "", this), - new Among ( "\u0438\u0445", -1, 1, "", this), - new Among ( "\u044B\u0445", -1, 1, "", this), - new Among ( "\u0435\u044E", -1, 1, "", this), - new Among ( "\u043E\u044E", -1, 1, "", this), - new Among ( "\u0443\u044E", -1, 1, "", this), - new Among ( "\u044E\u044E", -1, 1, "", this), - new Among ( "\u0430\u044F", -1, 1, "", this), - new Among ( "\u044F\u044F", -1, 1, "", this) - }; + private final static RussianStemmer methodObject = new RussianStemmer (); - private Among a_2[] = { - new Among ( "\u0435\u043C", -1, 1, "", this), - new Among ( "\u043D\u043D", -1, 1, "", this), - new Among ( "\u0432\u0448", -1, 1, "", this), - new Among ( "\u0438\u0432\u0448", 2, 2, "", this), - new Among ( "\u044B\u0432\u0448", 2, 2, "", this), - new Among ( "\u0449", -1, 1, "", this), - new Among ( "\u044E\u0449", 5, 1, "", this), - new Among ( "\u0443\u044E\u0449", 6, 2, "", this) - }; + private final static Among a_0[] = { + new Among ( "\u0432", -1, 1, "", methodObject ), + new Among ( "\u0438\u0432", 0, 2, "", methodObject ), + new Among ( "\u044B\u0432", 0, 2, "", methodObject ), + new Among ( "\u0432\u0448\u0438", -1, 1, "", methodObject ), + new Among ( "\u0438\u0432\u0448\u0438", 3, 2, "", methodObject ), + new Among ( "\u044B\u0432\u0448\u0438", 3, 2, "", methodObject ), + new Among ( "\u0432\u0448\u0438\u0441\u044C", -1, 1, "", methodObject ), + new Among ( "\u0438\u0432\u0448\u0438\u0441\u044C", 6, 2, "", methodObject ), + new Among ( "\u044B\u0432\u0448\u0438\u0441\u044C", 6, 2, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "\u0441\u044C", -1, 1, "", this), - new Among ( "\u0441\u044F", -1, 1, "", this) - }; + private final static Among a_1[] = { + new Among ( "\u0435\u0435", -1, 1, "", methodObject ), + new Among ( "\u0438\u0435", -1, 1, "", methodObject ), + new Among ( "\u043E\u0435", -1, 1, "", methodObject ), + new Among ( "\u044B\u0435", -1, 1, "", methodObject ), + new Among ( "\u0438\u043C\u0438", -1, 1, "", 
methodObject ), + new Among ( "\u044B\u043C\u0438", -1, 1, "", methodObject ), + new Among ( "\u0435\u0439", -1, 1, "", methodObject ), + new Among ( "\u0438\u0439", -1, 1, "", methodObject ), + new Among ( "\u043E\u0439", -1, 1, "", methodObject ), + new Among ( "\u044B\u0439", -1, 1, "", methodObject ), + new Among ( "\u0435\u043C", -1, 1, "", methodObject ), + new Among ( "\u0438\u043C", -1, 1, "", methodObject ), + new Among ( "\u043E\u043C", -1, 1, "", methodObject ), + new Among ( "\u044B\u043C", -1, 1, "", methodObject ), + new Among ( "\u0435\u0433\u043E", -1, 1, "", methodObject ), + new Among ( "\u043E\u0433\u043E", -1, 1, "", methodObject ), + new Among ( "\u0435\u043C\u0443", -1, 1, "", methodObject ), + new Among ( "\u043E\u043C\u0443", -1, 1, "", methodObject ), + new Among ( "\u0438\u0445", -1, 1, "", methodObject ), + new Among ( "\u044B\u0445", -1, 1, "", methodObject ), + new Among ( "\u0435\u044E", -1, 1, "", methodObject ), + new Among ( "\u043E\u044E", -1, 1, "", methodObject ), + new Among ( "\u0443\u044E", -1, 1, "", methodObject ), + new Among ( "\u044E\u044E", -1, 1, "", methodObject ), + new Among ( "\u0430\u044F", -1, 1, "", methodObject ), + new Among ( "\u044F\u044F", -1, 1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "\u043B\u0430", -1, 1, "", this), - new Among ( "\u0438\u043B\u0430", 0, 2, "", this), - new Among ( "\u044B\u043B\u0430", 0, 2, "", this), - new Among ( "\u043D\u0430", -1, 1, "", this), - new Among ( "\u0435\u043D\u0430", 3, 2, "", this), - new Among ( "\u0435\u0442\u0435", -1, 1, "", this), - new Among ( "\u0438\u0442\u0435", -1, 2, "", this), - new Among ( "\u0439\u0442\u0435", -1, 1, "", this), - new Among ( "\u0435\u0439\u0442\u0435", 7, 2, "", this), - new Among ( "\u0443\u0439\u0442\u0435", 7, 2, "", this), - new Among ( "\u043B\u0438", -1, 1, "", this), - new Among ( "\u0438\u043B\u0438", 10, 2, "", this), - new Among ( "\u044B\u043B\u0438", 10, 2, "", this), - new Among ( "\u0439", -1, 1, "", this), - new Among ( "\u0435\u0439", 13, 2, "", this), - new Among ( "\u0443\u0439", 13, 2, "", this), - new Among ( "\u043B", -1, 1, "", this), - new Among ( "\u0438\u043B", 16, 2, "", this), - new Among ( "\u044B\u043B", 16, 2, "", this), - new Among ( "\u0435\u043C", -1, 1, "", this), - new Among ( "\u0438\u043C", -1, 2, "", this), - new Among ( "\u044B\u043C", -1, 2, "", this), - new Among ( "\u043D", -1, 1, "", this), - new Among ( "\u0435\u043D", 22, 2, "", this), - new Among ( "\u043B\u043E", -1, 1, "", this), - new Among ( "\u0438\u043B\u043E", 24, 2, "", this), - new Among ( "\u044B\u043B\u043E", 24, 2, "", this), - new Among ( "\u043D\u043E", -1, 1, "", this), - new Among ( "\u0435\u043D\u043E", 27, 2, "", this), - new Among ( "\u043D\u043D\u043E", 27, 1, "", this), - new Among ( "\u0435\u0442", -1, 1, "", this), - new Among ( "\u0443\u0435\u0442", 30, 2, "", this), - new Among ( "\u0438\u0442", -1, 2, "", this), - new Among ( "\u044B\u0442", -1, 2, "", this), - new Among ( "\u044E\u0442", -1, 1, "", this), - new Among ( "\u0443\u044E\u0442", 34, 2, "", this), - new Among ( "\u044F\u0442", -1, 2, "", this), - new Among ( "\u043D\u044B", -1, 1, "", this), - new Among ( "\u0435\u043D\u044B", 37, 2, "", this), - new Among ( "\u0442\u044C", -1, 1, "", this), - new Among ( "\u0438\u0442\u044C", 39, 2, "", this), - new Among ( "\u044B\u0442\u044C", 39, 2, "", this), - new Among ( "\u0435\u0448\u044C", -1, 1, "", this), - new Among ( "\u0438\u0448\u044C", -1, 2, "", this), - new Among ( "\u044E", -1, 2, "", this), - new Among 
( "\u0443\u044E", 44, 2, "", this) - }; + private final static Among a_2[] = { + new Among ( "\u0435\u043C", -1, 1, "", methodObject ), + new Among ( "\u043D\u043D", -1, 1, "", methodObject ), + new Among ( "\u0432\u0448", -1, 1, "", methodObject ), + new Among ( "\u0438\u0432\u0448", 2, 2, "", methodObject ), + new Among ( "\u044B\u0432\u0448", 2, 2, "", methodObject ), + new Among ( "\u0449", -1, 1, "", methodObject ), + new Among ( "\u044E\u0449", 5, 1, "", methodObject ), + new Among ( "\u0443\u044E\u0449", 6, 2, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "\u0430", -1, 1, "", this), - new Among ( "\u0435\u0432", -1, 1, "", this), - new Among ( "\u043E\u0432", -1, 1, "", this), - new Among ( "\u0435", -1, 1, "", this), - new Among ( "\u0438\u0435", 3, 1, "", this), - new Among ( "\u044C\u0435", 3, 1, "", this), - new Among ( "\u0438", -1, 1, "", this), - new Among ( "\u0435\u0438", 6, 1, "", this), - new Among ( "\u0438\u0438", 6, 1, "", this), - new Among ( "\u0430\u043C\u0438", 6, 1, "", this), - new Among ( "\u044F\u043C\u0438", 6, 1, "", this), - new Among ( "\u0438\u044F\u043C\u0438", 10, 1, "", this), - new Among ( "\u0439", -1, 1, "", this), - new Among ( "\u0435\u0439", 12, 1, "", this), - new Among ( "\u0438\u0435\u0439", 13, 1, "", this), - new Among ( "\u0438\u0439", 12, 1, "", this), - new Among ( "\u043E\u0439", 12, 1, "", this), - new Among ( "\u0430\u043C", -1, 1, "", this), - new Among ( "\u0435\u043C", -1, 1, "", this), - new Among ( "\u0438\u0435\u043C", 18, 1, "", this), - new Among ( "\u043E\u043C", -1, 1, "", this), - new Among ( "\u044F\u043C", -1, 1, "", this), - new Among ( "\u0438\u044F\u043C", 21, 1, "", this), - new Among ( "\u043E", -1, 1, "", this), - new Among ( "\u0443", -1, 1, "", this), - new Among ( "\u0430\u0445", -1, 1, "", this), - new Among ( "\u044F\u0445", -1, 1, "", this), - new Among ( "\u0438\u044F\u0445", 26, 1, "", this), - new Among ( "\u044B", -1, 1, "", this), - new Among ( "\u044C", -1, 1, "", this), - new Among ( "\u044E", -1, 1, "", this), - new Among ( "\u0438\u044E", 30, 1, "", this), - new Among ( "\u044C\u044E", 30, 1, "", this), - new Among ( "\u044F", -1, 1, "", this), - new Among ( "\u0438\u044F", 33, 1, "", this), - new Among ( "\u044C\u044F", 33, 1, "", this) - }; + private final static Among a_3[] = { + new Among ( "\u0441\u044C", -1, 1, "", methodObject ), + new Among ( "\u0441\u044F", -1, 1, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "\u043E\u0441\u0442", -1, 1, "", this), - new Among ( "\u043E\u0441\u0442\u044C", -1, 1, "", this) - }; + private final static Among a_4[] = { + new Among ( "\u043B\u0430", -1, 1, "", methodObject ), + new Among ( "\u0438\u043B\u0430", 0, 2, "", methodObject ), + new Among ( "\u044B\u043B\u0430", 0, 2, "", methodObject ), + new Among ( "\u043D\u0430", -1, 1, "", methodObject ), + new Among ( "\u0435\u043D\u0430", 3, 2, "", methodObject ), + new Among ( "\u0435\u0442\u0435", -1, 1, "", methodObject ), + new Among ( "\u0438\u0442\u0435", -1, 2, "", methodObject ), + new Among ( "\u0439\u0442\u0435", -1, 1, "", methodObject ), + new Among ( "\u0435\u0439\u0442\u0435", 7, 2, "", methodObject ), + new Among ( "\u0443\u0439\u0442\u0435", 7, 2, "", methodObject ), + new Among ( "\u043B\u0438", -1, 1, "", methodObject ), + new Among ( "\u0438\u043B\u0438", 10, 2, "", methodObject ), + new Among ( "\u044B\u043B\u0438", 10, 2, "", methodObject ), + new Among ( "\u0439", -1, 1, "", methodObject ), + new Among ( "\u0435\u0439", 13, 2, "", methodObject ), + new Among 
( "\u0443\u0439", 13, 2, "", methodObject ), + new Among ( "\u043B", -1, 1, "", methodObject ), + new Among ( "\u0438\u043B", 16, 2, "", methodObject ), + new Among ( "\u044B\u043B", 16, 2, "", methodObject ), + new Among ( "\u0435\u043C", -1, 1, "", methodObject ), + new Among ( "\u0438\u043C", -1, 2, "", methodObject ), + new Among ( "\u044B\u043C", -1, 2, "", methodObject ), + new Among ( "\u043D", -1, 1, "", methodObject ), + new Among ( "\u0435\u043D", 22, 2, "", methodObject ), + new Among ( "\u043B\u043E", -1, 1, "", methodObject ), + new Among ( "\u0438\u043B\u043E", 24, 2, "", methodObject ), + new Among ( "\u044B\u043B\u043E", 24, 2, "", methodObject ), + new Among ( "\u043D\u043E", -1, 1, "", methodObject ), + new Among ( "\u0435\u043D\u043E", 27, 2, "", methodObject ), + new Among ( "\u043D\u043D\u043E", 27, 1, "", methodObject ), + new Among ( "\u0435\u0442", -1, 1, "", methodObject ), + new Among ( "\u0443\u0435\u0442", 30, 2, "", methodObject ), + new Among ( "\u0438\u0442", -1, 2, "", methodObject ), + new Among ( "\u044B\u0442", -1, 2, "", methodObject ), + new Among ( "\u044E\u0442", -1, 1, "", methodObject ), + new Among ( "\u0443\u044E\u0442", 34, 2, "", methodObject ), + new Among ( "\u044F\u0442", -1, 2, "", methodObject ), + new Among ( "\u043D\u044B", -1, 1, "", methodObject ), + new Among ( "\u0435\u043D\u044B", 37, 2, "", methodObject ), + new Among ( "\u0442\u044C", -1, 1, "", methodObject ), + new Among ( "\u0438\u0442\u044C", 39, 2, "", methodObject ), + new Among ( "\u044B\u0442\u044C", 39, 2, "", methodObject ), + new Among ( "\u0435\u0448\u044C", -1, 1, "", methodObject ), + new Among ( "\u0438\u0448\u044C", -1, 2, "", methodObject ), + new Among ( "\u044E", -1, 2, "", methodObject ), + new Among ( "\u0443\u044E", 44, 2, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "\u0435\u0439\u0448\u0435", -1, 1, "", this), - new Among ( "\u043D", -1, 2, "", this), - new Among ( "\u0435\u0439\u0448", -1, 1, "", this), - new Among ( "\u044C", -1, 3, "", this) - }; + private final static Among a_5[] = { + new Among ( "\u0430", -1, 1, "", methodObject ), + new Among ( "\u0435\u0432", -1, 1, "", methodObject ), + new Among ( "\u043E\u0432", -1, 1, "", methodObject ), + new Among ( "\u0435", -1, 1, "", methodObject ), + new Among ( "\u0438\u0435", 3, 1, "", methodObject ), + new Among ( "\u044C\u0435", 3, 1, "", methodObject ), + new Among ( "\u0438", -1, 1, "", methodObject ), + new Among ( "\u0435\u0438", 6, 1, "", methodObject ), + new Among ( "\u0438\u0438", 6, 1, "", methodObject ), + new Among ( "\u0430\u043C\u0438", 6, 1, "", methodObject ), + new Among ( "\u044F\u043C\u0438", 6, 1, "", methodObject ), + new Among ( "\u0438\u044F\u043C\u0438", 10, 1, "", methodObject ), + new Among ( "\u0439", -1, 1, "", methodObject ), + new Among ( "\u0435\u0439", 12, 1, "", methodObject ), + new Among ( "\u0438\u0435\u0439", 13, 1, "", methodObject ), + new Among ( "\u0438\u0439", 12, 1, "", methodObject ), + new Among ( "\u043E\u0439", 12, 1, "", methodObject ), + new Among ( "\u0430\u043C", -1, 1, "", methodObject ), + new Among ( "\u0435\u043C", -1, 1, "", methodObject ), + new Among ( "\u0438\u0435\u043C", 18, 1, "", methodObject ), + new Among ( "\u043E\u043C", -1, 1, "", methodObject ), + new Among ( "\u044F\u043C", -1, 1, "", methodObject ), + new Among ( "\u0438\u044F\u043C", 21, 1, "", methodObject ), + new Among ( "\u043E", -1, 1, "", methodObject ), + new Among ( "\u0443", -1, 1, "", methodObject ), + new Among ( "\u0430\u0445", -1, 1, "", methodObject 
), + new Among ( "\u044F\u0445", -1, 1, "", methodObject ), + new Among ( "\u0438\u044F\u0445", 26, 1, "", methodObject ), + new Among ( "\u044B", -1, 1, "", methodObject ), + new Among ( "\u044C", -1, 1, "", methodObject ), + new Among ( "\u044E", -1, 1, "", methodObject ), + new Among ( "\u0438\u044E", 30, 1, "", methodObject ), + new Among ( "\u044C\u044E", 30, 1, "", methodObject ), + new Among ( "\u044F", -1, 1, "", methodObject ), + new Among ( "\u0438\u044F", 33, 1, "", methodObject ), + new Among ( "\u044C\u044F", 33, 1, "", methodObject ) + }; - private static final char g_v[] = {33, 65, 8, 232 }; + private final static Among a_6[] = { + new Among ( "\u043E\u0441\u0442", -1, 1, "", methodObject ), + new Among ( "\u043E\u0441\u0442\u044C", -1, 1, "", methodObject ) + }; + private final static Among a_7[] = { + new Among ( "\u0435\u0439\u0448\u0435", -1, 1, "", methodObject ), + new Among ( "\u043D", -1, 2, "", methodObject ), + new Among ( "\u0435\u0439\u0448", -1, 1, "", methodObject ), + new Among ( "\u044C", -1, 3, "", methodObject ) + }; + + private static final char g_v[] = {33, 65, 8, 232 }; + private int I_p2; private int I_pV; - private void copy_from(RussianStemmer other) { - I_p2 = other.I_p2; - I_pV = other.I_pV; - super.copy_from(other); - } + private void copy_from(RussianStemmer other) { + I_p2 = other.I_p2; + I_pV = other.I_pV; + super.copy_from(other); + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; - // (, line 57 - I_pV = limit; - I_p2 = limit; - // do, line 61 - v_1 = cursor; - lab0: do { - // (, line 61 - // gopast, line 62 - golab1: while(true) - { - lab2: do { - if (!(in_grouping(g_v, 1072, 1103))) + // (, line 57 + I_pV = limit; + I_p2 = limit; + // do, line 61 + v_1 = cursor; + lab0: do { + // (, line 61 + // gopast, line 62 + golab1: while(true) { - break lab2; + lab2: do { + if (!(in_grouping(g_v, 1072, 1103))) + { + break lab2; + } + break golab1; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; } - break golab1; - } while (false); - if (cursor >= limit) - { - break lab0; - } - cursor++; - } - // setmark pV, line 62 - I_pV = cursor; - // gopast, line 62 - golab3: while(true) - { - lab4: do { - if (!(out_grouping(g_v, 1072, 1103))) + // setmark pV, line 62 + I_pV = cursor; + // gopast, line 62 + golab3: while(true) { - break lab4; + lab4: do { + if (!(out_grouping(g_v, 1072, 1103))) + { + break lab4; + } + break golab3; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; } - break golab3; - } while (false); - if (cursor >= limit) - { - break lab0; - } - cursor++; - } - // gopast, line 63 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 1072, 1103))) + // gopast, line 63 + golab5: while(true) { - break lab6; + lab6: do { + if (!(in_grouping(g_v, 1072, 1103))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; } - break golab5; - } while (false); - if (cursor >= limit) - { - break lab0; - } - cursor++; - } - // gopast, line 63 - golab7: while(true) - { - lab8: do { - if (!(out_grouping(g_v, 1072, 1103))) + // gopast, line 63 + golab7: while(true) { - break lab8; + lab8: do { + if (!(out_grouping(g_v, 1072, 1103))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + break lab0; + } + cursor++; } - break golab7; + // setmark p2, line 63 + I_p2 = cursor; } while (false); - if (cursor >= limit) + cursor = v_1; + return true; + } + + private boolean r_R2() { + if 
(!(I_p2 <= cursor)) { - break lab0; + return false; } - cursor++; + return true; } - // setmark p2, line 63 - I_p2 = cursor; - } while (false); - cursor = v_1; - return true; - } - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_perfective_gerund() { + private boolean r_perfective_gerund() { int among_var; int v_1; - // (, line 71 - // [, line 72 - ket = cursor; - // substring, line 72 - among_var = find_among_b(a_0, 9); - if (among_var == 0) - { - return false; - } - // ], line 72 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 76 - // or, line 76 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // literal, line 76 - if (!(eq_s_b(1, "\u0430"))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_1; - // literal, line 76 - if (!(eq_s_b(1, "\u044F"))) - { + // (, line 71 + // [, line 72 + ket = cursor; + // substring, line 72 + among_var = find_among_b(a_0, 9); + if (among_var == 0) + { + return false; + } + // ], line 72 + bra = cursor; + switch(among_var) { + case 0: return false; - } - } while (false); - // delete, line 76 - slice_del(); - break; - case 2: - // (, line 83 - // delete, line 83 - slice_del(); - break; - } - return true; - } - - private boolean r_adjective() { - int among_var; - // (, line 87 - // [, line 88 - ket = cursor; - // substring, line 88 - among_var = find_among_b(a_1, 26); - if (among_var == 0) - { - return false; - } - // ], line 88 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 97 - // delete, line 97 - slice_del(); - break; - } - return true; - } - - private boolean r_adjectival() { - int among_var; - int v_1; - int v_2; - // (, line 101 - // call adjective, line 102 - if (!r_adjective()) - { - return false; - } - // try, line 109 - v_1 = limit - cursor; - lab0: do { - // (, line 109 - // [, line 110 - ket = cursor; - // substring, line 110 - among_var = find_among_b(a_2, 8); - if (among_var == 0) - { - cursor = limit - v_1; - break lab0; - } - // ], line 110 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_1; - break lab0; - case 1: - // (, line 115 - // or, line 115 - lab1: do { - v_2 = limit - cursor; - lab2: do { - // literal, line 115 - if (!(eq_s_b(1, "\u0430"))) + case 1: + // (, line 76 + // or, line 76 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // literal, line 76 + if (!(eq_s_b(1, "\u0430"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_1; + // literal, line 76 + if (!(eq_s_b(1, "\u044F"))) { - break lab2; + return false; } - break lab1; } while (false); - cursor = limit - v_2; - // literal, line 115 - if (!(eq_s_b(1, "\u044F"))) - { - cursor = limit - v_1; - break lab0; - } - } while (false); - // delete, line 115 - slice_del(); - break; - case 2: - // (, line 122 - // delete, line 122 - slice_del(); - break; + // delete, line 76 + slice_del(); + break; + case 2: + // (, line 83 + // delete, line 83 + slice_del(); + break; + } + return true; } - } while (false); - return true; - } - private boolean r_reflexive() { + private boolean r_adjective() { int among_var; - // (, line 128 - // [, line 129 - ket = cursor; - // substring, line 129 - among_var = find_among_b(a_3, 2); - if (among_var == 0) - { - return false; - } - // ], line 129 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 132 - // delete, line 132 - slice_del(); - break; - } - return true; - } + // (, line 87 + // [, line 88 
+ ket = cursor; + // substring, line 88 + among_var = find_among_b(a_1, 26); + if (among_var == 0) + { + return false; + } + // ], line 88 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 97 + // delete, line 97 + slice_del(); + break; + } + return true; + } - private boolean r_verb() { + private boolean r_adjectival() { int among_var; int v_1; - // (, line 136 - // [, line 137 - ket = cursor; - // substring, line 137 - among_var = find_among_b(a_4, 46); - if (among_var == 0) - { - return false; - } - // ], line 137 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 143 - // or, line 143 + int v_2; + // (, line 101 + // call adjective, line 102 + if (!r_adjective()) + { + return false; + } + // try, line 109 + v_1 = limit - cursor; lab0: do { - v_1 = limit - cursor; - lab1: do { - // literal, line 143 - if (!(eq_s_b(1, "\u0430"))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_1; - // literal, line 143 - if (!(eq_s_b(1, "\u044F"))) + // (, line 109 + // [, line 110 + ket = cursor; + // substring, line 110 + among_var = find_among_b(a_2, 8); + if (among_var == 0) { - return false; + cursor = limit - v_1; + break lab0; } + // ], line 110 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_1; + break lab0; + case 1: + // (, line 115 + // or, line 115 + lab1: do { + v_2 = limit - cursor; + lab2: do { + // literal, line 115 + if (!(eq_s_b(1, "\u0430"))) + { + break lab2; + } + break lab1; + } while (false); + cursor = limit - v_2; + // literal, line 115 + if (!(eq_s_b(1, "\u044F"))) + { + cursor = limit - v_1; + break lab0; + } + } while (false); + // delete, line 115 + slice_del(); + break; + case 2: + // (, line 122 + // delete, line 122 + slice_del(); + break; + } } while (false); - // delete, line 143 - slice_del(); - break; - case 2: - // (, line 151 - // delete, line 151 - slice_del(); - break; - } - return true; - } + return true; + } - private boolean r_noun() { + private boolean r_reflexive() { int among_var; - // (, line 159 - // [, line 160 - ket = cursor; - // substring, line 160 - among_var = find_among_b(a_5, 36); - if (among_var == 0) - { - return false; - } - // ], line 160 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 167 - // delete, line 167 - slice_del(); - break; - } - return true; - } + // (, line 128 + // [, line 129 + ket = cursor; + // substring, line 129 + among_var = find_among_b(a_3, 2); + if (among_var == 0) + { + return false; + } + // ], line 129 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 132 + // delete, line 132 + slice_del(); + break; + } + return true; + } - private boolean r_derivational() { + private boolean r_verb() { int among_var; - // (, line 175 - // [, line 176 - ket = cursor; - // substring, line 176 - among_var = find_among_b(a_6, 2); - if (among_var == 0) - { - return false; - } - // ], line 176 - bra = cursor; - // call R2, line 176 - if (!r_R2()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 179 - // delete, line 179 - slice_del(); - break; - } - return true; - } + int v_1; + // (, line 136 + // [, line 137 + ket = cursor; + // substring, line 137 + among_var = find_among_b(a_4, 46); + if (among_var == 0) + { + return false; + } + // ], line 137 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 143 + // or, line 143 + lab0: do { + v_1 = limit - cursor; + lab1: 
do { + // literal, line 143 + if (!(eq_s_b(1, "\u0430"))) + { + break lab1; + } + break lab0; + } while (false); + cursor = limit - v_1; + // literal, line 143 + if (!(eq_s_b(1, "\u044F"))) + { + return false; + } + } while (false); + // delete, line 143 + slice_del(); + break; + case 2: + // (, line 151 + // delete, line 151 + slice_del(); + break; + } + return true; + } - private boolean r_tidy_up() { + private boolean r_noun() { int among_var; - // (, line 183 - // [, line 184 - ket = cursor; - // substring, line 184 - among_var = find_among_b(a_7, 4); - if (among_var == 0) - { - return false; - } - // ], line 184 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 188 - // delete, line 188 - slice_del(); - // [, line 189 + // (, line 159 + // [, line 160 ket = cursor; - // literal, line 189 - if (!(eq_s_b(1, "\u043D"))) + // substring, line 160 + among_var = find_among_b(a_5, 36); + if (among_var == 0) { return false; } - // ], line 189 + // ], line 160 bra = cursor; - // literal, line 189 - if (!(eq_s_b(1, "\u043D"))) + switch(among_var) { + case 0: + return false; + case 1: + // (, line 167 + // delete, line 167 + slice_del(); + break; + } + return true; + } + + private boolean r_derivational() { + int among_var; + // (, line 175 + // [, line 176 + ket = cursor; + // substring, line 176 + among_var = find_among_b(a_6, 2); + if (among_var == 0) { return false; } - // delete, line 189 - slice_del(); - break; - case 2: - // (, line 192 - // literal, line 192 - if (!(eq_s_b(1, "\u043D"))) + // ], line 176 + bra = cursor; + // call R2, line 176 + if (!r_R2()) { return false; } - // delete, line 192 - slice_del(); - break; - case 3: - // (, line 194 - // delete, line 194 - slice_del(); - break; - } - return true; - } + switch(among_var) { + case 0: + return false; + case 1: + // (, line 179 + // delete, line 179 + slice_del(); + break; + } + return true; + } - public boolean stem() { + private boolean r_tidy_up() { + int among_var; + // (, line 183 + // [, line 184 + ket = cursor; + // substring, line 184 + among_var = find_among_b(a_7, 4); + if (among_var == 0) + { + return false; + } + // ], line 184 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 188 + // delete, line 188 + slice_del(); + // [, line 189 + ket = cursor; + // literal, line 189 + if (!(eq_s_b(1, "\u043D"))) + { + return false; + } + // ], line 189 + bra = cursor; + // literal, line 189 + if (!(eq_s_b(1, "\u043D"))) + { + return false; + } + // delete, line 189 + slice_del(); + break; + case 2: + // (, line 192 + // literal, line 192 + if (!(eq_s_b(1, "\u043D"))) + { + return false; + } + // delete, line 192 + slice_del(); + break; + case 3: + // (, line 194 + // delete, line 194 + slice_del(); + break; + } + return true; + } + + @Override + public boolean stem() { int v_1; int v_2; int v_3; @@ -600,128 +608,140 @@ int v_8; int v_9; int v_10; - // (, line 199 - // do, line 201 - v_1 = cursor; - lab0: do { - // call mark_regions, line 201 - if (!r_mark_regions()) - { - break lab0; - } - } while (false); - cursor = v_1; - // backwards, line 202 - limit_backward = cursor; cursor = limit; - // setlimit, line 202 - v_2 = limit - cursor; - // tomark, line 202 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_3 = limit_backward; - limit_backward = cursor; - cursor = limit - v_2; - // (, line 202 - // do, line 203 - v_4 = limit - cursor; - lab1: do { - // (, line 203 - // or, line 204 - lab2: do { - v_5 = limit - cursor; - lab3: do { - // call 
perfective_gerund, line 204 - if (!r_perfective_gerund()) + // (, line 199 + // do, line 201 + v_1 = cursor; + lab0: do { + // call mark_regions, line 201 + if (!r_mark_regions()) { - break lab3; + break lab0; } - break lab2; } while (false); - cursor = limit - v_5; - // (, line 205 - // try, line 205 - v_6 = limit - cursor; - lab4: do { - // call reflexive, line 205 - if (!r_reflexive()) + cursor = v_1; + // backwards, line 202 + limit_backward = cursor; cursor = limit; + // setlimit, line 202 + v_2 = limit - cursor; + // tomark, line 202 + if (cursor < I_pV) + { + return false; + } + cursor = I_pV; + v_3 = limit_backward; + limit_backward = cursor; + cursor = limit - v_2; + // (, line 202 + // do, line 203 + v_4 = limit - cursor; + lab1: do { + // (, line 203 + // or, line 204 + lab2: do { + v_5 = limit - cursor; + lab3: do { + // call perfective_gerund, line 204 + if (!r_perfective_gerund()) + { + break lab3; + } + break lab2; + } while (false); + cursor = limit - v_5; + // (, line 205 + // try, line 205 + v_6 = limit - cursor; + lab4: do { + // call reflexive, line 205 + if (!r_reflexive()) + { + cursor = limit - v_6; + break lab4; + } + } while (false); + // or, line 206 + lab5: do { + v_7 = limit - cursor; + lab6: do { + // call adjectival, line 206 + if (!r_adjectival()) + { + break lab6; + } + break lab5; + } while (false); + cursor = limit - v_7; + lab7: do { + // call verb, line 206 + if (!r_verb()) + { + break lab7; + } + break lab5; + } while (false); + cursor = limit - v_7; + // call noun, line 206 + if (!r_noun()) + { + break lab1; + } + } while (false); + } while (false); + } while (false); + cursor = limit - v_4; + // try, line 209 + v_8 = limit - cursor; + lab8: do { + // (, line 209 + // [, line 209 + ket = cursor; + // literal, line 209 + if (!(eq_s_b(1, "\u0438"))) { - cursor = limit - v_6; - break lab4; + cursor = limit - v_8; + break lab8; } + // ], line 209 + bra = cursor; + // delete, line 209 + slice_del(); } while (false); - // or, line 206 - lab5: do { - v_7 = limit - cursor; - lab6: do { - // call adjectival, line 206 - if (!r_adjectival()) - { - break lab6; - } - break lab5; - } while (false); - cursor = limit - v_7; - lab7: do { - // call verb, line 206 - if (!r_verb()) - { - break lab7; - } - break lab5; - } while (false); - cursor = limit - v_7; - // call noun, line 206 - if (!r_noun()) + // do, line 212 + v_9 = limit - cursor; + lab9: do { + // call derivational, line 212 + if (!r_derivational()) { - break lab1; + break lab9; } } while (false); - } while (false); - } while (false); - cursor = limit - v_4; - // try, line 209 - v_8 = limit - cursor; - lab8: do { - // (, line 209 - // [, line 209 - ket = cursor; - // literal, line 209 - if (!(eq_s_b(1, "\u0438"))) - { - cursor = limit - v_8; - break lab8; + cursor = limit - v_9; + // do, line 213 + v_10 = limit - cursor; + lab10: do { + // call tidy_up, line 213 + if (!r_tidy_up()) + { + break lab10; + } + } while (false); + cursor = limit - v_10; + limit_backward = v_3; + cursor = limit_backward; return true; } - // ], line 209 - bra = cursor; - // delete, line 209 - slice_del(); - } while (false); - // do, line 212 - v_9 = limit - cursor; - lab9: do { - // call derivational, line 212 - if (!r_derivational()) - { - break lab9; - } - } while (false); - cursor = limit - v_9; - // do, line 213 - v_10 = limit - cursor; - lab10: do { - // call tidy_up, line 213 - if (!r_tidy_up()) - { - break lab10; - } - } while (false); - cursor = limit - v_10; - limit_backward = v_3; - cursor = limit_backward; return true; + + 
@Override + public boolean equals( Object o ) { + return o instanceof RussianStemmer; } + @Override + public int hashCode() { + return RussianStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/SpanishStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/SpanishStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/SpanishStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/SpanishStemmer.java 16 Dec 2014 11:31:46 -0000 1.1.2.1 @@ -1,1182 +1,1202 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class SpanishStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "", -1, 6, "", this), - new Among ( "\u00E1", 0, 1, "", this), - new Among ( "\u00E9", 0, 2, "", this), - new Among ( "\u00ED", 0, 3, "", this), - new Among ( "\u00F3", 0, 4, "", this), - new Among ( "\u00FA", 0, 5, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "la", -1, -1, "", this), - new Among ( "sela", 0, -1, "", this), - new Among ( "le", -1, -1, "", this), - new Among ( "me", -1, -1, "", this), - new Among ( "se", -1, -1, "", this), - new Among ( "lo", -1, -1, "", this), - new Among ( "selo", 5, -1, "", this), - new Among ( "las", -1, -1, "", this), - new Among ( "selas", 7, -1, "", this), - new Among ( "les", -1, -1, "", this), - new Among ( "los", -1, -1, "", this), - new Among ( "selos", 10, -1, "", this), - new Among ( "nos", -1, -1, "", this) - }; + private final static SpanishStemmer methodObject = new SpanishStemmer (); - private Among a_2[] = { - new Among ( "ando", -1, 6, "", this), - new Among ( "iendo", -1, 6, "", this), - new Among ( "yendo", -1, 7, "", this), - new Among ( "\u00E1ndo", -1, 2, "", this), - new Among ( "i\u00E9ndo", -1, 1, "", this), - new Among ( "ar", -1, 6, "", this), - new Among ( "er", -1, 6, "", this), - new Among ( "ir", -1, 6, "", this), - new Among ( "\u00E1r", -1, 3, "", this), - new Among ( "\u00E9r", -1, 4, "", this), - new Among ( "\u00EDr", -1, 5, "", this) - }; + private final static Among a_0[] = { + new Among ( "", -1, 6, "", methodObject ), + new Among ( "\u00E1", 0, 1, "", methodObject ), + new Among ( "\u00E9", 0, 2, "", methodObject ), + new Among ( "\u00ED", 0, 3, "", methodObject ), + new Among ( "\u00F3", 0, 4, "", methodObject ), + new Among ( "\u00FA", 0, 5, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "ic", -1, -1, "", this), - new Among ( "ad", -1, -1, "", this), - new Among ( "os", -1, -1, "", this), - new Among ( "iv", -1, 1, "", this) - }; + private final static Among a_1[] = { + new Among ( "la", -1, -1, "", methodObject ), + new Among ( "sela", 0, -1, "", methodObject ), + new Among ( "le", -1, -1, "", methodObject ), + new Among ( "me", -1, -1, "", methodObject ), + new Among ( "se", -1, -1, "", methodObject ), + new Among ( "lo", -1, -1, "", methodObject ), + new Among ( "selo", 5, -1, "", methodObject 
), + new Among ( "las", -1, -1, "", methodObject ), + new Among ( "selas", 7, -1, "", methodObject ), + new Among ( "les", -1, -1, "", methodObject ), + new Among ( "los", -1, -1, "", methodObject ), + new Among ( "selos", 10, -1, "", methodObject ), + new Among ( "nos", -1, -1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "able", -1, 1, "", this), - new Among ( "ible", -1, 1, "", this), - new Among ( "ante", -1, 1, "", this) - }; + private final static Among a_2[] = { + new Among ( "ando", -1, 6, "", methodObject ), + new Among ( "iendo", -1, 6, "", methodObject ), + new Among ( "yendo", -1, 7, "", methodObject ), + new Among ( "\u00E1ndo", -1, 2, "", methodObject ), + new Among ( "i\u00E9ndo", -1, 1, "", methodObject ), + new Among ( "ar", -1, 6, "", methodObject ), + new Among ( "er", -1, 6, "", methodObject ), + new Among ( "ir", -1, 6, "", methodObject ), + new Among ( "\u00E1r", -1, 3, "", methodObject ), + new Among ( "\u00E9r", -1, 4, "", methodObject ), + new Among ( "\u00EDr", -1, 5, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "ic", -1, 1, "", this), - new Among ( "abil", -1, 1, "", this), - new Among ( "iv", -1, 1, "", this) - }; + private final static Among a_3[] = { + new Among ( "ic", -1, -1, "", methodObject ), + new Among ( "ad", -1, -1, "", methodObject ), + new Among ( "os", -1, -1, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "ica", -1, 1, "", this), - new Among ( "ancia", -1, 2, "", this), - new Among ( "encia", -1, 5, "", this), - new Among ( "adora", -1, 2, "", this), - new Among ( "osa", -1, 1, "", this), - new Among ( "ista", -1, 1, "", this), - new Among ( "iva", -1, 9, "", this), - new Among ( "anza", -1, 1, "", this), - new Among ( "log\u00EDa", -1, 3, "", this), - new Among ( "idad", -1, 8, "", this), - new Among ( "able", -1, 1, "", this), - new Among ( "ible", -1, 1, "", this), - new Among ( "ante", -1, 2, "", this), - new Among ( "mente", -1, 7, "", this), - new Among ( "amente", 13, 6, "", this), - new Among ( "aci\u00F3n", -1, 2, "", this), - new Among ( "uci\u00F3n", -1, 4, "", this), - new Among ( "ico", -1, 1, "", this), - new Among ( "ismo", -1, 1, "", this), - new Among ( "oso", -1, 1, "", this), - new Among ( "amiento", -1, 1, "", this), - new Among ( "imiento", -1, 1, "", this), - new Among ( "ivo", -1, 9, "", this), - new Among ( "ador", -1, 2, "", this), - new Among ( "icas", -1, 1, "", this), - new Among ( "ancias", -1, 2, "", this), - new Among ( "encias", -1, 5, "", this), - new Among ( "adoras", -1, 2, "", this), - new Among ( "osas", -1, 1, "", this), - new Among ( "istas", -1, 1, "", this), - new Among ( "ivas", -1, 9, "", this), - new Among ( "anzas", -1, 1, "", this), - new Among ( "log\u00EDas", -1, 3, "", this), - new Among ( "idades", -1, 8, "", this), - new Among ( "ables", -1, 1, "", this), - new Among ( "ibles", -1, 1, "", this), - new Among ( "aciones", -1, 2, "", this), - new Among ( "uciones", -1, 4, "", this), - new Among ( "adores", -1, 2, "", this), - new Among ( "antes", -1, 2, "", this), - new Among ( "icos", -1, 1, "", this), - new Among ( "ismos", -1, 1, "", this), - new Among ( "osos", -1, 1, "", this), - new Among ( "amientos", -1, 1, "", this), - new Among ( "imientos", -1, 1, "", this), - new Among ( "ivos", -1, 9, "", this) - }; + private final static Among a_4[] = { + new Among ( "able", -1, 1, "", methodObject ), + new Among ( "ible", -1, 1, "", methodObject ), + new Among ( "ante", -1, 1, "", methodObject ) + }; - 
private Among a_7[] = { - new Among ( "ya", -1, 1, "", this), - new Among ( "ye", -1, 1, "", this), - new Among ( "yan", -1, 1, "", this), - new Among ( "yen", -1, 1, "", this), - new Among ( "yeron", -1, 1, "", this), - new Among ( "yendo", -1, 1, "", this), - new Among ( "yo", -1, 1, "", this), - new Among ( "yas", -1, 1, "", this), - new Among ( "yes", -1, 1, "", this), - new Among ( "yais", -1, 1, "", this), - new Among ( "yamos", -1, 1, "", this), - new Among ( "y\u00F3", -1, 1, "", this) - }; + private final static Among a_5[] = { + new Among ( "ic", -1, 1, "", methodObject ), + new Among ( "abil", -1, 1, "", methodObject ), + new Among ( "iv", -1, 1, "", methodObject ) + }; - private Among a_8[] = { - new Among ( "aba", -1, 2, "", this), - new Among ( "ada", -1, 2, "", this), - new Among ( "ida", -1, 2, "", this), - new Among ( "ara", -1, 2, "", this), - new Among ( "iera", -1, 2, "", this), - new Among ( "\u00EDa", -1, 2, "", this), - new Among ( "ar\u00EDa", 5, 2, "", this), - new Among ( "er\u00EDa", 5, 2, "", this), - new Among ( "ir\u00EDa", 5, 2, "", this), - new Among ( "ad", -1, 2, "", this), - new Among ( "ed", -1, 2, "", this), - new Among ( "id", -1, 2, "", this), - new Among ( "ase", -1, 2, "", this), - new Among ( "iese", -1, 2, "", this), - new Among ( "aste", -1, 2, "", this), - new Among ( "iste", -1, 2, "", this), - new Among ( "an", -1, 2, "", this), - new Among ( "aban", 16, 2, "", this), - new Among ( "aran", 16, 2, "", this), - new Among ( "ieran", 16, 2, "", this), - new Among ( "\u00EDan", 16, 2, "", this), - new Among ( "ar\u00EDan", 20, 2, "", this), - new Among ( "er\u00EDan", 20, 2, "", this), - new Among ( "ir\u00EDan", 20, 2, "", this), - new Among ( "en", -1, 1, "", this), - new Among ( "asen", 24, 2, "", this), - new Among ( "iesen", 24, 2, "", this), - new Among ( "aron", -1, 2, "", this), - new Among ( "ieron", -1, 2, "", this), - new Among ( "ar\u00E1n", -1, 2, "", this), - new Among ( "er\u00E1n", -1, 2, "", this), - new Among ( "ir\u00E1n", -1, 2, "", this), - new Among ( "ado", -1, 2, "", this), - new Among ( "ido", -1, 2, "", this), - new Among ( "ando", -1, 2, "", this), - new Among ( "iendo", -1, 2, "", this), - new Among ( "ar", -1, 2, "", this), - new Among ( "er", -1, 2, "", this), - new Among ( "ir", -1, 2, "", this), - new Among ( "as", -1, 2, "", this), - new Among ( "abas", 39, 2, "", this), - new Among ( "adas", 39, 2, "", this), - new Among ( "idas", 39, 2, "", this), - new Among ( "aras", 39, 2, "", this), - new Among ( "ieras", 39, 2, "", this), - new Among ( "\u00EDas", 39, 2, "", this), - new Among ( "ar\u00EDas", 45, 2, "", this), - new Among ( "er\u00EDas", 45, 2, "", this), - new Among ( "ir\u00EDas", 45, 2, "", this), - new Among ( "es", -1, 1, "", this), - new Among ( "ases", 49, 2, "", this), - new Among ( "ieses", 49, 2, "", this), - new Among ( "abais", -1, 2, "", this), - new Among ( "arais", -1, 2, "", this), - new Among ( "ierais", -1, 2, "", this), - new Among ( "\u00EDais", -1, 2, "", this), - new Among ( "ar\u00EDais", 55, 2, "", this), - new Among ( "er\u00EDais", 55, 2, "", this), - new Among ( "ir\u00EDais", 55, 2, "", this), - new Among ( "aseis", -1, 2, "", this), - new Among ( "ieseis", -1, 2, "", this), - new Among ( "asteis", -1, 2, "", this), - new Among ( "isteis", -1, 2, "", this), - new Among ( "\u00E1is", -1, 2, "", this), - new Among ( "\u00E9is", -1, 1, "", this), - new Among ( "ar\u00E9is", 64, 2, "", this), - new Among ( "er\u00E9is", 64, 2, "", this), - new Among ( "ir\u00E9is", 64, 2, "", this), - 
new Among ( "ados", -1, 2, "", this), - new Among ( "idos", -1, 2, "", this), - new Among ( "amos", -1, 2, "", this), - new Among ( "\u00E1bamos", 70, 2, "", this), - new Among ( "\u00E1ramos", 70, 2, "", this), - new Among ( "i\u00E9ramos", 70, 2, "", this), - new Among ( "\u00EDamos", 70, 2, "", this), - new Among ( "ar\u00EDamos", 74, 2, "", this), - new Among ( "er\u00EDamos", 74, 2, "", this), - new Among ( "ir\u00EDamos", 74, 2, "", this), - new Among ( "emos", -1, 1, "", this), - new Among ( "aremos", 78, 2, "", this), - new Among ( "eremos", 78, 2, "", this), - new Among ( "iremos", 78, 2, "", this), - new Among ( "\u00E1semos", 78, 2, "", this), - new Among ( "i\u00E9semos", 78, 2, "", this), - new Among ( "imos", -1, 2, "", this), - new Among ( "ar\u00E1s", -1, 2, "", this), - new Among ( "er\u00E1s", -1, 2, "", this), - new Among ( "ir\u00E1s", -1, 2, "", this), - new Among ( "\u00EDs", -1, 2, "", this), - new Among ( "ar\u00E1", -1, 2, "", this), - new Among ( "er\u00E1", -1, 2, "", this), - new Among ( "ir\u00E1", -1, 2, "", this), - new Among ( "ar\u00E9", -1, 2, "", this), - new Among ( "er\u00E9", -1, 2, "", this), - new Among ( "ir\u00E9", -1, 2, "", this), - new Among ( "i\u00F3", -1, 2, "", this) - }; + private final static Among a_6[] = { + new Among ( "ica", -1, 1, "", methodObject ), + new Among ( "ancia", -1, 2, "", methodObject ), + new Among ( "encia", -1, 5, "", methodObject ), + new Among ( "adora", -1, 2, "", methodObject ), + new Among ( "osa", -1, 1, "", methodObject ), + new Among ( "ista", -1, 1, "", methodObject ), + new Among ( "iva", -1, 9, "", methodObject ), + new Among ( "anza", -1, 1, "", methodObject ), + new Among ( "log\u00EDa", -1, 3, "", methodObject ), + new Among ( "idad", -1, 8, "", methodObject ), + new Among ( "able", -1, 1, "", methodObject ), + new Among ( "ible", -1, 1, "", methodObject ), + new Among ( "ante", -1, 2, "", methodObject ), + new Among ( "mente", -1, 7, "", methodObject ), + new Among ( "amente", 13, 6, "", methodObject ), + new Among ( "aci\u00F3n", -1, 2, "", methodObject ), + new Among ( "uci\u00F3n", -1, 4, "", methodObject ), + new Among ( "ico", -1, 1, "", methodObject ), + new Among ( "ismo", -1, 1, "", methodObject ), + new Among ( "oso", -1, 1, "", methodObject ), + new Among ( "amiento", -1, 1, "", methodObject ), + new Among ( "imiento", -1, 1, "", methodObject ), + new Among ( "ivo", -1, 9, "", methodObject ), + new Among ( "ador", -1, 2, "", methodObject ), + new Among ( "icas", -1, 1, "", methodObject ), + new Among ( "ancias", -1, 2, "", methodObject ), + new Among ( "encias", -1, 5, "", methodObject ), + new Among ( "adoras", -1, 2, "", methodObject ), + new Among ( "osas", -1, 1, "", methodObject ), + new Among ( "istas", -1, 1, "", methodObject ), + new Among ( "ivas", -1, 9, "", methodObject ), + new Among ( "anzas", -1, 1, "", methodObject ), + new Among ( "log\u00EDas", -1, 3, "", methodObject ), + new Among ( "idades", -1, 8, "", methodObject ), + new Among ( "ables", -1, 1, "", methodObject ), + new Among ( "ibles", -1, 1, "", methodObject ), + new Among ( "aciones", -1, 2, "", methodObject ), + new Among ( "uciones", -1, 4, "", methodObject ), + new Among ( "adores", -1, 2, "", methodObject ), + new Among ( "antes", -1, 2, "", methodObject ), + new Among ( "icos", -1, 1, "", methodObject ), + new Among ( "ismos", -1, 1, "", methodObject ), + new Among ( "osos", -1, 1, "", methodObject ), + new Among ( "amientos", -1, 1, "", methodObject ), + new Among ( "imientos", -1, 1, "", methodObject ), + new 
Among ( "ivos", -1, 9, "", methodObject ) + }; - private Among a_9[] = { - new Among ( "a", -1, 1, "", this), - new Among ( "e", -1, 2, "", this), - new Among ( "o", -1, 1, "", this), - new Among ( "os", -1, 1, "", this), - new Among ( "\u00E1", -1, 1, "", this), - new Among ( "\u00E9", -1, 2, "", this), - new Among ( "\u00ED", -1, 1, "", this), - new Among ( "\u00F3", -1, 1, "", this) - }; + private final static Among a_7[] = { + new Among ( "ya", -1, 1, "", methodObject ), + new Among ( "ye", -1, 1, "", methodObject ), + new Among ( "yan", -1, 1, "", methodObject ), + new Among ( "yen", -1, 1, "", methodObject ), + new Among ( "yeron", -1, 1, "", methodObject ), + new Among ( "yendo", -1, 1, "", methodObject ), + new Among ( "yo", -1, 1, "", methodObject ), + new Among ( "yas", -1, 1, "", methodObject ), + new Among ( "yes", -1, 1, "", methodObject ), + new Among ( "yais", -1, 1, "", methodObject ), + new Among ( "yamos", -1, 1, "", methodObject ), + new Among ( "y\u00F3", -1, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; + private final static Among a_8[] = { + new Among ( "aba", -1, 2, "", methodObject ), + new Among ( "ada", -1, 2, "", methodObject ), + new Among ( "ida", -1, 2, "", methodObject ), + new Among ( "ara", -1, 2, "", methodObject ), + new Among ( "iera", -1, 2, "", methodObject ), + new Among ( "\u00EDa", -1, 2, "", methodObject ), + new Among ( "ar\u00EDa", 5, 2, "", methodObject ), + new Among ( "er\u00EDa", 5, 2, "", methodObject ), + new Among ( "ir\u00EDa", 5, 2, "", methodObject ), + new Among ( "ad", -1, 2, "", methodObject ), + new Among ( "ed", -1, 2, "", methodObject ), + new Among ( "id", -1, 2, "", methodObject ), + new Among ( "ase", -1, 2, "", methodObject ), + new Among ( "iese", -1, 2, "", methodObject ), + new Among ( "aste", -1, 2, "", methodObject ), + new Among ( "iste", -1, 2, "", methodObject ), + new Among ( "an", -1, 2, "", methodObject ), + new Among ( "aban", 16, 2, "", methodObject ), + new Among ( "aran", 16, 2, "", methodObject ), + new Among ( "ieran", 16, 2, "", methodObject ), + new Among ( "\u00EDan", 16, 2, "", methodObject ), + new Among ( "ar\u00EDan", 20, 2, "", methodObject ), + new Among ( "er\u00EDan", 20, 2, "", methodObject ), + new Among ( "ir\u00EDan", 20, 2, "", methodObject ), + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "asen", 24, 2, "", methodObject ), + new Among ( "iesen", 24, 2, "", methodObject ), + new Among ( "aron", -1, 2, "", methodObject ), + new Among ( "ieron", -1, 2, "", methodObject ), + new Among ( "ar\u00E1n", -1, 2, "", methodObject ), + new Among ( "er\u00E1n", -1, 2, "", methodObject ), + new Among ( "ir\u00E1n", -1, 2, "", methodObject ), + new Among ( "ado", -1, 2, "", methodObject ), + new Among ( "ido", -1, 2, "", methodObject ), + new Among ( "ando", -1, 2, "", methodObject ), + new Among ( "iendo", -1, 2, "", methodObject ), + new Among ( "ar", -1, 2, "", methodObject ), + new Among ( "er", -1, 2, "", methodObject ), + new Among ( "ir", -1, 2, "", methodObject ), + new Among ( "as", -1, 2, "", methodObject ), + new Among ( "abas", 39, 2, "", methodObject ), + new Among ( "adas", 39, 2, "", methodObject ), + new Among ( "idas", 39, 2, "", methodObject ), + new Among ( "aras", 39, 2, "", methodObject ), + new Among ( "ieras", 39, 2, "", methodObject ), + new Among ( "\u00EDas", 39, 2, "", methodObject ), + new Among ( "ar\u00EDas", 45, 2, "", methodObject ), + new Among ( "er\u00EDas", 45, 2, "", 
methodObject ), + new Among ( "ir\u00EDas", 45, 2, "", methodObject ), + new Among ( "es", -1, 1, "", methodObject ), + new Among ( "ases", 49, 2, "", methodObject ), + new Among ( "ieses", 49, 2, "", methodObject ), + new Among ( "abais", -1, 2, "", methodObject ), + new Among ( "arais", -1, 2, "", methodObject ), + new Among ( "ierais", -1, 2, "", methodObject ), + new Among ( "\u00EDais", -1, 2, "", methodObject ), + new Among ( "ar\u00EDais", 55, 2, "", methodObject ), + new Among ( "er\u00EDais", 55, 2, "", methodObject ), + new Among ( "ir\u00EDais", 55, 2, "", methodObject ), + new Among ( "aseis", -1, 2, "", methodObject ), + new Among ( "ieseis", -1, 2, "", methodObject ), + new Among ( "asteis", -1, 2, "", methodObject ), + new Among ( "isteis", -1, 2, "", methodObject ), + new Among ( "\u00E1is", -1, 2, "", methodObject ), + new Among ( "\u00E9is", -1, 1, "", methodObject ), + new Among ( "ar\u00E9is", 64, 2, "", methodObject ), + new Among ( "er\u00E9is", 64, 2, "", methodObject ), + new Among ( "ir\u00E9is", 64, 2, "", methodObject ), + new Among ( "ados", -1, 2, "", methodObject ), + new Among ( "idos", -1, 2, "", methodObject ), + new Among ( "amos", -1, 2, "", methodObject ), + new Among ( "\u00E1bamos", 70, 2, "", methodObject ), + new Among ( "\u00E1ramos", 70, 2, "", methodObject ), + new Among ( "i\u00E9ramos", 70, 2, "", methodObject ), + new Among ( "\u00EDamos", 70, 2, "", methodObject ), + new Among ( "ar\u00EDamos", 74, 2, "", methodObject ), + new Among ( "er\u00EDamos", 74, 2, "", methodObject ), + new Among ( "ir\u00EDamos", 74, 2, "", methodObject ), + new Among ( "emos", -1, 1, "", methodObject ), + new Among ( "aremos", 78, 2, "", methodObject ), + new Among ( "eremos", 78, 2, "", methodObject ), + new Among ( "iremos", 78, 2, "", methodObject ), + new Among ( "\u00E1semos", 78, 2, "", methodObject ), + new Among ( "i\u00E9semos", 78, 2, "", methodObject ), + new Among ( "imos", -1, 2, "", methodObject ), + new Among ( "ar\u00E1s", -1, 2, "", methodObject ), + new Among ( "er\u00E1s", -1, 2, "", methodObject ), + new Among ( "ir\u00E1s", -1, 2, "", methodObject ), + new Among ( "\u00EDs", -1, 2, "", methodObject ), + new Among ( "ar\u00E1", -1, 2, "", methodObject ), + new Among ( "er\u00E1", -1, 2, "", methodObject ), + new Among ( "ir\u00E1", -1, 2, "", methodObject ), + new Among ( "ar\u00E9", -1, 2, "", methodObject ), + new Among ( "er\u00E9", -1, 2, "", methodObject ), + new Among ( "ir\u00E9", -1, 2, "", methodObject ), + new Among ( "i\u00F3", -1, 2, "", methodObject ) + }; + private final static Among a_9[] = { + new Among ( "a", -1, 1, "", methodObject ), + new Among ( "e", -1, 2, "", methodObject ), + new Among ( "o", -1, 1, "", methodObject ), + new Among ( "os", -1, 1, "", methodObject ), + new Among ( "\u00E1", -1, 1, "", methodObject ), + new Among ( "\u00E9", -1, 2, "", methodObject ), + new Among ( "\u00ED", -1, 1, "", methodObject ), + new Among ( "\u00F3", -1, 1, "", methodObject ) + }; + + private static final char g_v[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 17, 4, 10 }; + private int I_p2; private int I_p1; private int I_pV; - private void copy_from(SpanishStemmer other) { - I_p2 = other.I_p2; - I_p1 = other.I_p1; - I_pV = other.I_pV; - super.copy_from(other); - } + private void copy_from(SpanishStemmer other) { + I_p2 = other.I_p2; + I_p1 = other.I_p1; + I_pV = other.I_pV; + super.copy_from(other); + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; int v_3; int v_6; int v_8; 
- // (, line 31 - I_pV = limit; - I_p1 = limit; - I_p2 = limit; - // do, line 37 - v_1 = cursor; - lab0: do { - // (, line 37 - // or, line 39 - lab1: do { - v_2 = cursor; - lab2: do { - // (, line 38 - if (!(in_grouping(g_v, 97, 252))) - { - break lab2; - } - // or, line 38 - lab3: do { - v_3 = cursor; - lab4: do { + // (, line 31 + I_pV = limit; + I_p1 = limit; + I_p2 = limit; + // do, line 37 + v_1 = cursor; + lab0: do { + // (, line 37 + // or, line 39 + lab1: do { + v_2 = cursor; + lab2: do { // (, line 38 - if (!(out_grouping(g_v, 97, 252))) + if (!(in_grouping(g_v, 97, 252))) { - break lab4; + break lab2; } - // gopast, line 38 - golab5: while(true) - { - lab6: do { - if (!(in_grouping(g_v, 97, 252))) + // or, line 38 + lab3: do { + v_3 = cursor; + lab4: do { + // (, line 38 + if (!(out_grouping(g_v, 97, 252))) { - break lab6; + break lab4; } - break golab5; + // gopast, line 38 + golab5: while(true) + { + lab6: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab6; + } + break golab5; + } while (false); + if (cursor >= limit) + { + break lab4; + } + cursor++; + } + break lab3; } while (false); - if (cursor >= limit) + cursor = v_3; + // (, line 38 + if (!(in_grouping(g_v, 97, 252))) { - break lab4; + break lab2; } - cursor++; - } - break lab3; + // gopast, line 38 + golab7: while(true) + { + lab8: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab8; + } + break golab7; + } while (false); + if (cursor >= limit) + { + break lab2; + } + cursor++; + } + } while (false); + break lab1; } while (false); - cursor = v_3; - // (, line 38 - if (!(in_grouping(g_v, 97, 252))) + cursor = v_2; + // (, line 40 + if (!(out_grouping(g_v, 97, 252))) { - break lab2; + break lab0; } - // gopast, line 38 - golab7: while(true) - { - lab8: do { + // or, line 40 + lab9: do { + v_6 = cursor; + lab10: do { + // (, line 40 if (!(out_grouping(g_v, 97, 252))) { - break lab8; + break lab10; } - break golab7; + // gopast, line 40 + golab11: while(true) + { + lab12: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab12; + } + break golab11; + } while (false); + if (cursor >= limit) + { + break lab10; + } + cursor++; + } + break lab9; } while (false); + cursor = v_6; + // (, line 40 + if (!(in_grouping(g_v, 97, 252))) + { + break lab0; + } + // next, line 40 if (cursor >= limit) { - break lab2; + break lab0; } cursor++; - } + } while (false); } while (false); - break lab1; + // setmark pV, line 41 + I_pV = cursor; } while (false); - cursor = v_2; - // (, line 40 - if (!(out_grouping(g_v, 97, 252))) - { - break lab0; - } - // or, line 40 - lab9: do { - v_6 = cursor; - lab10: do { - // (, line 40 - if (!(out_grouping(g_v, 97, 252))) + cursor = v_1; + // do, line 43 + v_8 = cursor; + lab13: do { + // (, line 43 + // gopast, line 44 + golab14: while(true) + { + lab15: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab15; + } + break golab14; + } while (false); + if (cursor >= limit) { - break lab10; + break lab13; } - // gopast, line 40 - golab11: while(true) - { - lab12: do { - if (!(in_grouping(g_v, 97, 252))) - { - break lab12; - } - break golab11; - } while (false); - if (cursor >= limit) + cursor++; + } + // gopast, line 44 + golab16: while(true) + { + lab17: do { + if (!(out_grouping(g_v, 97, 252))) { - break lab10; + break lab17; } - cursor++; + break golab16; + } while (false); + if (cursor >= limit) + { + break lab13; } - break lab9; - } while (false); - cursor = v_6; - // (, line 40 - if (!(in_grouping(g_v, 97, 252))) - { - break lab0; + cursor++; } - // next, line 40 - if (cursor >= 
limit) + // setmark p1, line 44 + I_p1 = cursor; + // gopast, line 45 + golab18: while(true) { - break lab0; + lab19: do { + if (!(in_grouping(g_v, 97, 252))) + { + break lab19; + } + break golab18; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - cursor++; - } while (false); - } while (false); - // setmark pV, line 41 - I_pV = cursor; - } while (false); - cursor = v_1; - // do, line 43 - v_8 = cursor; - lab13: do { - // (, line 43 - // gopast, line 44 - golab14: while(true) - { - lab15: do { - if (!(in_grouping(g_v, 97, 252))) + // gopast, line 45 + golab20: while(true) { - break lab15; + lab21: do { + if (!(out_grouping(g_v, 97, 252))) + { + break lab21; + } + break golab20; + } while (false); + if (cursor >= limit) + { + break lab13; + } + cursor++; } - break golab14; + // setmark p2, line 45 + I_p2 = cursor; } while (false); - if (cursor >= limit) + cursor = v_8; + return true; + } + + private boolean r_postlude() { + int among_var; + int v_1; + // repeat, line 49 + replab0: while(true) { - break lab13; + v_1 = cursor; + lab1: do { + // (, line 49 + // [, line 50 + bra = cursor; + // substring, line 50 + among_var = find_among(a_0, 6); + if (among_var == 0) + { + break lab1; + } + // ], line 50 + ket = cursor; + switch(among_var) { + case 0: + break lab1; + case 1: + // (, line 51 + // <-, line 51 + slice_from("a"); + break; + case 2: + // (, line 52 + // <-, line 52 + slice_from("e"); + break; + case 3: + // (, line 53 + // <-, line 53 + slice_from("i"); + break; + case 4: + // (, line 54 + // <-, line 54 + slice_from("o"); + break; + case 5: + // (, line 55 + // <-, line 55 + slice_from("u"); + break; + case 6: + // (, line 57 + // next, line 57 + if (cursor >= limit) + { + break lab1; + } + cursor++; + break; + } + continue replab0; + } while (false); + cursor = v_1; + break replab0; } - cursor++; + return true; } - // gopast, line 44 - golab16: while(true) - { - lab17: do { - if (!(out_grouping(g_v, 97, 252))) - { - break lab17; - } - break golab16; - } while (false); - if (cursor >= limit) + + private boolean r_RV() { + if (!(I_pV <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p1, line 44 - I_p1 = cursor; - // gopast, line 45 - golab18: while(true) - { - lab19: do { - if (!(in_grouping(g_v, 97, 252))) - { - break lab19; - } - break golab18; - } while (false); - if (cursor >= limit) + + private boolean r_R1() { + if (!(I_p1 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // gopast, line 45 - golab20: while(true) - { - lab21: do { - if (!(out_grouping(g_v, 97, 252))) - { - break lab21; - } - break golab20; - } while (false); - if (cursor >= limit) + + private boolean r_R2() { + if (!(I_p2 <= cursor)) { - break lab13; + return false; } - cursor++; + return true; } - // setmark p2, line 45 - I_p2 = cursor; - } while (false); - cursor = v_8; - return true; - } - private boolean r_postlude() { + private boolean r_attached_pronoun() { int among_var; - int v_1; - // repeat, line 49 - replab0: while(true) - { - v_1 = cursor; - lab1: do { - // (, line 49 - // [, line 50 + // (, line 67 + // [, line 68 + ket = cursor; + // substring, line 68 + if (find_among_b(a_1, 13) == 0) + { + return false; + } + // ], line 68 bra = cursor; - // substring, line 50 - among_var = find_among(a_0, 6); + // substring, line 72 + among_var = find_among_b(a_2, 11); if (among_var == 0) { - break lab1; + return false; } - // ], line 50 - ket = cursor; + // call RV, line 72 + if (!r_RV()) + { + return false; + } 
switch(among_var) { case 0: - break lab1; + return false; case 1: - // (, line 51 - // <-, line 51 - slice_from("a"); + // (, line 73 + // ], line 73 + bra = cursor; + // <-, line 73 + slice_from("iendo"); break; case 2: - // (, line 52 - // <-, line 52 - slice_from("e"); + // (, line 74 + // ], line 74 + bra = cursor; + // <-, line 74 + slice_from("ando"); break; case 3: - // (, line 53 - // <-, line 53 - slice_from("i"); + // (, line 75 + // ], line 75 + bra = cursor; + // <-, line 75 + slice_from("ar"); break; case 4: - // (, line 54 - // <-, line 54 - slice_from("o"); + // (, line 76 + // ], line 76 + bra = cursor; + // <-, line 76 + slice_from("er"); break; case 5: - // (, line 55 - // <-, line 55 - slice_from("u"); + // (, line 77 + // ], line 77 + bra = cursor; + // <-, line 77 + slice_from("ir"); break; case 6: - // (, line 57 - // next, line 57 - if (cursor >= limit) + // (, line 81 + // delete, line 81 + slice_del(); + break; + case 7: + // (, line 82 + // literal, line 82 + if (!(eq_s_b(1, "u"))) { - break lab1; + return false; } - cursor++; + // delete, line 82 + slice_del(); break; } - continue replab0; - } while (false); - cursor = v_1; - break replab0; - } - return true; - } + return true; + } - private boolean r_RV() { - if (!(I_pV <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R1() { - if (!(I_p1 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_R2() { - if (!(I_p2 <= cursor)) - { - return false; - } - return true; - } - - private boolean r_attached_pronoun() { + private boolean r_standard_suffix() { int among_var; - // (, line 67 - // [, line 68 - ket = cursor; - // substring, line 68 - if (find_among_b(a_1, 13) == 0) - { - return false; - } - // ], line 68 - bra = cursor; - // substring, line 72 - among_var = find_among_b(a_2, 11); - if (among_var == 0) - { - return false; - } - // call RV, line 72 - if (!r_RV()) - { - return false; - } - switch(among_var) { - case 0: - return false; - case 1: - // (, line 73 - // ], line 73 - bra = cursor; - // <-, line 73 - slice_from("iendo"); - break; - case 2: - // (, line 74 - // ], line 74 - bra = cursor; - // <-, line 74 - slice_from("ando"); - break; - case 3: - // (, line 75 - // ], line 75 - bra = cursor; - // <-, line 75 - slice_from("ar"); - break; - case 4: - // (, line 76 - // ], line 76 - bra = cursor; - // <-, line 76 - slice_from("er"); - break; - case 5: - // (, line 77 - // ], line 77 - bra = cursor; - // <-, line 77 - slice_from("ir"); - break; - case 6: - // (, line 81 - // delete, line 81 - slice_del(); - break; - case 7: - // (, line 82 - // literal, line 82 - if (!(eq_s_b(1, "u"))) - { - return false; - } - // delete, line 82 - slice_del(); - break; - } - return true; - } - - private boolean r_standard_suffix() { - int among_var; int v_1; int v_2; int v_3; int v_4; int v_5; - // (, line 86 - // [, line 87 - ket = cursor; - // substring, line 87 - among_var = find_among_b(a_6, 46); - if (among_var == 0) - { - return false; - } - // ], line 87 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 98 - // call R2, line 99 - if (!r_R2()) + // (, line 86 + // [, line 87 + ket = cursor; + // substring, line 87 + among_var = find_among_b(a_6, 46); + if (among_var == 0) { return false; } - // delete, line 99 - slice_del(); - break; - case 2: - // (, line 104 - // call R2, line 105 - if (!r_R2()) - { - return false; - } - // delete, line 105 - slice_del(); - // try, line 106 - v_1 = limit - cursor; - lab0: do { - // (, line 106 - // 
[, line 106 - ket = cursor; - // literal, line 106 - if (!(eq_s_b(2, "ic"))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 106 - bra = cursor; - // call R2, line 106 - if (!r_R2()) - { - cursor = limit - v_1; - break lab0; - } - // delete, line 106 - slice_del(); - } while (false); - break; - case 3: - // (, line 110 - // call R2, line 111 - if (!r_R2()) - { - return false; - } - // <-, line 111 - slice_from("log"); - break; - case 4: - // (, line 114 - // call R2, line 115 - if (!r_R2()) - { - return false; - } - // <-, line 115 - slice_from("u"); - break; - case 5: - // (, line 118 - // call R2, line 119 - if (!r_R2()) - { - return false; - } - // <-, line 119 - slice_from("ente"); - break; - case 6: - // (, line 122 - // call R1, line 123 - if (!r_R1()) - { - return false; - } - // delete, line 123 - slice_del(); - // try, line 124 - v_2 = limit - cursor; - lab1: do { - // (, line 124 - // [, line 125 - ket = cursor; - // substring, line 125 - among_var = find_among_b(a_3, 4); - if (among_var == 0) - { - cursor = limit - v_2; - break lab1; - } - // ], line 125 - bra = cursor; - // call R2, line 125 - if (!r_R2()) - { - cursor = limit - v_2; - break lab1; - } - // delete, line 125 - slice_del(); - switch(among_var) { - case 0: - cursor = limit - v_2; - break lab1; - case 1: - // (, line 126 - // [, line 126 + // ], line 87 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 98 + // call R2, line 99 + if (!r_R2()) + { + return false; + } + // delete, line 99 + slice_del(); + break; + case 2: + // (, line 104 + // call R2, line 105 + if (!r_R2()) + { + return false; + } + // delete, line 105 + slice_del(); + // try, line 106 + v_1 = limit - cursor; + lab0: do { + // (, line 106 + // [, line 106 ket = cursor; - // literal, line 126 - if (!(eq_s_b(2, "at"))) + // literal, line 106 + if (!(eq_s_b(2, "ic"))) { + cursor = limit - v_1; + break lab0; + } + // ], line 106 + bra = cursor; + // call R2, line 106 + if (!r_R2()) + { + cursor = limit - v_1; + break lab0; + } + // delete, line 106 + slice_del(); + } while (false); + break; + case 3: + // (, line 110 + // call R2, line 111 + if (!r_R2()) + { + return false; + } + // <-, line 111 + slice_from("log"); + break; + case 4: + // (, line 114 + // call R2, line 115 + if (!r_R2()) + { + return false; + } + // <-, line 115 + slice_from("u"); + break; + case 5: + // (, line 118 + // call R2, line 119 + if (!r_R2()) + { + return false; + } + // <-, line 119 + slice_from("ente"); + break; + case 6: + // (, line 122 + // call R1, line 123 + if (!r_R1()) + { + return false; + } + // delete, line 123 + slice_del(); + // try, line 124 + v_2 = limit - cursor; + lab1: do { + // (, line 124 + // [, line 125 + ket = cursor; + // substring, line 125 + among_var = find_among_b(a_3, 4); + if (among_var == 0) + { cursor = limit - v_2; break lab1; } - // ], line 126 + // ], line 125 bra = cursor; - // call R2, line 126 + // call R2, line 125 if (!r_R2()) { cursor = limit - v_2; break lab1; } - // delete, line 126 + // delete, line 125 slice_del(); - break; - } - } while (false); - break; - case 7: - // (, line 134 - // call R2, line 135 - if (!r_R2()) - { - return false; - } - // delete, line 135 - slice_del(); - // try, line 136 - v_3 = limit - cursor; - lab2: do { - // (, line 136 - // [, line 137 - ket = cursor; - // substring, line 137 - among_var = find_among_b(a_4, 3); - if (among_var == 0) - { - cursor = limit - v_3; - break lab2; - } - // ], line 137 - bra = cursor; - switch(among_var) { - case 0: - cursor = 
limit - v_3; - break lab2; - case 1: - // (, line 140 - // call R2, line 140 - if (!r_R2()) + switch(among_var) { + case 0: + cursor = limit - v_2; + break lab1; + case 1: + // (, line 126 + // [, line 126 + ket = cursor; + // literal, line 126 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_2; + break lab1; + } + // ], line 126 + bra = cursor; + // call R2, line 126 + if (!r_R2()) + { + cursor = limit - v_2; + break lab1; + } + // delete, line 126 + slice_del(); + break; + } + } while (false); + break; + case 7: + // (, line 134 + // call R2, line 135 + if (!r_R2()) + { + return false; + } + // delete, line 135 + slice_del(); + // try, line 136 + v_3 = limit - cursor; + lab2: do { + // (, line 136 + // [, line 137 + ket = cursor; + // substring, line 137 + among_var = find_among_b(a_4, 3); + if (among_var == 0) { cursor = limit - v_3; break lab2; } - // delete, line 140 - slice_del(); - break; - } - } while (false); - break; - case 8: - // (, line 146 - // call R2, line 147 - if (!r_R2()) - { - return false; - } - // delete, line 147 - slice_del(); - // try, line 148 - v_4 = limit - cursor; - lab3: do { - // (, line 148 - // [, line 149 - ket = cursor; - // substring, line 149 - among_var = find_among_b(a_5, 3); - if (among_var == 0) - { - cursor = limit - v_4; - break lab3; - } - // ], line 149 - bra = cursor; - switch(among_var) { - case 0: - cursor = limit - v_4; - break lab3; - case 1: - // (, line 152 - // call R2, line 152 - if (!r_R2()) + // ], line 137 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_3; + break lab2; + case 1: + // (, line 140 + // call R2, line 140 + if (!r_R2()) + { + cursor = limit - v_3; + break lab2; + } + // delete, line 140 + slice_del(); + break; + } + } while (false); + break; + case 8: + // (, line 146 + // call R2, line 147 + if (!r_R2()) + { + return false; + } + // delete, line 147 + slice_del(); + // try, line 148 + v_4 = limit - cursor; + lab3: do { + // (, line 148 + // [, line 149 + ket = cursor; + // substring, line 149 + among_var = find_among_b(a_5, 3); + if (among_var == 0) { cursor = limit - v_4; break lab3; } - // delete, line 152 + // ], line 149 + bra = cursor; + switch(among_var) { + case 0: + cursor = limit - v_4; + break lab3; + case 1: + // (, line 152 + // call R2, line 152 + if (!r_R2()) + { + cursor = limit - v_4; + break lab3; + } + // delete, line 152 + slice_del(); + break; + } + } while (false); + break; + case 9: + // (, line 158 + // call R2, line 159 + if (!r_R2()) + { + return false; + } + // delete, line 159 + slice_del(); + // try, line 160 + v_5 = limit - cursor; + lab4: do { + // (, line 160 + // [, line 161 + ket = cursor; + // literal, line 161 + if (!(eq_s_b(2, "at"))) + { + cursor = limit - v_5; + break lab4; + } + // ], line 161 + bra = cursor; + // call R2, line 161 + if (!r_R2()) + { + cursor = limit - v_5; + break lab4; + } + // delete, line 161 slice_del(); - break; - } - } while (false); - break; - case 9: - // (, line 158 - // call R2, line 159 - if (!r_R2()) - { - return false; + } while (false); + break; } - // delete, line 159 - slice_del(); - // try, line 160 - v_5 = limit - cursor; - lab4: do { - // (, line 160 - // [, line 161 - ket = cursor; - // literal, line 161 - if (!(eq_s_b(2, "at"))) - { - cursor = limit - v_5; - break lab4; - } - // ], line 161 - bra = cursor; - // call R2, line 161 - if (!r_R2()) - { - cursor = limit - v_5; - break lab4; - } - // delete, line 161 - slice_del(); - } while (false); - break; - } - return true; - } + return true; + } - private boolean 
r_y_verb_suffix() { + private boolean r_y_verb_suffix() { int among_var; int v_1; int v_2; - // (, line 167 - // setlimit, line 168 - v_1 = limit - cursor; - // tomark, line 168 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 168 - // [, line 168 - ket = cursor; - // substring, line 168 - among_var = find_among_b(a_7, 12); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 168 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 171 - // literal, line 171 - if (!(eq_s_b(1, "u"))) + // (, line 167 + // setlimit, line 168 + v_1 = limit - cursor; + // tomark, line 168 + if (cursor < I_pV) { return false; } - // delete, line 171 - slice_del(); - break; - } - return true; - } + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 168 + // [, line 168 + ket = cursor; + // substring, line 168 + among_var = find_among_b(a_7, 12); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 168 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 171 + // literal, line 171 + if (!(eq_s_b(1, "u"))) + { + return false; + } + // delete, line 171 + slice_del(); + break; + } + return true; + } - private boolean r_verb_suffix() { + private boolean r_verb_suffix() { int among_var; int v_1; int v_2; int v_3; int v_4; - // (, line 175 - // setlimit, line 176 - v_1 = limit - cursor; - // tomark, line 176 - if (cursor < I_pV) - { - return false; - } - cursor = I_pV; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 176 - // [, line 176 - ket = cursor; - // substring, line 176 - among_var = find_among_b(a_8, 96); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 176 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 179 - // try, line 179 - v_3 = limit - cursor; - lab0: do { - // (, line 179 - // literal, line 179 - if (!(eq_s_b(1, "u"))) - { - cursor = limit - v_3; - break lab0; - } - // test, line 179 - v_4 = limit - cursor; - // literal, line 179 - if (!(eq_s_b(1, "g"))) - { - cursor = limit - v_3; - break lab0; - } - cursor = limit - v_4; - } while (false); - // ], line 179 + // (, line 175 + // setlimit, line 176 + v_1 = limit - cursor; + // tomark, line 176 + if (cursor < I_pV) + { + return false; + } + cursor = I_pV; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 176 + // [, line 176 + ket = cursor; + // substring, line 176 + among_var = find_among_b(a_8, 96); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 176 bra = cursor; - // delete, line 179 - slice_del(); - break; - case 2: - // (, line 200 - // delete, line 200 - slice_del(); - break; - } - return true; - } + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 179 + // try, line 179 + v_3 = limit - cursor; + lab0: do { + // (, line 179 + // literal, line 179 + if (!(eq_s_b(1, "u"))) + { + cursor = limit - v_3; + break lab0; + } + // test, line 179 + v_4 = limit - cursor; + // literal, line 179 + if (!(eq_s_b(1, "g"))) + { + cursor = limit - v_3; + break lab0; + } + cursor = limit - v_4; + } while (false); + // ], line 179 + bra = cursor; + // delete, line 179 + slice_del(); + break; + 
case 2: + // (, line 200 + // delete, line 200 + slice_del(); + break; + } + return true; + } - private boolean r_residual_suffix() { + private boolean r_residual_suffix() { int among_var; int v_1; int v_2; - // (, line 204 - // [, line 205 - ket = cursor; - // substring, line 205 - among_var = find_among_b(a_9, 8); - if (among_var == 0) - { - return false; - } - // ], line 205 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 208 - // call RV, line 208 - if (!r_RV()) + // (, line 204 + // [, line 205 + ket = cursor; + // substring, line 205 + among_var = find_among_b(a_9, 8); + if (among_var == 0) { return false; } - // delete, line 208 - slice_del(); - break; - case 2: - // (, line 210 - // call RV, line 210 - if (!r_RV()) - { - return false; + // ], line 205 + bra = cursor; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 208 + // call RV, line 208 + if (!r_RV()) + { + return false; + } + // delete, line 208 + slice_del(); + break; + case 2: + // (, line 210 + // call RV, line 210 + if (!r_RV()) + { + return false; + } + // delete, line 210 + slice_del(); + // try, line 210 + v_1 = limit - cursor; + lab0: do { + // (, line 210 + // [, line 210 + ket = cursor; + // literal, line 210 + if (!(eq_s_b(1, "u"))) + { + cursor = limit - v_1; + break lab0; + } + // ], line 210 + bra = cursor; + // test, line 210 + v_2 = limit - cursor; + // literal, line 210 + if (!(eq_s_b(1, "g"))) + { + cursor = limit - v_1; + break lab0; + } + cursor = limit - v_2; + // call RV, line 210 + if (!r_RV()) + { + cursor = limit - v_1; + break lab0; + } + // delete, line 210 + slice_del(); + } while (false); + break; } - // delete, line 210 - slice_del(); - // try, line 210 - v_1 = limit - cursor; - lab0: do { - // (, line 210 - // [, line 210 - ket = cursor; - // literal, line 210 - if (!(eq_s_b(1, "u"))) - { - cursor = limit - v_1; - break lab0; - } - // ], line 210 - bra = cursor; - // test, line 210 - v_2 = limit - cursor; - // literal, line 210 - if (!(eq_s_b(1, "g"))) - { - cursor = limit - v_1; - break lab0; - } - cursor = limit - v_2; - // call RV, line 210 - if (!r_RV()) - { - cursor = limit - v_1; - break lab0; - } - // delete, line 210 - slice_del(); - } while (false); - break; - } - return true; - } + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; - // (, line 215 - // do, line 216 - v_1 = cursor; - lab0: do { - // call mark_regions, line 216 - if (!r_mark_regions()) - { - break lab0; - } - } while (false); - cursor = v_1; - // backwards, line 217 - limit_backward = cursor; cursor = limit; - // (, line 217 - // do, line 218 - v_2 = limit - cursor; - lab1: do { - // call attached_pronoun, line 218 - if (!r_attached_pronoun()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 219 - v_3 = limit - cursor; - lab2: do { - // (, line 219 - // or, line 219 - lab3: do { - v_4 = limit - cursor; - lab4: do { - // call standard_suffix, line 219 - if (!r_standard_suffix()) + // (, line 215 + // do, line 216 + v_1 = cursor; + lab0: do { + // call mark_regions, line 216 + if (!r_mark_regions()) { - break lab4; + break lab0; } - break lab3; } while (false); - cursor = limit - v_4; - lab5: do { - // call y_verb_suffix, line 220 - if (!r_y_verb_suffix()) + cursor = v_1; + // backwards, line 217 + limit_backward = cursor; cursor = limit; + // (, line 217 + // do, line 218 + v_2 = limit - cursor; + lab1: do { + // call attached_pronoun, line 218 + if 
(!r_attached_pronoun()) { - break lab5; + break lab1; } - break lab3; } while (false); - cursor = limit - v_4; - // call verb_suffix, line 221 - if (!r_verb_suffix()) - { - break lab2; - } - } while (false); - } while (false); - cursor = limit - v_3; - // do, line 223 - v_5 = limit - cursor; - lab6: do { - // call residual_suffix, line 223 - if (!r_residual_suffix()) - { - break lab6; + cursor = limit - v_2; + // do, line 219 + v_3 = limit - cursor; + lab2: do { + // (, line 219 + // or, line 219 + lab3: do { + v_4 = limit - cursor; + lab4: do { + // call standard_suffix, line 219 + if (!r_standard_suffix()) + { + break lab4; + } + break lab3; + } while (false); + cursor = limit - v_4; + lab5: do { + // call y_verb_suffix, line 220 + if (!r_y_verb_suffix()) + { + break lab5; + } + break lab3; + } while (false); + cursor = limit - v_4; + // call verb_suffix, line 221 + if (!r_verb_suffix()) + { + break lab2; + } + } while (false); + } while (false); + cursor = limit - v_3; + // do, line 223 + v_5 = limit - cursor; + lab6: do { + // call residual_suffix, line 223 + if (!r_residual_suffix()) + { + break lab6; + } + } while (false); + cursor = limit - v_5; + cursor = limit_backward; // do, line 225 + v_6 = cursor; + lab7: do { + // call postlude, line 225 + if (!r_postlude()) + { + break lab7; + } + } while (false); + cursor = v_6; + return true; } - } while (false); - cursor = limit - v_5; - cursor = limit_backward; // do, line 225 - v_6 = cursor; - lab7: do { - // call postlude, line 225 - if (!r_postlude()) - { - break lab7; - } - } while (false); - cursor = v_6; - return true; + + @Override + public boolean equals( Object o ) { + return o instanceof SpanishStemmer; } + @Override + public int hashCode() { + return SpanishStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/SwedishStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/SwedishStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/SwedishStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/SwedishStemmer.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,349 +1,369 @@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. 
+ */ + public class SwedishStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "a", -1, 1, "", this), - new Among ( "arna", 0, 1, "", this), - new Among ( "erna", 0, 1, "", this), - new Among ( "heterna", 2, 1, "", this), - new Among ( "orna", 0, 1, "", this), - new Among ( "ad", -1, 1, "", this), - new Among ( "e", -1, 1, "", this), - new Among ( "ade", 6, 1, "", this), - new Among ( "ande", 6, 1, "", this), - new Among ( "arne", 6, 1, "", this), - new Among ( "are", 6, 1, "", this), - new Among ( "aste", 6, 1, "", this), - new Among ( "en", -1, 1, "", this), - new Among ( "anden", 12, 1, "", this), - new Among ( "aren", 12, 1, "", this), - new Among ( "heten", 12, 1, "", this), - new Among ( "ern", -1, 1, "", this), - new Among ( "ar", -1, 1, "", this), - new Among ( "er", -1, 1, "", this), - new Among ( "heter", 18, 1, "", this), - new Among ( "or", -1, 1, "", this), - new Among ( "s", -1, 2, "", this), - new Among ( "as", 21, 1, "", this), - new Among ( "arnas", 22, 1, "", this), - new Among ( "ernas", 22, 1, "", this), - new Among ( "ornas", 22, 1, "", this), - new Among ( "es", 21, 1, "", this), - new Among ( "ades", 26, 1, "", this), - new Among ( "andes", 26, 1, "", this), - new Among ( "ens", 21, 1, "", this), - new Among ( "arens", 29, 1, "", this), - new Among ( "hetens", 29, 1, "", this), - new Among ( "erns", 21, 1, "", this), - new Among ( "at", -1, 1, "", this), - new Among ( "andet", -1, 1, "", this), - new Among ( "het", -1, 1, "", this), - new Among ( "ast", -1, 1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "dd", -1, -1, "", this), - new Among ( "gd", -1, -1, "", this), - new Among ( "nn", -1, -1, "", this), - new Among ( "dt", -1, -1, "", this), - new Among ( "gt", -1, -1, "", this), - new Among ( "kt", -1, -1, "", this), - new Among ( "tt", -1, -1, "", this) - }; + private final static SwedishStemmer methodObject = new SwedishStemmer (); - private Among a_2[] = { - new Among ( "ig", -1, 1, "", this), - new Among ( "lig", 0, 1, "", this), - new Among ( "els", -1, 1, "", this), - new Among ( "fullt", -1, 3, "", this), - new Among ( "l\u00F6st", -1, 2, "", this) - }; + private final static Among a_0[] = { + new Among ( "a", -1, 1, "", methodObject ), + new Among ( "arna", 0, 1, "", methodObject ), + new Among ( "erna", 0, 1, "", methodObject ), + new Among ( "heterna", 2, 1, "", methodObject ), + new Among ( "orna", 0, 1, "", methodObject ), + new Among ( "ad", -1, 1, "", methodObject ), + new Among ( "e", -1, 1, "", methodObject ), + new Among ( "ade", 6, 1, "", methodObject ), + new Among ( "ande", 6, 1, "", methodObject ), + new Among ( "arne", 6, 1, "", methodObject ), + new Among ( "are", 6, 1, "", methodObject ), + new Among ( "aste", 6, 1, "", methodObject ), + new Among ( "en", -1, 1, "", methodObject ), + new Among ( "anden", 12, 1, "", methodObject ), + new Among ( "aren", 12, 1, "", methodObject ), + new Among ( "heten", 12, 1, "", methodObject ), + new Among ( "ern", -1, 1, "", methodObject ), + new Among ( "ar", -1, 1, "", methodObject ), + new Among ( "er", -1, 1, "", methodObject ), + new Among ( "heter", 18, 1, "", methodObject ), + new Among ( "or", -1, 1, "", methodObject ), + new Among ( "s", -1, 2, "", methodObject ), + new Among ( "as", 21, 1, "", methodObject ), + new Among ( "arnas", 22, 1, "", methodObject ), + new Among ( "ernas", 22, 1, "", methodObject ), + new Among ( "ornas", 22, 1, "", methodObject ), + new Among ( "es", 21, 1, "", methodObject ), + new Among ( 
"ades", 26, 1, "", methodObject ), + new Among ( "andes", 26, 1, "", methodObject ), + new Among ( "ens", 21, 1, "", methodObject ), + new Among ( "arens", 29, 1, "", methodObject ), + new Among ( "hetens", 29, 1, "", methodObject ), + new Among ( "erns", 21, 1, "", methodObject ), + new Among ( "at", -1, 1, "", methodObject ), + new Among ( "andet", -1, 1, "", methodObject ), + new Among ( "het", -1, 1, "", methodObject ), + new Among ( "ast", -1, 1, "", methodObject ) + }; - private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; + private final static Among a_1[] = { + new Among ( "dd", -1, -1, "", methodObject ), + new Among ( "gd", -1, -1, "", methodObject ), + new Among ( "nn", -1, -1, "", methodObject ), + new Among ( "dt", -1, -1, "", methodObject ), + new Among ( "gt", -1, -1, "", methodObject ), + new Among ( "kt", -1, -1, "", methodObject ), + new Among ( "tt", -1, -1, "", methodObject ) + }; - private static final char g_s_ending[] = {119, 127, 149 }; + private final static Among a_2[] = { + new Among ( "ig", -1, 1, "", methodObject ), + new Among ( "lig", 0, 1, "", methodObject ), + new Among ( "els", -1, 1, "", methodObject ), + new Among ( "fullt", -1, 3, "", methodObject ), + new Among ( "l\u00F6st", -1, 2, "", methodObject ) + }; + private static final char g_v[] = {17, 65, 16, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 24, 0, 32 }; + + private static final char g_s_ending[] = {119, 127, 149 }; + private int I_x; private int I_p1; - private void copy_from(SwedishStemmer other) { - I_x = other.I_x; - I_p1 = other.I_p1; - super.copy_from(other); - } + private void copy_from(SwedishStemmer other) { + I_x = other.I_x; + I_p1 = other.I_p1; + super.copy_from(other); + } - private boolean r_mark_regions() { + private boolean r_mark_regions() { int v_1; int v_2; - // (, line 26 - I_p1 = limit; - // test, line 29 - v_1 = cursor; - // (, line 29 - // hop, line 29 - { - int c = cursor + 3; - if (0 > c || c > limit) - { - return false; - } - cursor = c; - } - // setmark x, line 29 - I_x = cursor; - cursor = v_1; - // goto, line 30 - golab0: while(true) - { - v_2 = cursor; - lab1: do { - if (!(in_grouping(g_v, 97, 246))) + // (, line 26 + I_p1 = limit; + // test, line 29 + v_1 = cursor; + // (, line 29 + // hop, line 29 { - break lab1; + int c = cursor + 3; + if (0 > c || c > limit) + { + return false; + } + cursor = c; } - cursor = v_2; - break golab0; - } while (false); - cursor = v_2; - if (cursor >= limit) - { - return false; - } - cursor++; - } - // gopast, line 30 - golab2: while(true) - { - lab3: do { - if (!(out_grouping(g_v, 97, 246))) + // setmark x, line 29 + I_x = cursor; + cursor = v_1; + // goto, line 30 + golab0: while(true) { - break lab3; + v_2 = cursor; + lab1: do { + if (!(in_grouping(g_v, 97, 246))) + { + break lab1; + } + cursor = v_2; + break golab0; + } while (false); + cursor = v_2; + if (cursor >= limit) + { + return false; + } + cursor++; } - break golab2; - } while (false); - if (cursor >= limit) - { - return false; + // gopast, line 30 + golab2: while(true) + { + lab3: do { + if (!(out_grouping(g_v, 97, 246))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + return false; + } + cursor++; + } + // setmark p1, line 30 + I_p1 = cursor; + // try, line 31 + lab4: do { + // (, line 31 + if (!(I_p1 < I_x)) + { + break lab4; + } + I_p1 = I_x; + } while (false); + return true; } - cursor++; - } - // setmark p1, line 30 - I_p1 = cursor; - // try, line 31 - lab4: do { - // (, line 31 - if 
(!(I_p1 < I_x)) - { - break lab4; - } - I_p1 = I_x; - } while (false); - return true; - } - private boolean r_main_suffix() { + private boolean r_main_suffix() { int among_var; int v_1; int v_2; - // (, line 36 - // setlimit, line 37 - v_1 = limit - cursor; - // tomark, line 37 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 37 - // [, line 37 - ket = cursor; - // substring, line 37 - among_var = find_among_b(a_0, 37); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - // ], line 37 - bra = cursor; - limit_backward = v_2; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 44 - // delete, line 44 - slice_del(); - break; - case 2: - // (, line 46 - if (!(in_grouping_b(g_s_ending, 98, 121))) + // (, line 36 + // setlimit, line 37 + v_1 = limit - cursor; + // tomark, line 37 + if (cursor < I_p1) { return false; } - // delete, line 46 - slice_del(); - break; - } - return true; - } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 37 + // [, line 37 + ket = cursor; + // substring, line 37 + among_var = find_among_b(a_0, 37); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 37 + bra = cursor; + limit_backward = v_2; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 44 + // delete, line 44 + slice_del(); + break; + case 2: + // (, line 46 + if (!(in_grouping_b(g_s_ending, 98, 121))) + { + return false; + } + // delete, line 46 + slice_del(); + break; + } + return true; + } - private boolean r_consonant_pair() { + private boolean r_consonant_pair() { int v_1; int v_2; int v_3; - // setlimit, line 50 - v_1 = limit - cursor; - // tomark, line 50 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 50 - // and, line 52 - v_3 = limit - cursor; - // among, line 51 - if (find_among_b(a_1, 7) == 0) - { - limit_backward = v_2; - return false; - } - cursor = limit - v_3; - // (, line 52 - // [, line 52 - ket = cursor; - // next, line 52 - if (cursor <= limit_backward) - { - limit_backward = v_2; - return false; - } - cursor--; - // ], line 52 - bra = cursor; - // delete, line 52 - slice_del(); - limit_backward = v_2; - return true; - } + // setlimit, line 50 + v_1 = limit - cursor; + // tomark, line 50 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 50 + // and, line 52 + v_3 = limit - cursor; + // among, line 51 + if (find_among_b(a_1, 7) == 0) + { + limit_backward = v_2; + return false; + } + cursor = limit - v_3; + // (, line 52 + // [, line 52 + ket = cursor; + // next, line 52 + if (cursor <= limit_backward) + { + limit_backward = v_2; + return false; + } + cursor--; + // ], line 52 + bra = cursor; + // delete, line 52 + slice_del(); + limit_backward = v_2; + return true; + } - private boolean r_other_suffix() { + private boolean r_other_suffix() { int among_var; int v_1; int v_2; - // setlimit, line 55 - v_1 = limit - cursor; - // tomark, line 55 - if (cursor < I_p1) - { - return false; - } - cursor = I_p1; - v_2 = limit_backward; - limit_backward = cursor; - cursor = limit - v_1; - // (, line 55 - // [, line 56 - ket = cursor; - // substring, line 56 - among_var = find_among_b(a_2, 5); - if (among_var == 0) - { - limit_backward = v_2; - return false; - } - 
// ], line 56 - bra = cursor; - switch(among_var) { - case 0: + // setlimit, line 55 + v_1 = limit - cursor; + // tomark, line 55 + if (cursor < I_p1) + { + return false; + } + cursor = I_p1; + v_2 = limit_backward; + limit_backward = cursor; + cursor = limit - v_1; + // (, line 55 + // [, line 56 + ket = cursor; + // substring, line 56 + among_var = find_among_b(a_2, 5); + if (among_var == 0) + { + limit_backward = v_2; + return false; + } + // ], line 56 + bra = cursor; + switch(among_var) { + case 0: + limit_backward = v_2; + return false; + case 1: + // (, line 57 + // delete, line 57 + slice_del(); + break; + case 2: + // (, line 58 + // <-, line 58 + slice_from("l\u00F6s"); + break; + case 3: + // (, line 59 + // <-, line 59 + slice_from("full"); + break; + } limit_backward = v_2; - return false; - case 1: - // (, line 57 - // delete, line 57 - slice_del(); - break; - case 2: - // (, line 58 - // <-, line 58 - slice_from("l\u00F6s"); - break; - case 3: - // (, line 59 - // <-, line 59 - slice_from("full"); - break; - } - limit_backward = v_2; - return true; - } + return true; + } - public boolean stem() { + @Override + public boolean stem() { int v_1; int v_2; int v_3; int v_4; - // (, line 64 - // do, line 66 - v_1 = cursor; - lab0: do { - // call mark_regions, line 66 - if (!r_mark_regions()) - { - break lab0; + // (, line 64 + // do, line 66 + v_1 = cursor; + lab0: do { + // call mark_regions, line 66 + if (!r_mark_regions()) + { + break lab0; + } + } while (false); + cursor = v_1; + // backwards, line 67 + limit_backward = cursor; cursor = limit; + // (, line 67 + // do, line 68 + v_2 = limit - cursor; + lab1: do { + // call main_suffix, line 68 + if (!r_main_suffix()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + // do, line 69 + v_3 = limit - cursor; + lab2: do { + // call consonant_pair, line 69 + if (!r_consonant_pair()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + // do, line 70 + v_4 = limit - cursor; + lab3: do { + // call other_suffix, line 70 + if (!r_other_suffix()) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + cursor = limit_backward; return true; } - } while (false); - cursor = v_1; - // backwards, line 67 - limit_backward = cursor; cursor = limit; - // (, line 67 - // do, line 68 - v_2 = limit - cursor; - lab1: do { - // call main_suffix, line 68 - if (!r_main_suffix()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - // do, line 69 - v_3 = limit - cursor; - lab2: do { - // call consonant_pair, line 69 - if (!r_consonant_pair()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - // do, line 70 - v_4 = limit - cursor; - lab3: do { - // call other_suffix, line 70 - if (!r_other_suffix()) - { - break lab3; - } - } while (false); - cursor = limit - v_4; - cursor = limit_backward; return true; + + @Override + public boolean equals( Object o ) { + return o instanceof SwedishStemmer; } + @Override + public int hashCode() { + return SwedishStemmer.class.getName().hashCode(); + } + + + } Index: 3rdParty_sources/lucene/org/tartarus/snowball/ext/TurkishStemmer.java =================================================================== RCS file: /usr/local/cvsroot/3rdParty_sources/lucene/org/tartarus/snowball/ext/TurkishStemmer.java,v diff -u -r1.1 -r1.1.2.1 --- 3rdParty_sources/lucene/org/tartarus/snowball/ext/TurkishStemmer.java 17 Aug 2012 14:55:09 -0000 1.1 +++ 3rdParty_sources/lucene/org/tartarus/snowball/ext/TurkishStemmer.java 16 Dec 2014 11:31:45 -0000 1.1.2.1 @@ -1,232 +1,239 
@@ // This file was generated automatically by the Snowball to Java compiler package org.tartarus.snowball.ext; -import org.tartarus.snowball.SnowballProgram; + import org.tartarus.snowball.Among; +import org.tartarus.snowball.SnowballProgram; -/** - * Generated class implementing code defined by a snowball script. - */ + /** + * This class was automatically generated by a Snowball to Java compiler + * It implements the stemming algorithm defined by a snowball script. + */ + public class TurkishStemmer extends SnowballProgram { - private Among a_0[] = { - new Among ( "m", -1, -1, "", this), - new Among ( "n", -1, -1, "", this), - new Among ( "miz", -1, -1, "", this), - new Among ( "niz", -1, -1, "", this), - new Among ( "muz", -1, -1, "", this), - new Among ( "nuz", -1, -1, "", this), - new Among ( "m\u00FCz", -1, -1, "", this), - new Among ( "n\u00FCz", -1, -1, "", this), - new Among ( "m\u0131z", -1, -1, "", this), - new Among ( "n\u0131z", -1, -1, "", this) - }; +private static final long serialVersionUID = 1L; - private Among a_1[] = { - new Among ( "leri", -1, -1, "", this), - new Among ( "lar\u0131", -1, -1, "", this) - }; + private final static TurkishStemmer methodObject = new TurkishStemmer (); - private Among a_2[] = { - new Among ( "ni", -1, -1, "", this), - new Among ( "nu", -1, -1, "", this), - new Among ( "n\u00FC", -1, -1, "", this), - new Among ( "n\u0131", -1, -1, "", this) - }; + private final static Among a_0[] = { + new Among ( "m", -1, -1, "", methodObject ), + new Among ( "n", -1, -1, "", methodObject ), + new Among ( "miz", -1, -1, "", methodObject ), + new Among ( "niz", -1, -1, "", methodObject ), + new Among ( "muz", -1, -1, "", methodObject ), + new Among ( "nuz", -1, -1, "", methodObject ), + new Among ( "m\u00FCz", -1, -1, "", methodObject ), + new Among ( "n\u00FCz", -1, -1, "", methodObject ), + new Among ( "m\u0131z", -1, -1, "", methodObject ), + new Among ( "n\u0131z", -1, -1, "", methodObject ) + }; - private Among a_3[] = { - new Among ( "in", -1, -1, "", this), - new Among ( "un", -1, -1, "", this), - new Among ( "\u00FCn", -1, -1, "", this), - new Among ( "\u0131n", -1, -1, "", this) - }; + private final static Among a_1[] = { + new Among ( "leri", -1, -1, "", methodObject ), + new Among ( "lar\u0131", -1, -1, "", methodObject ) + }; - private Among a_4[] = { - new Among ( "a", -1, -1, "", this), - new Among ( "e", -1, -1, "", this) - }; + private final static Among a_2[] = { + new Among ( "ni", -1, -1, "", methodObject ), + new Among ( "nu", -1, -1, "", methodObject ), + new Among ( "n\u00FC", -1, -1, "", methodObject ), + new Among ( "n\u0131", -1, -1, "", methodObject ) + }; - private Among a_5[] = { - new Among ( "na", -1, -1, "", this), - new Among ( "ne", -1, -1, "", this) - }; + private final static Among a_3[] = { + new Among ( "in", -1, -1, "", methodObject ), + new Among ( "un", -1, -1, "", methodObject ), + new Among ( "\u00FCn", -1, -1, "", methodObject ), + new Among ( "\u0131n", -1, -1, "", methodObject ) + }; - private Among a_6[] = { - new Among ( "da", -1, -1, "", this), - new Among ( "ta", -1, -1, "", this), - new Among ( "de", -1, -1, "", this), - new Among ( "te", -1, -1, "", this) - }; + private final static Among a_4[] = { + new Among ( "a", -1, -1, "", methodObject ), + new Among ( "e", -1, -1, "", methodObject ) + }; - private Among a_7[] = { - new Among ( "nda", -1, -1, "", this), - new Among ( "nde", -1, -1, "", this) - }; + private final static Among a_5[] = { + new Among ( "na", -1, -1, "", methodObject ), + new Among ( 
"ne", -1, -1, "", methodObject ) + }; - private Among a_8[] = { - new Among ( "dan", -1, -1, "", this), - new Among ( "tan", -1, -1, "", this), - new Among ( "den", -1, -1, "", this), - new Among ( "ten", -1, -1, "", this) - }; + private final static Among a_6[] = { + new Among ( "da", -1, -1, "", methodObject ), + new Among ( "ta", -1, -1, "", methodObject ), + new Among ( "de", -1, -1, "", methodObject ), + new Among ( "te", -1, -1, "", methodObject ) + }; - private Among a_9[] = { - new Among ( "ndan", -1, -1, "", this), - new Among ( "nden", -1, -1, "", this) - }; + private final static Among a_7[] = { + new Among ( "nda", -1, -1, "", methodObject ), + new Among ( "nde", -1, -1, "", methodObject ) + }; - private Among a_10[] = { - new Among ( "la", -1, -1, "", this), - new Among ( "le", -1, -1, "", this) - }; + private final static Among a_8[] = { + new Among ( "dan", -1, -1, "", methodObject ), + new Among ( "tan", -1, -1, "", methodObject ), + new Among ( "den", -1, -1, "", methodObject ), + new Among ( "ten", -1, -1, "", methodObject ) + }; - private Among a_11[] = { - new Among ( "ca", -1, -1, "", this), - new Among ( "ce", -1, -1, "", this) - }; + private final static Among a_9[] = { + new Among ( "ndan", -1, -1, "", methodObject ), + new Among ( "nden", -1, -1, "", methodObject ) + }; - private Among a_12[] = { - new Among ( "im", -1, -1, "", this), - new Among ( "um", -1, -1, "", this), - new Among ( "\u00FCm", -1, -1, "", this), - new Among ( "\u0131m", -1, -1, "", this) - }; + private final static Among a_10[] = { + new Among ( "la", -1, -1, "", methodObject ), + new Among ( "le", -1, -1, "", methodObject ) + }; - private Among a_13[] = { - new Among ( "sin", -1, -1, "", this), - new Among ( "sun", -1, -1, "", this), - new Among ( "s\u00FCn", -1, -1, "", this), - new Among ( "s\u0131n", -1, -1, "", this) - }; + private final static Among a_11[] = { + new Among ( "ca", -1, -1, "", methodObject ), + new Among ( "ce", -1, -1, "", methodObject ) + }; - private Among a_14[] = { - new Among ( "iz", -1, -1, "", this), - new Among ( "uz", -1, -1, "", this), - new Among ( "\u00FCz", -1, -1, "", this), - new Among ( "\u0131z", -1, -1, "", this) - }; + private final static Among a_12[] = { + new Among ( "im", -1, -1, "", methodObject ), + new Among ( "um", -1, -1, "", methodObject ), + new Among ( "\u00FCm", -1, -1, "", methodObject ), + new Among ( "\u0131m", -1, -1, "", methodObject ) + }; - private Among a_15[] = { - new Among ( "siniz", -1, -1, "", this), - new Among ( "sunuz", -1, -1, "", this), - new Among ( "s\u00FCn\u00FCz", -1, -1, "", this), - new Among ( "s\u0131n\u0131z", -1, -1, "", this) - }; + private final static Among a_13[] = { + new Among ( "sin", -1, -1, "", methodObject ), + new Among ( "sun", -1, -1, "", methodObject ), + new Among ( "s\u00FCn", -1, -1, "", methodObject ), + new Among ( "s\u0131n", -1, -1, "", methodObject ) + }; - private Among a_16[] = { - new Among ( "lar", -1, -1, "", this), - new Among ( "ler", -1, -1, "", this) - }; + private final static Among a_14[] = { + new Among ( "iz", -1, -1, "", methodObject ), + new Among ( "uz", -1, -1, "", methodObject ), + new Among ( "\u00FCz", -1, -1, "", methodObject ), + new Among ( "\u0131z", -1, -1, "", methodObject ) + }; - private Among a_17[] = { - new Among ( "niz", -1, -1, "", this), - new Among ( "nuz", -1, -1, "", this), - new Among ( "n\u00FCz", -1, -1, "", this), - new Among ( "n\u0131z", -1, -1, "", this) - }; + private final static Among a_15[] = { + new Among ( "siniz", -1, -1, "", methodObject ), 
+ new Among ( "sunuz", -1, -1, "", methodObject ), + new Among ( "s\u00FCn\u00FCz", -1, -1, "", methodObject ), + new Among ( "s\u0131n\u0131z", -1, -1, "", methodObject ) + }; - private Among a_18[] = { - new Among ( "dir", -1, -1, "", this), - new Among ( "tir", -1, -1, "", this), - new Among ( "dur", -1, -1, "", this), - new Among ( "tur", -1, -1, "", this), - new Among ( "d\u00FCr", -1, -1, "", this), - new Among ( "t\u00FCr", -1, -1, "", this), - new Among ( "d\u0131r", -1, -1, "", this), - new Among ( "t\u0131r", -1, -1, "", this) - }; + private final static Among a_16[] = { + new Among ( "lar", -1, -1, "", methodObject ), + new Among ( "ler", -1, -1, "", methodObject ) + }; - private Among a_19[] = { - new Among ( "cas\u0131na", -1, -1, "", this), - new Among ( "cesine", -1, -1, "", this) - }; + private final static Among a_17[] = { + new Among ( "niz", -1, -1, "", methodObject ), + new Among ( "nuz", -1, -1, "", methodObject ), + new Among ( "n\u00FCz", -1, -1, "", methodObject ), + new Among ( "n\u0131z", -1, -1, "", methodObject ) + }; - private Among a_20[] = { - new Among ( "di", -1, -1, "", this), - new Among ( "ti", -1, -1, "", this), - new Among ( "dik", -1, -1, "", this), - new Among ( "tik", -1, -1, "", this), - new Among ( "duk", -1, -1, "", this), - new Among ( "tuk", -1, -1, "", this), - new Among ( "d\u00FCk", -1, -1, "", this), - new Among ( "t\u00FCk", -1, -1, "", this), - new Among ( "d\u0131k", -1, -1, "", this), - new Among ( "t\u0131k", -1, -1, "", this), - new Among ( "dim", -1, -1, "", this), - new Among ( "tim", -1, -1, "", this), - new Among ( "dum", -1, -1, "", this), - new Among ( "tum", -1, -1, "", this), - new Among ( "d\u00FCm", -1, -1, "", this), - new Among ( "t\u00FCm", -1, -1, "", this), - new Among ( "d\u0131m", -1, -1, "", this), - new Among ( "t\u0131m", -1, -1, "", this), - new Among ( "din", -1, -1, "", this), - new Among ( "tin", -1, -1, "", this), - new Among ( "dun", -1, -1, "", this), - new Among ( "tun", -1, -1, "", this), - new Among ( "d\u00FCn", -1, -1, "", this), - new Among ( "t\u00FCn", -1, -1, "", this), - new Among ( "d\u0131n", -1, -1, "", this), - new Among ( "t\u0131n", -1, -1, "", this), - new Among ( "du", -1, -1, "", this), - new Among ( "tu", -1, -1, "", this), - new Among ( "d\u00FC", -1, -1, "", this), - new Among ( "t\u00FC", -1, -1, "", this), - new Among ( "d\u0131", -1, -1, "", this), - new Among ( "t\u0131", -1, -1, "", this) - }; + private final static Among a_18[] = { + new Among ( "dir", -1, -1, "", methodObject ), + new Among ( "tir", -1, -1, "", methodObject ), + new Among ( "dur", -1, -1, "", methodObject ), + new Among ( "tur", -1, -1, "", methodObject ), + new Among ( "d\u00FCr", -1, -1, "", methodObject ), + new Among ( "t\u00FCr", -1, -1, "", methodObject ), + new Among ( "d\u0131r", -1, -1, "", methodObject ), + new Among ( "t\u0131r", -1, -1, "", methodObject ) + }; - private Among a_21[] = { - new Among ( "sa", -1, -1, "", this), - new Among ( "se", -1, -1, "", this), - new Among ( "sak", -1, -1, "", this), - new Among ( "sek", -1, -1, "", this), - new Among ( "sam", -1, -1, "", this), - new Among ( "sem", -1, -1, "", this), - new Among ( "san", -1, -1, "", this), - new Among ( "sen", -1, -1, "", this) - }; + private final static Among a_19[] = { + new Among ( "cas\u0131na", -1, -1, "", methodObject ), + new Among ( "cesine", -1, -1, "", methodObject ) + }; - private Among a_22[] = { - new Among ( "mi\u015F", -1, -1, "", this), - new Among ( "mu\u015F", -1, -1, "", this), - new Among ( "m\u00FC\u015F", -1, 
-1, "", this), - new Among ( "m\u0131\u015F", -1, -1, "", this) - }; + private final static Among a_20[] = { + new Among ( "di", -1, -1, "", methodObject ), + new Among ( "ti", -1, -1, "", methodObject ), + new Among ( "dik", -1, -1, "", methodObject ), + new Among ( "tik", -1, -1, "", methodObject ), + new Among ( "duk", -1, -1, "", methodObject ), + new Among ( "tuk", -1, -1, "", methodObject ), + new Among ( "d\u00FCk", -1, -1, "", methodObject ), + new Among ( "t\u00FCk", -1, -1, "", methodObject ), + new Among ( "d\u0131k", -1, -1, "", methodObject ), + new Among ( "t\u0131k", -1, -1, "", methodObject ), + new Among ( "dim", -1, -1, "", methodObject ), + new Among ( "tim", -1, -1, "", methodObject ), + new Among ( "dum", -1, -1, "", methodObject ), + new Among ( "tum", -1, -1, "", methodObject ), + new Among ( "d\u00FCm", -1, -1, "", methodObject ), + new Among ( "t\u00FCm", -1, -1, "", methodObject ), + new Among ( "d\u0131m", -1, -1, "", methodObject ), + new Among ( "t\u0131m", -1, -1, "", methodObject ), + new Among ( "din", -1, -1, "", methodObject ), + new Among ( "tin", -1, -1, "", methodObject ), + new Among ( "dun", -1, -1, "", methodObject ), + new Among ( "tun", -1, -1, "", methodObject ), + new Among ( "d\u00FCn", -1, -1, "", methodObject ), + new Among ( "t\u00FCn", -1, -1, "", methodObject ), + new Among ( "d\u0131n", -1, -1, "", methodObject ), + new Among ( "t\u0131n", -1, -1, "", methodObject ), + new Among ( "du", -1, -1, "", methodObject ), + new Among ( "tu", -1, -1, "", methodObject ), + new Among ( "d\u00FC", -1, -1, "", methodObject ), + new Among ( "t\u00FC", -1, -1, "", methodObject ), + new Among ( "d\u0131", -1, -1, "", methodObject ), + new Among ( "t\u0131", -1, -1, "", methodObject ) + }; - private Among a_23[] = { - new Among ( "b", -1, 1, "", this), - new Among ( "c", -1, 2, "", this), - new Among ( "d", -1, 3, "", this), - new Among ( "\u011F", -1, 4, "", this) - }; + private final static Among a_21[] = { + new Among ( "sa", -1, -1, "", methodObject ), + new Among ( "se", -1, -1, "", methodObject ), + new Among ( "sak", -1, -1, "", methodObject ), + new Among ( "sek", -1, -1, "", methodObject ), + new Among ( "sam", -1, -1, "", methodObject ), + new Among ( "sem", -1, -1, "", methodObject ), + new Among ( "san", -1, -1, "", methodObject ), + new Among ( "sen", -1, -1, "", methodObject ) + }; - private static final char g_vowel[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 8, 0, 0, 0, 0, 0, 0, 1 }; + private final static Among a_22[] = { + new Among ( "mi\u015F", -1, -1, "", methodObject ), + new Among ( "mu\u015F", -1, -1, "", methodObject ), + new Among ( "m\u00FC\u015F", -1, -1, "", methodObject ), + new Among ( "m\u0131\u015F", -1, -1, "", methodObject ) + }; - private static final char g_U[] = {1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 1 }; + private final static Among a_23[] = { + new Among ( "b", -1, 1, "", methodObject ), + new Among ( "c", -1, 2, "", methodObject ), + new Among ( "d", -1, 3, "", methodObject ), + new Among ( "\u011F", -1, 4, "", methodObject ) + }; - private static final char g_vowel1[] = {1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + private static final char g_vowel[] = {17, 65, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32, 8, 0, 0, 0, 0, 0, 0, 1 }; - private static final char g_vowel2[] = {17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 }; + private static final char g_U[] = {1, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 1 }; - private static final char g_vowel3[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; + private static final char g_vowel1[] = {1, 64, 16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - private static final char g_vowel4[] = {17 }; + private static final char g_vowel2[] = {17, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 }; - private static final char g_vowel5[] = {65 }; + private static final char g_vowel3[] = {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 }; - private static final char g_vowel6[] = {65 }; + private static final char g_vowel4[] = {17 }; + private static final char g_vowel5[] = {65 }; + + private static final char g_vowel6[] = {65 }; + private boolean B_continue_stemming_noun_suffixes; private int I_strlen; - private void copy_from(TurkishStemmer other) { - B_continue_stemming_noun_suffixes = other.B_continue_stemming_noun_suffixes; - I_strlen = other.I_strlen; - super.copy_from(other); - } + private void copy_from(TurkishStemmer other) { + B_continue_stemming_noun_suffixes = other.B_continue_stemming_noun_suffixes; + I_strlen = other.I_strlen; + super.copy_from(other); + } - private boolean r_check_vowel_harmony() { + private boolean r_check_vowel_harmony() { int v_1; int v_2; int v_3; @@ -238,1036 +245,1036 @@ int v_9; int v_10; int v_11; - // (, line 111 - // test, line 112 - v_1 = limit - cursor; - // (, line 113 - // (, line 114 - // goto, line 114 - golab0: while(true) - { - v_2 = limit - cursor; - lab1: do { - if (!(in_grouping_b(g_vowel, 97, 305))) + // (, line 111 + // test, line 112 + v_1 = limit - cursor; + // (, line 113 + // (, line 114 + // goto, line 114 + golab0: while(true) { - break lab1; - } - cursor = limit - v_2; - break golab0; - } while (false); - cursor = limit - v_2; - if (cursor <= limit_backward) - { - return false; - } - cursor--; - } - // (, line 115 - // or, line 116 - lab2: do { - v_3 = limit - cursor; - lab3: do { - // (, line 116 - // literal, line 116 - if (!(eq_s_b(1, "a"))) - { - break lab3; - } - // goto, line 116 - golab4: while(true) - { - v_4 = limit - cursor; - lab5: do { - if (!(in_grouping_b(g_vowel1, 97, 305))) + v_2 = limit - cursor; + lab1: do { + if (!(in_grouping_b(g_vowel, 97, 305))) { - break lab5; + break lab1; } - cursor = limit - v_4; - break golab4; + cursor = limit - v_2; + break golab0; } while (false); - cursor = limit - v_4; + cursor = limit - v_2; if (cursor <= limit_backward) { - break lab3; + return false; } cursor--; } - break lab2; - } while (false); - cursor = limit - v_3; - lab6: do { - // (, line 117 - // literal, line 117 - if (!(eq_s_b(1, "e"))) - { - break lab6; - } - // goto, line 117 - golab7: while(true) - { - v_5 = limit - cursor; - lab8: do { - if (!(in_grouping_b(g_vowel2, 101, 252))) + // (, line 115 + // or, line 116 + lab2: do { + v_3 = limit - cursor; + lab3: do { + // (, line 116 + // literal, line 116 + if (!(eq_s_b(1, "a"))) { - break lab8; + break lab3; } - cursor = limit - v_5; - break golab7; + // goto, line 116 + golab4: while(true) + { + v_4 = limit - cursor; + lab5: do { + if (!(in_grouping_b(g_vowel1, 97, 305))) + { + break lab5; + } + cursor = limit - v_4; + break golab4; + } while (false); + cursor = limit - v_4; + if (cursor <= limit_backward) + { + break lab3; + } + cursor--; + } + break lab2; } while (false); - cursor = limit - v_5; - if (cursor <= limit_backward) - { - break lab6; - } - cursor--; - } - break lab2; - } while 
(false); - cursor = limit - v_3; - lab9: do { - // (, line 118 - // literal, line 118 - if (!(eq_s_b(1, "\u0131"))) - { - break lab9; - } - // goto, line 118 - golab10: while(true) - { - v_6 = limit - cursor; - lab11: do { - if (!(in_grouping_b(g_vowel3, 97, 305))) + cursor = limit - v_3; + lab6: do { + // (, line 117 + // literal, line 117 + if (!(eq_s_b(1, "e"))) { - break lab11; + break lab6; } - cursor = limit - v_6; - break golab10; + // goto, line 117 + golab7: while(true) + { + v_5 = limit - cursor; + lab8: do { + if (!(in_grouping_b(g_vowel2, 101, 252))) + { + break lab8; + } + cursor = limit - v_5; + break golab7; + } while (false); + cursor = limit - v_5; + if (cursor <= limit_backward) + { + break lab6; + } + cursor--; + } + break lab2; } while (false); - cursor = limit - v_6; - if (cursor <= limit_backward) - { - break lab9; - } - cursor--; - } - break lab2; - } while (false); - cursor = limit - v_3; - lab12: do { - // (, line 119 - // literal, line 119 - if (!(eq_s_b(1, "i"))) - { - break lab12; - } - // goto, line 119 - golab13: while(true) - { - v_7 = limit - cursor; - lab14: do { - if (!(in_grouping_b(g_vowel4, 101, 105))) + cursor = limit - v_3; + lab9: do { + // (, line 118 + // literal, line 118 + if (!(eq_s_b(1, "\u0131"))) { - break lab14; + break lab9; } - cursor = limit - v_7; - break golab13; + // goto, line 118 + golab10: while(true) + { + v_6 = limit - cursor; + lab11: do { + if (!(in_grouping_b(g_vowel3, 97, 305))) + { + break lab11; + } + cursor = limit - v_6; + break golab10; + } while (false); + cursor = limit - v_6; + if (cursor <= limit_backward) + { + break lab9; + } + cursor--; + } + break lab2; } while (false); - cursor = limit - v_7; - if (cursor <= limit_backward) - { - break lab12; - } - cursor--; - } - break lab2; - } while (false); - cursor = limit - v_3; - lab15: do { - // (, line 120 - // literal, line 120 - if (!(eq_s_b(1, "o"))) - { - break lab15; - } - // goto, line 120 - golab16: while(true) - { - v_8 = limit - cursor; - lab17: do { - if (!(in_grouping_b(g_vowel5, 111, 117))) + cursor = limit - v_3; + lab12: do { + // (, line 119 + // literal, line 119 + if (!(eq_s_b(1, "i"))) { - break lab17; + break lab12; } - cursor = limit - v_8; - break golab16; + // goto, line 119 + golab13: while(true) + { + v_7 = limit - cursor; + lab14: do { + if (!(in_grouping_b(g_vowel4, 101, 105))) + { + break lab14; + } + cursor = limit - v_7; + break golab13; + } while (false); + cursor = limit - v_7; + if (cursor <= limit_backward) + { + break lab12; + } + cursor--; + } + break lab2; } while (false); - cursor = limit - v_8; - if (cursor <= limit_backward) - { - break lab15; - } - cursor--; - } - break lab2; - } while (false); - cursor = limit - v_3; - lab18: do { - // (, line 121 - // literal, line 121 - if (!(eq_s_b(1, "\u00F6"))) - { - break lab18; - } - // goto, line 121 - golab19: while(true) - { - v_9 = limit - cursor; - lab20: do { - if (!(in_grouping_b(g_vowel6, 246, 252))) + cursor = limit - v_3; + lab15: do { + // (, line 120 + // literal, line 120 + if (!(eq_s_b(1, "o"))) { - break lab20; + break lab15; } - cursor = limit - v_9; - break golab19; + // goto, line 120 + golab16: while(true) + { + v_8 = limit - cursor; + lab17: do { + if (!(in_grouping_b(g_vowel5, 111, 117))) + { + break lab17; + } + cursor = limit - v_8; + break golab16; + } while (false); + cursor = limit - v_8; + if (cursor <= limit_backward) + { + break lab15; + } + cursor--; + } + break lab2; } while (false); - cursor = limit - v_9; - if (cursor <= limit_backward) - { - break lab18; - 
} - cursor--; - } - break lab2; - } while (false); - cursor = limit - v_3; - lab21: do { - // (, line 122 - // literal, line 122 - if (!(eq_s_b(1, "u"))) - { - break lab21; - } - // goto, line 122 - golab22: while(true) - { - v_10 = limit - cursor; - lab23: do { - if (!(in_grouping_b(g_vowel5, 111, 117))) + cursor = limit - v_3; + lab18: do { + // (, line 121 + // literal, line 121 + if (!(eq_s_b(1, "\u00F6"))) { - break lab23; + break lab18; } - cursor = limit - v_10; - break golab22; + // goto, line 121 + golab19: while(true) + { + v_9 = limit - cursor; + lab20: do { + if (!(in_grouping_b(g_vowel6, 246, 252))) + { + break lab20; + } + cursor = limit - v_9; + break golab19; + } while (false); + cursor = limit - v_9; + if (cursor <= limit_backward) + { + break lab18; + } + cursor--; + } + break lab2; } while (false); - cursor = limit - v_10; - if (cursor <= limit_backward) + cursor = limit - v_3; + lab21: do { + // (, line 122 + // literal, line 122 + if (!(eq_s_b(1, "u"))) + { + break lab21; + } + // goto, line 122 + golab22: while(true) + { + v_10 = limit - cursor; + lab23: do { + if (!(in_grouping_b(g_vowel5, 111, 117))) + { + break lab23; + } + cursor = limit - v_10; + break golab22; + } while (false); + cursor = limit - v_10; + if (cursor <= limit_backward) + { + break lab21; + } + cursor--; + } + break lab2; + } while (false); + cursor = limit - v_3; + // (, line 123 + // literal, line 123 + if (!(eq_s_b(1, "\u00FC"))) { - break lab21; + return false; } - cursor--; - } - break lab2; - } while (false); - cursor = limit - v_3; - // (, line 123 - // literal, line 123 - if (!(eq_s_b(1, "\u00FC"))) - { - return false; - } - // goto, line 123 - golab24: while(true) - { - v_11 = limit - cursor; - lab25: do { - if (!(in_grouping_b(g_vowel6, 246, 252))) + // goto, line 123 + golab24: while(true) { - break lab25; + v_11 = limit - cursor; + lab25: do { + if (!(in_grouping_b(g_vowel6, 246, 252))) + { + break lab25; + } + cursor = limit - v_11; + break golab24; + } while (false); + cursor = limit - v_11; + if (cursor <= limit_backward) + { + return false; + } + cursor--; } - cursor = limit - v_11; - break golab24; } while (false); - cursor = limit - v_11; - if (cursor <= limit_backward) - { - return false; - } - cursor--; + cursor = limit - v_1; + return true; } - } while (false); - cursor = limit - v_1; - return true; - } - private boolean r_mark_suffix_with_optional_n_consonant() { + private boolean r_mark_suffix_with_optional_n_consonant() { int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; int v_7; - // (, line 132 - // or, line 134 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 133 - // (, line 133 - // test, line 133 - v_2 = limit - cursor; - // literal, line 133 - if (!(eq_s_b(1, "n"))) - { - break lab1; - } - cursor = limit - v_2; - // next, line 133 - if (cursor <= limit_backward) - { - break lab1; - } - cursor--; - // (, line 133 - // test, line 133 - v_3 = limit - cursor; - if (!(in_grouping_b(g_vowel, 97, 305))) - { - break lab1; - } - cursor = limit - v_3; - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 135 - // (, line 135 - // not, line 135 - { - v_4 = limit - cursor; - lab2: do { + // (, line 132 + // or, line 134 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 133 + // (, line 133 + // test, line 133 + v_2 = limit - cursor; + // literal, line 133 + if (!(eq_s_b(1, "n"))) + { + break lab1; + } + cursor = limit - v_2; + // next, line 133 + if (cursor <= limit_backward) + { + break lab1; + } + cursor--; + // (, line 133 + // 
test, line 133 + v_3 = limit - cursor; + if (!(in_grouping_b(g_vowel, 97, 305))) + { + break lab1; + } + cursor = limit - v_3; + break lab0; + } while (false); + cursor = limit - v_1; // (, line 135 + // (, line 135 + // not, line 135 + { + v_4 = limit - cursor; + lab2: do { + // (, line 135 + // test, line 135 + v_5 = limit - cursor; + // literal, line 135 + if (!(eq_s_b(1, "n"))) + { + break lab2; + } + cursor = limit - v_5; + return false; + } while (false); + cursor = limit - v_4; + } // test, line 135 - v_5 = limit - cursor; - // literal, line 135 - if (!(eq_s_b(1, "n"))) + v_6 = limit - cursor; + // (, line 135 + // next, line 135 + if (cursor <= limit_backward) { - break lab2; + return false; } - cursor = limit - v_5; - return false; + cursor--; + // (, line 135 + // test, line 135 + v_7 = limit - cursor; + if (!(in_grouping_b(g_vowel, 97, 305))) + { + return false; + } + cursor = limit - v_7; + cursor = limit - v_6; } while (false); - cursor = limit - v_4; + return true; } - // test, line 135 - v_6 = limit - cursor; - // (, line 135 - // next, line 135 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // (, line 135 - // test, line 135 - v_7 = limit - cursor; - if (!(in_grouping_b(g_vowel, 97, 305))) - { - return false; - } - cursor = limit - v_7; - cursor = limit - v_6; - } while (false); - return true; - } - private boolean r_mark_suffix_with_optional_s_consonant() { + private boolean r_mark_suffix_with_optional_s_consonant() { int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; int v_7; - // (, line 143 - // or, line 145 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 144 - // (, line 144 - // test, line 144 - v_2 = limit - cursor; - // literal, line 144 - if (!(eq_s_b(1, "s"))) - { - break lab1; - } - cursor = limit - v_2; - // next, line 144 - if (cursor <= limit_backward) - { - break lab1; - } - cursor--; - // (, line 144 - // test, line 144 - v_3 = limit - cursor; - if (!(in_grouping_b(g_vowel, 97, 305))) - { - break lab1; - } - cursor = limit - v_3; - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 146 - // (, line 146 - // not, line 146 - { - v_4 = limit - cursor; - lab2: do { + // (, line 143 + // or, line 145 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 144 + // (, line 144 + // test, line 144 + v_2 = limit - cursor; + // literal, line 144 + if (!(eq_s_b(1, "s"))) + { + break lab1; + } + cursor = limit - v_2; + // next, line 144 + if (cursor <= limit_backward) + { + break lab1; + } + cursor--; + // (, line 144 + // test, line 144 + v_3 = limit - cursor; + if (!(in_grouping_b(g_vowel, 97, 305))) + { + break lab1; + } + cursor = limit - v_3; + break lab0; + } while (false); + cursor = limit - v_1; // (, line 146 + // (, line 146 + // not, line 146 + { + v_4 = limit - cursor; + lab2: do { + // (, line 146 + // test, line 146 + v_5 = limit - cursor; + // literal, line 146 + if (!(eq_s_b(1, "s"))) + { + break lab2; + } + cursor = limit - v_5; + return false; + } while (false); + cursor = limit - v_4; + } // test, line 146 - v_5 = limit - cursor; - // literal, line 146 - if (!(eq_s_b(1, "s"))) + v_6 = limit - cursor; + // (, line 146 + // next, line 146 + if (cursor <= limit_backward) { - break lab2; + return false; } - cursor = limit - v_5; - return false; + cursor--; + // (, line 146 + // test, line 146 + v_7 = limit - cursor; + if (!(in_grouping_b(g_vowel, 97, 305))) + { + return false; + } + cursor = limit - v_7; + cursor = limit - v_6; } while (false); - cursor = limit - v_4; + return true; } - // 
test, line 146 - v_6 = limit - cursor; - // (, line 146 - // next, line 146 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // (, line 146 - // test, line 146 - v_7 = limit - cursor; - if (!(in_grouping_b(g_vowel, 97, 305))) - { - return false; - } - cursor = limit - v_7; - cursor = limit - v_6; - } while (false); - return true; - } - private boolean r_mark_suffix_with_optional_y_consonant() { + private boolean r_mark_suffix_with_optional_y_consonant() { int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; int v_7; - // (, line 153 - // or, line 155 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 154 - // (, line 154 - // test, line 154 - v_2 = limit - cursor; - // literal, line 154 - if (!(eq_s_b(1, "y"))) - { - break lab1; - } - cursor = limit - v_2; - // next, line 154 - if (cursor <= limit_backward) - { - break lab1; - } - cursor--; - // (, line 154 - // test, line 154 - v_3 = limit - cursor; - if (!(in_grouping_b(g_vowel, 97, 305))) - { - break lab1; - } - cursor = limit - v_3; - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 156 - // (, line 156 - // not, line 156 - { - v_4 = limit - cursor; - lab2: do { + // (, line 153 + // or, line 155 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 154 + // (, line 154 + // test, line 154 + v_2 = limit - cursor; + // literal, line 154 + if (!(eq_s_b(1, "y"))) + { + break lab1; + } + cursor = limit - v_2; + // next, line 154 + if (cursor <= limit_backward) + { + break lab1; + } + cursor--; + // (, line 154 + // test, line 154 + v_3 = limit - cursor; + if (!(in_grouping_b(g_vowel, 97, 305))) + { + break lab1; + } + cursor = limit - v_3; + break lab0; + } while (false); + cursor = limit - v_1; // (, line 156 + // (, line 156 + // not, line 156 + { + v_4 = limit - cursor; + lab2: do { + // (, line 156 + // test, line 156 + v_5 = limit - cursor; + // literal, line 156 + if (!(eq_s_b(1, "y"))) + { + break lab2; + } + cursor = limit - v_5; + return false; + } while (false); + cursor = limit - v_4; + } // test, line 156 - v_5 = limit - cursor; - // literal, line 156 - if (!(eq_s_b(1, "y"))) + v_6 = limit - cursor; + // (, line 156 + // next, line 156 + if (cursor <= limit_backward) { - break lab2; + return false; } - cursor = limit - v_5; - return false; + cursor--; + // (, line 156 + // test, line 156 + v_7 = limit - cursor; + if (!(in_grouping_b(g_vowel, 97, 305))) + { + return false; + } + cursor = limit - v_7; + cursor = limit - v_6; } while (false); - cursor = limit - v_4; + return true; } - // test, line 156 - v_6 = limit - cursor; - // (, line 156 - // next, line 156 - if (cursor <= limit_backward) - { - return false; - } - cursor--; - // (, line 156 - // test, line 156 - v_7 = limit - cursor; - if (!(in_grouping_b(g_vowel, 97, 305))) - { - return false; - } - cursor = limit - v_7; - cursor = limit - v_6; - } while (false); - return true; - } - private boolean r_mark_suffix_with_optional_U_vowel() { + private boolean r_mark_suffix_with_optional_U_vowel() { int v_1; int v_2; int v_3; int v_4; int v_5; int v_6; int v_7; - // (, line 159 - // or, line 161 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 160 - // (, line 160 - // test, line 160 - v_2 = limit - cursor; - if (!(in_grouping_b(g_U, 105, 305))) + // (, line 159 + // or, line 161 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 160 + // (, line 160 + // test, line 160 + v_2 = limit - cursor; + if (!(in_grouping_b(g_U, 105, 305))) + { + break lab1; + } + cursor = limit - v_2; + // next, line 
160 + if (cursor <= limit_backward) + { + break lab1; + } + cursor--; + // (, line 160 + // test, line 160 + v_3 = limit - cursor; + if (!(out_grouping_b(g_vowel, 97, 305))) + { + break lab1; + } + cursor = limit - v_3; + break lab0; + } while (false); + cursor = limit - v_1; + // (, line 162 + // (, line 162 + // not, line 162 + { + v_4 = limit - cursor; + lab2: do { + // (, line 162 + // test, line 162 + v_5 = limit - cursor; + if (!(in_grouping_b(g_U, 105, 305))) + { + break lab2; + } + cursor = limit - v_5; + return false; + } while (false); + cursor = limit - v_4; + } + // test, line 162 + v_6 = limit - cursor; + // (, line 162 + // next, line 162 + if (cursor <= limit_backward) + { + return false; + } + cursor--; + // (, line 162 + // test, line 162 + v_7 = limit - cursor; + if (!(out_grouping_b(g_vowel, 97, 305))) + { + return false; + } + cursor = limit - v_7; + cursor = limit - v_6; + } while (false); + return true; + } + + private boolean r_mark_possessives() { + // (, line 166 + // among, line 167 + if (find_among_b(a_0, 10) == 0) { - break lab1; + return false; } - cursor = limit - v_2; - // next, line 160 - if (cursor <= limit_backward) + // (, line 169 + // call mark_suffix_with_optional_U_vowel, line 169 + if (!r_mark_suffix_with_optional_U_vowel()) { - break lab1; + return false; } - cursor--; - // (, line 160 - // test, line 160 - v_3 = limit - cursor; - if (!(out_grouping_b(g_vowel, 97, 305))) + return true; + } + + private boolean r_mark_sU() { + // (, line 172 + // call check_vowel_harmony, line 173 + if (!r_check_vowel_harmony()) { - break lab1; + return false; } - cursor = limit - v_3; - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 162 - // (, line 162 - // not, line 162 - { - v_4 = limit - cursor; - lab2: do { - // (, line 162 - // test, line 162 - v_5 = limit - cursor; - if (!(in_grouping_b(g_U, 105, 305))) - { - break lab2; - } - cursor = limit - v_5; + if (!(in_grouping_b(g_U, 105, 305))) + { return false; - } while (false); - cursor = limit - v_4; + } + // (, line 175 + // call mark_suffix_with_optional_s_consonant, line 175 + if (!r_mark_suffix_with_optional_s_consonant()) + { + return false; + } + return true; } - // test, line 162 - v_6 = limit - cursor; - // (, line 162 - // next, line 162 - if (cursor <= limit_backward) - { - return false; + + private boolean r_mark_lArI() { + // (, line 178 + // among, line 179 + if (find_among_b(a_1, 2) == 0) + { + return false; + } + return true; } - cursor--; - // (, line 162 - // test, line 162 - v_7 = limit - cursor; - if (!(out_grouping_b(g_vowel, 97, 305))) - { - return false; + + private boolean r_mark_yU() { + // (, line 182 + // call check_vowel_harmony, line 183 + if (!r_check_vowel_harmony()) + { + return false; + } + if (!(in_grouping_b(g_U, 105, 305))) + { + return false; + } + // (, line 185 + // call mark_suffix_with_optional_y_consonant, line 185 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; } - cursor = limit - v_7; - cursor = limit - v_6; - } while (false); - return true; - } - private boolean r_mark_possessives() { - // (, line 166 - // among, line 167 - if (find_among_b(a_0, 10) == 0) - { - return false; - } - // (, line 169 - // call mark_suffix_with_optional_U_vowel, line 169 - if (!r_mark_suffix_with_optional_U_vowel()) - { - return false; - } - return true; - } + private boolean r_mark_nU() { + // (, line 188 + // call check_vowel_harmony, line 189 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 190 + if 
(find_among_b(a_2, 4) == 0) + { + return false; + } + return true; + } - private boolean r_mark_sU() { - // (, line 172 - // call check_vowel_harmony, line 173 - if (!r_check_vowel_harmony()) - { - return false; - } - if (!(in_grouping_b(g_U, 105, 305))) - { - return false; - } - // (, line 175 - // call mark_suffix_with_optional_s_consonant, line 175 - if (!r_mark_suffix_with_optional_s_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_nUn() { + // (, line 193 + // call check_vowel_harmony, line 194 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 195 + if (find_among_b(a_3, 4) == 0) + { + return false; + } + // (, line 196 + // call mark_suffix_with_optional_n_consonant, line 196 + if (!r_mark_suffix_with_optional_n_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_lArI() { - // (, line 178 - // among, line 179 - if (find_among_b(a_1, 2) == 0) - { - return false; - } - return true; - } + private boolean r_mark_yA() { + // (, line 199 + // call check_vowel_harmony, line 200 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 201 + if (find_among_b(a_4, 2) == 0) + { + return false; + } + // (, line 202 + // call mark_suffix_with_optional_y_consonant, line 202 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_yU() { - // (, line 182 - // call check_vowel_harmony, line 183 - if (!r_check_vowel_harmony()) - { - return false; - } - if (!(in_grouping_b(g_U, 105, 305))) - { - return false; - } - // (, line 185 - // call mark_suffix_with_optional_y_consonant, line 185 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_nA() { + // (, line 205 + // call check_vowel_harmony, line 206 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 207 + if (find_among_b(a_5, 2) == 0) + { + return false; + } + return true; + } - private boolean r_mark_nU() { - // (, line 188 - // call check_vowel_harmony, line 189 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 190 - if (find_among_b(a_2, 4) == 0) - { - return false; - } - return true; - } + private boolean r_mark_DA() { + // (, line 210 + // call check_vowel_harmony, line 211 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 212 + if (find_among_b(a_6, 4) == 0) + { + return false; + } + return true; + } - private boolean r_mark_nUn() { - // (, line 193 - // call check_vowel_harmony, line 194 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 195 - if (find_among_b(a_3, 4) == 0) - { - return false; - } - // (, line 196 - // call mark_suffix_with_optional_n_consonant, line 196 - if (!r_mark_suffix_with_optional_n_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_ndA() { + // (, line 215 + // call check_vowel_harmony, line 216 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 217 + if (find_among_b(a_7, 2) == 0) + { + return false; + } + return true; + } - private boolean r_mark_yA() { - // (, line 199 - // call check_vowel_harmony, line 200 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 201 - if (find_among_b(a_4, 2) == 0) - { - return false; - } - // (, line 202 - // call mark_suffix_with_optional_y_consonant, line 202 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_DAn() { + // (, line 
220 + // call check_vowel_harmony, line 221 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 222 + if (find_among_b(a_8, 4) == 0) + { + return false; + } + return true; + } - private boolean r_mark_nA() { - // (, line 205 - // call check_vowel_harmony, line 206 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 207 - if (find_among_b(a_5, 2) == 0) - { - return false; - } - return true; - } + private boolean r_mark_ndAn() { + // (, line 225 + // call check_vowel_harmony, line 226 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 227 + if (find_among_b(a_9, 2) == 0) + { + return false; + } + return true; + } - private boolean r_mark_DA() { - // (, line 210 - // call check_vowel_harmony, line 211 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 212 - if (find_among_b(a_6, 4) == 0) - { - return false; - } - return true; - } + private boolean r_mark_ylA() { + // (, line 230 + // call check_vowel_harmony, line 231 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 232 + if (find_among_b(a_10, 2) == 0) + { + return false; + } + // (, line 233 + // call mark_suffix_with_optional_y_consonant, line 233 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_ndA() { - // (, line 215 - // call check_vowel_harmony, line 216 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 217 - if (find_among_b(a_7, 2) == 0) - { - return false; - } - return true; - } + private boolean r_mark_ki() { + // (, line 236 + // literal, line 237 + if (!(eq_s_b(2, "ki"))) + { + return false; + } + return true; + } - private boolean r_mark_DAn() { - // (, line 220 - // call check_vowel_harmony, line 221 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 222 - if (find_among_b(a_8, 4) == 0) - { - return false; - } - return true; - } + private boolean r_mark_ncA() { + // (, line 240 + // call check_vowel_harmony, line 241 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 242 + if (find_among_b(a_11, 2) == 0) + { + return false; + } + // (, line 243 + // call mark_suffix_with_optional_n_consonant, line 243 + if (!r_mark_suffix_with_optional_n_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_ndAn() { - // (, line 225 - // call check_vowel_harmony, line 226 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 227 - if (find_among_b(a_9, 2) == 0) - { - return false; - } - return true; - } + private boolean r_mark_yUm() { + // (, line 246 + // call check_vowel_harmony, line 247 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 248 + if (find_among_b(a_12, 4) == 0) + { + return false; + } + // (, line 249 + // call mark_suffix_with_optional_y_consonant, line 249 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_ylA() { - // (, line 230 - // call check_vowel_harmony, line 231 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 232 - if (find_among_b(a_10, 2) == 0) - { - return false; - } - // (, line 233 - // call mark_suffix_with_optional_y_consonant, line 233 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_sUn() { + // (, line 252 + // call check_vowel_harmony, line 253 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 254 + if 
(find_among_b(a_13, 4) == 0) + { + return false; + } + return true; + } - private boolean r_mark_ki() { - // (, line 236 - // literal, line 237 - if (!(eq_s_b(2, "ki"))) - { - return false; - } - return true; - } + private boolean r_mark_yUz() { + // (, line 257 + // call check_vowel_harmony, line 258 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 259 + if (find_among_b(a_14, 4) == 0) + { + return false; + } + // (, line 260 + // call mark_suffix_with_optional_y_consonant, line 260 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_ncA() { - // (, line 240 - // call check_vowel_harmony, line 241 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 242 - if (find_among_b(a_11, 2) == 0) - { - return false; - } - // (, line 243 - // call mark_suffix_with_optional_n_consonant, line 243 - if (!r_mark_suffix_with_optional_n_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_sUnUz() { + // (, line 263 + // among, line 264 + if (find_among_b(a_15, 4) == 0) + { + return false; + } + return true; + } - private boolean r_mark_yUm() { - // (, line 246 - // call check_vowel_harmony, line 247 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 248 - if (find_among_b(a_12, 4) == 0) - { - return false; - } - // (, line 249 - // call mark_suffix_with_optional_y_consonant, line 249 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_lAr() { + // (, line 267 + // call check_vowel_harmony, line 268 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 269 + if (find_among_b(a_16, 2) == 0) + { + return false; + } + return true; + } - private boolean r_mark_sUn() { - // (, line 252 - // call check_vowel_harmony, line 253 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 254 - if (find_among_b(a_13, 4) == 0) - { - return false; - } - return true; - } + private boolean r_mark_nUz() { + // (, line 272 + // call check_vowel_harmony, line 273 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 274 + if (find_among_b(a_17, 4) == 0) + { + return false; + } + return true; + } - private boolean r_mark_yUz() { - // (, line 257 - // call check_vowel_harmony, line 258 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 259 - if (find_among_b(a_14, 4) == 0) - { - return false; - } - // (, line 260 - // call mark_suffix_with_optional_y_consonant, line 260 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } + private boolean r_mark_DUr() { + // (, line 277 + // call check_vowel_harmony, line 278 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 279 + if (find_among_b(a_18, 8) == 0) + { + return false; + } + return true; + } - private boolean r_mark_sUnUz() { - // (, line 263 - // among, line 264 - if (find_among_b(a_15, 4) == 0) - { - return false; - } - return true; - } + private boolean r_mark_cAsInA() { + // (, line 282 + // among, line 283 + if (find_among_b(a_19, 2) == 0) + { + return false; + } + return true; + } - private boolean r_mark_lAr() { - // (, line 267 - // call check_vowel_harmony, line 268 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 269 - if (find_among_b(a_16, 2) == 0) - { - return false; - } - return true; - } + private boolean r_mark_yDU() { + // (, line 286 + // call check_vowel_harmony, line 287 + if 
(!r_check_vowel_harmony()) + { + return false; + } + // among, line 288 + if (find_among_b(a_20, 32) == 0) + { + return false; + } + // (, line 292 + // call mark_suffix_with_optional_y_consonant, line 292 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_nUz() { - // (, line 272 - // call check_vowel_harmony, line 273 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 274 - if (find_among_b(a_17, 4) == 0) - { - return false; - } - return true; - } + private boolean r_mark_ysA() { + // (, line 296 + // among, line 297 + if (find_among_b(a_21, 8) == 0) + { + return false; + } + // (, line 298 + // call mark_suffix_with_optional_y_consonant, line 298 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_DUr() { - // (, line 277 - // call check_vowel_harmony, line 278 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 279 - if (find_among_b(a_18, 8) == 0) - { - return false; - } - return true; - } + private boolean r_mark_ymUs_() { + // (, line 301 + // call check_vowel_harmony, line 302 + if (!r_check_vowel_harmony()) + { + return false; + } + // among, line 303 + if (find_among_b(a_22, 4) == 0) + { + return false; + } + // (, line 304 + // call mark_suffix_with_optional_y_consonant, line 304 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_cAsInA() { - // (, line 282 - // among, line 283 - if (find_among_b(a_19, 2) == 0) - { - return false; - } - return true; - } + private boolean r_mark_yken() { + // (, line 307 + // literal, line 308 + if (!(eq_s_b(3, "ken"))) + { + return false; + } + // (, line 308 + // call mark_suffix_with_optional_y_consonant, line 308 + if (!r_mark_suffix_with_optional_y_consonant()) + { + return false; + } + return true; + } - private boolean r_mark_yDU() { - // (, line 286 - // call check_vowel_harmony, line 287 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 288 - if (find_among_b(a_20, 32) == 0) - { - return false; - } - // (, line 292 - // call mark_suffix_with_optional_y_consonant, line 292 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } - - private boolean r_mark_ysA() { - // (, line 296 - // among, line 297 - if (find_among_b(a_21, 8) == 0) - { - return false; - } - // (, line 298 - // call mark_suffix_with_optional_y_consonant, line 298 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } - - private boolean r_mark_ymUs_() { - // (, line 301 - // call check_vowel_harmony, line 302 - if (!r_check_vowel_harmony()) - { - return false; - } - // among, line 303 - if (find_among_b(a_22, 4) == 0) - { - return false; - } - // (, line 304 - // call mark_suffix_with_optional_y_consonant, line 304 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } - - private boolean r_mark_yken() { - // (, line 307 - // literal, line 308 - if (!(eq_s_b(3, "ken"))) - { - return false; - } - // (, line 308 - // call mark_suffix_with_optional_y_consonant, line 308 - if (!r_mark_suffix_with_optional_y_consonant()) - { - return false; - } - return true; - } - - private boolean r_stem_nominal_verb_suffixes() { + private boolean r_stem_nominal_verb_suffixes() { int v_1; int v_2; int v_3; @@ -1278,351 +1285,351 @@ int v_8; int v_9; int v_10; - // (, line 311 - // [, line 312 - ket = cursor; - // set 
continue_stemming_noun_suffixes, line 313 - B_continue_stemming_noun_suffixes = true; - // or, line 315 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 314 - // or, line 314 - lab2: do { - v_2 = limit - cursor; - lab3: do { - // call mark_ymUs_, line 314 - if (!r_mark_ymUs_()) - { - break lab3; - } - break lab2; + // (, line 311 + // [, line 312 + ket = cursor; + // set continue_stemming_noun_suffixes, line 313 + B_continue_stemming_noun_suffixes = true; + // or, line 315 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 314 + // or, line 314 + lab2: do { + v_2 = limit - cursor; + lab3: do { + // call mark_ymUs_, line 314 + if (!r_mark_ymUs_()) + { + break lab3; + } + break lab2; + } while (false); + cursor = limit - v_2; + lab4: do { + // call mark_yDU, line 314 + if (!r_mark_yDU()) + { + break lab4; + } + break lab2; + } while (false); + cursor = limit - v_2; + lab5: do { + // call mark_ysA, line 314 + if (!r_mark_ysA()) + { + break lab5; + } + break lab2; + } while (false); + cursor = limit - v_2; + // call mark_yken, line 314 + if (!r_mark_yken()) + { + break lab1; + } + } while (false); + break lab0; } while (false); - cursor = limit - v_2; - lab4: do { - // call mark_yDU, line 314 - if (!r_mark_yDU()) + cursor = limit - v_1; + lab6: do { + // (, line 316 + // call mark_cAsInA, line 316 + if (!r_mark_cAsInA()) { - break lab4; + break lab6; } - break lab2; - } while (false); - cursor = limit - v_2; - lab5: do { - // call mark_ysA, line 314 - if (!r_mark_ysA()) + // (, line 316 + // or, line 316 + lab7: do { + v_3 = limit - cursor; + lab8: do { + // call mark_sUnUz, line 316 + if (!r_mark_sUnUz()) + { + break lab8; + } + break lab7; + } while (false); + cursor = limit - v_3; + lab9: do { + // call mark_lAr, line 316 + if (!r_mark_lAr()) + { + break lab9; + } + break lab7; + } while (false); + cursor = limit - v_3; + lab10: do { + // call mark_yUm, line 316 + if (!r_mark_yUm()) + { + break lab10; + } + break lab7; + } while (false); + cursor = limit - v_3; + lab11: do { + // call mark_sUn, line 316 + if (!r_mark_sUn()) + { + break lab11; + } + break lab7; + } while (false); + cursor = limit - v_3; + lab12: do { + // call mark_yUz, line 316 + if (!r_mark_yUz()) + { + break lab12; + } + break lab7; + } while (false); + cursor = limit - v_3; + } while (false); + // call mark_ymUs_, line 316 + if (!r_mark_ymUs_()) { - break lab5; + break lab6; } - break lab2; + break lab0; } while (false); - cursor = limit - v_2; - // call mark_yken, line 314 - if (!r_mark_yken()) - { - break lab1; - } - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab6: do { - // (, line 316 - // call mark_cAsInA, line 316 - if (!r_mark_cAsInA()) - { - break lab6; - } - // (, line 316 - // or, line 316 - lab7: do { - v_3 = limit - cursor; - lab8: do { - // call mark_sUnUz, line 316 - if (!r_mark_sUnUz()) - { - break lab8; - } - break lab7; - } while (false); - cursor = limit - v_3; - lab9: do { - // call mark_lAr, line 316 + cursor = limit - v_1; + lab13: do { + // (, line 318 + // call mark_lAr, line 319 if (!r_mark_lAr()) { - break lab9; + break lab13; } - break lab7; + // ], line 319 + bra = cursor; + // delete, line 319 + slice_del(); + // try, line 319 + v_4 = limit - cursor; + lab14: do { + // (, line 319 + // [, line 319 + ket = cursor; + // (, line 319 + // or, line 319 + lab15: do { + v_5 = limit - cursor; + lab16: do { + // call mark_DUr, line 319 + if (!r_mark_DUr()) + { + break lab16; + } + break lab15; + } while (false); + cursor = limit - v_5; + lab17: 
do { + // call mark_yDU, line 319 + if (!r_mark_yDU()) + { + break lab17; + } + break lab15; + } while (false); + cursor = limit - v_5; + lab18: do { + // call mark_ysA, line 319 + if (!r_mark_ysA()) + { + break lab18; + } + break lab15; + } while (false); + cursor = limit - v_5; + // call mark_ymUs_, line 319 + if (!r_mark_ymUs_()) + { + cursor = limit - v_4; + break lab14; + } + } while (false); + } while (false); + // unset continue_stemming_noun_suffixes, line 320 + B_continue_stemming_noun_suffixes = false; + break lab0; } while (false); - cursor = limit - v_3; - lab10: do { - // call mark_yUm, line 316 - if (!r_mark_yUm()) + cursor = limit - v_1; + lab19: do { + // (, line 323 + // call mark_nUz, line 323 + if (!r_mark_nUz()) { - break lab10; + break lab19; } - break lab7; - } while (false); - cursor = limit - v_3; - lab11: do { - // call mark_sUn, line 316 - if (!r_mark_sUn()) - { - break lab11; - } - break lab7; - } while (false); - cursor = limit - v_3; - lab12: do { - // call mark_yUz, line 316 - if (!r_mark_yUz()) - { - break lab12; - } - break lab7; - } while (false); - cursor = limit - v_3; - } while (false); - // call mark_ymUs_, line 316 - if (!r_mark_ymUs_()) - { - break lab6; - } - break lab0; - } while (false); - cursor = limit - v_1; - lab13: do { - // (, line 318 - // call mark_lAr, line 319 - if (!r_mark_lAr()) - { - break lab13; - } - // ], line 319 - bra = cursor; - // delete, line 319 - slice_del(); - // try, line 319 - v_4 = limit - cursor; - lab14: do { - // (, line 319 - // [, line 319 - ket = cursor; - // (, line 319 - // or, line 319 - lab15: do { - v_5 = limit - cursor; - lab16: do { - // call mark_DUr, line 319 - if (!r_mark_DUr()) + // (, line 323 + // or, line 323 + lab20: do { + v_6 = limit - cursor; + lab21: do { + // call mark_yDU, line 323 + if (!r_mark_yDU()) + { + break lab21; + } + break lab20; + } while (false); + cursor = limit - v_6; + // call mark_ysA, line 323 + if (!r_mark_ysA()) { - break lab16; + break lab19; } - break lab15; } while (false); - cursor = limit - v_5; - lab17: do { - // call mark_yDU, line 319 - if (!r_mark_yDU()) + break lab0; + } while (false); + cursor = limit - v_1; + lab22: do { + // (, line 325 + // (, line 325 + // or, line 325 + lab23: do { + v_7 = limit - cursor; + lab24: do { + // call mark_sUnUz, line 325 + if (!r_mark_sUnUz()) + { + break lab24; + } + break lab23; + } while (false); + cursor = limit - v_7; + lab25: do { + // call mark_yUz, line 325 + if (!r_mark_yUz()) + { + break lab25; + } + break lab23; + } while (false); + cursor = limit - v_7; + lab26: do { + // call mark_sUn, line 325 + if (!r_mark_sUn()) + { + break lab26; + } + break lab23; + } while (false); + cursor = limit - v_7; + // call mark_yUm, line 325 + if (!r_mark_yUm()) { - break lab17; + break lab22; } - break lab15; } while (false); - cursor = limit - v_5; - lab18: do { - // call mark_ysA, line 319 - if (!r_mark_ysA()) + // ], line 325 + bra = cursor; + // delete, line 325 + slice_del(); + // try, line 325 + v_8 = limit - cursor; + lab27: do { + // (, line 325 + // [, line 325 + ket = cursor; + // call mark_ymUs_, line 325 + if (!r_mark_ymUs_()) { - break lab18; + cursor = limit - v_8; + break lab27; } - break lab15; } while (false); - cursor = limit - v_5; - // call mark_ymUs_, line 319 - if (!r_mark_ymUs_()) - { - cursor = limit - v_4; - break lab14; - } + break lab0; } while (false); - } while (false); - // unset continue_stemming_noun_suffixes, line 320 - B_continue_stemming_noun_suffixes = false; - break lab0; - } while (false); - cursor = 
limit - v_1; - lab19: do { - // (, line 323 - // call mark_nUz, line 323 - if (!r_mark_nUz()) - { - break lab19; - } - // (, line 323 - // or, line 323 - lab20: do { - v_6 = limit - cursor; - lab21: do { - // call mark_yDU, line 323 - if (!r_mark_yDU()) - { - break lab21; - } - break lab20; - } while (false); - cursor = limit - v_6; - // call mark_ysA, line 323 - if (!r_mark_ysA()) + cursor = limit - v_1; + // (, line 327 + // call mark_DUr, line 327 + if (!r_mark_DUr()) { - break lab19; + return false; } - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab22: do { - // (, line 325 - // (, line 325 - // or, line 325 - lab23: do { - v_7 = limit - cursor; - lab24: do { - // call mark_sUnUz, line 325 - if (!r_mark_sUnUz()) + // ], line 327 + bra = cursor; + // delete, line 327 + slice_del(); + // try, line 327 + v_9 = limit - cursor; + lab28: do { + // (, line 327 + // [, line 327 + ket = cursor; + // (, line 327 + // or, line 327 + lab29: do { + v_10 = limit - cursor; + lab30: do { + // call mark_sUnUz, line 327 + if (!r_mark_sUnUz()) + { + break lab30; + } + break lab29; + } while (false); + cursor = limit - v_10; + lab31: do { + // call mark_lAr, line 327 + if (!r_mark_lAr()) + { + break lab31; + } + break lab29; + } while (false); + cursor = limit - v_10; + lab32: do { + // call mark_yUm, line 327 + if (!r_mark_yUm()) + { + break lab32; + } + break lab29; + } while (false); + cursor = limit - v_10; + lab33: do { + // call mark_sUn, line 327 + if (!r_mark_sUn()) + { + break lab33; + } + break lab29; + } while (false); + cursor = limit - v_10; + lab34: do { + // call mark_yUz, line 327 + if (!r_mark_yUz()) + { + break lab34; + } + break lab29; + } while (false); + cursor = limit - v_10; + } while (false); + // call mark_ymUs_, line 327 + if (!r_mark_ymUs_()) { - break lab24; + cursor = limit - v_9; + break lab28; } - break lab23; } while (false); - cursor = limit - v_7; - lab25: do { - // call mark_yUz, line 325 - if (!r_mark_yUz()) - { - break lab25; - } - break lab23; - } while (false); - cursor = limit - v_7; - lab26: do { - // call mark_sUn, line 325 - if (!r_mark_sUn()) - { - break lab26; - } - break lab23; - } while (false); - cursor = limit - v_7; - // call mark_yUm, line 325 - if (!r_mark_yUm()) - { - break lab22; - } } while (false); - // ], line 325 + // ], line 328 bra = cursor; - // delete, line 325 + // delete, line 328 slice_del(); - // try, line 325 - v_8 = limit - cursor; - lab27: do { - // (, line 325 - // [, line 325 - ket = cursor; - // call mark_ymUs_, line 325 - if (!r_mark_ymUs_()) - { - cursor = limit - v_8; - break lab27; - } - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 327 - // call mark_DUr, line 327 - if (!r_mark_DUr()) - { - return false; + return true; } - // ], line 327 - bra = cursor; - // delete, line 327 - slice_del(); - // try, line 327 - v_9 = limit - cursor; - lab28: do { - // (, line 327 - // [, line 327 - ket = cursor; - // (, line 327 - // or, line 327 - lab29: do { - v_10 = limit - cursor; - lab30: do { - // call mark_sUnUz, line 327 - if (!r_mark_sUnUz()) - { - break lab30; - } - break lab29; - } while (false); - cursor = limit - v_10; - lab31: do { - // call mark_lAr, line 327 - if (!r_mark_lAr()) - { - break lab31; - } - break lab29; - } while (false); - cursor = limit - v_10; - lab32: do { - // call mark_yUm, line 327 - if (!r_mark_yUm()) - { - break lab32; - } - break lab29; - } while (false); - cursor = limit - v_10; - lab33: do { - // call mark_sUn, line 327 - if 
(!r_mark_sUn()) - { - break lab33; - } - break lab29; - } while (false); - cursor = limit - v_10; - lab34: do { - // call mark_yUz, line 327 - if (!r_mark_yUz()) - { - break lab34; - } - break lab29; - } while (false); - cursor = limit - v_10; - } while (false); - // call mark_ymUs_, line 327 - if (!r_mark_ymUs_()) - { - cursor = limit - v_9; - break lab28; - } - } while (false); - } while (false); - // ], line 328 - bra = cursor; - // delete, line 328 - slice_del(); - return true; - } - private boolean r_stem_suffix_chain_before_ki() { + private boolean r_stem_suffix_chain_before_ki() { int v_1; int v_2; int v_3; @@ -1634,274 +1641,274 @@ int v_9; int v_10; int v_11; - // (, line 332 - // [, line 333 - ket = cursor; - // call mark_ki, line 334 - if (!r_mark_ki()) - { - return false; - } - // (, line 335 - // or, line 342 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 336 - // call mark_DA, line 336 - if (!r_mark_DA()) + // (, line 332 + // [, line 333 + ket = cursor; + // call mark_ki, line 334 + if (!r_mark_ki()) { - break lab1; + return false; } - // ], line 336 - bra = cursor; - // delete, line 336 - slice_del(); - // try, line 336 - v_2 = limit - cursor; - lab2: do { - // (, line 336 - // [, line 336 - ket = cursor; - // or, line 338 - lab3: do { - v_3 = limit - cursor; - lab4: do { - // (, line 337 - // call mark_lAr, line 337 - if (!r_mark_lAr()) - { - break lab4; - } - // ], line 337 - bra = cursor; - // delete, line 337 - slice_del(); - // try, line 337 - v_4 = limit - cursor; - lab5: do { - // (, line 337 - // call stem_suffix_chain_before_ki, line 337 - if (!r_stem_suffix_chain_before_ki()) + // (, line 335 + // or, line 342 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 336 + // call mark_DA, line 336 + if (!r_mark_DA()) + { + break lab1; + } + // ], line 336 + bra = cursor; + // delete, line 336 + slice_del(); + // try, line 336 + v_2 = limit - cursor; + lab2: do { + // (, line 336 + // [, line 336 + ket = cursor; + // or, line 338 + lab3: do { + v_3 = limit - cursor; + lab4: do { + // (, line 337 + // call mark_lAr, line 337 + if (!r_mark_lAr()) + { + break lab4; + } + // ], line 337 + bra = cursor; + // delete, line 337 + slice_del(); + // try, line 337 + v_4 = limit - cursor; + lab5: do { + // (, line 337 + // call stem_suffix_chain_before_ki, line 337 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_4; + break lab5; + } + } while (false); + break lab3; + } while (false); + cursor = limit - v_3; + // (, line 339 + // call mark_possessives, line 339 + if (!r_mark_possessives()) { - cursor = limit - v_4; - break lab5; + cursor = limit - v_2; + break lab2; } + // ], line 339 + bra = cursor; + // delete, line 339 + slice_del(); + // try, line 339 + v_5 = limit - cursor; + lab6: do { + // (, line 339 + // [, line 339 + ket = cursor; + // call mark_lAr, line 339 + if (!r_mark_lAr()) + { + cursor = limit - v_5; + break lab6; + } + // ], line 339 + bra = cursor; + // delete, line 339 + slice_del(); + // call stem_suffix_chain_before_ki, line 339 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_5; + break lab6; + } + } while (false); } while (false); - break lab3; } while (false); - cursor = limit - v_3; - // (, line 339 - // call mark_possessives, line 339 - if (!r_mark_possessives()) + break lab0; + } while (false); + cursor = limit - v_1; + lab7: do { + // (, line 343 + // call mark_nUn, line 343 + if (!r_mark_nUn()) { - cursor = limit - v_2; - break lab2; + break lab7; } - // ], line 339 + // ], line 343 bra = 
cursor; - // delete, line 339 + // delete, line 343 slice_del(); - // try, line 339 - v_5 = limit - cursor; - lab6: do { - // (, line 339 - // [, line 339 + // try, line 343 + v_6 = limit - cursor; + lab8: do { + // (, line 343 + // [, line 343 ket = cursor; - // call mark_lAr, line 339 - if (!r_mark_lAr()) - { - cursor = limit - v_5; - break lab6; - } - // ], line 339 - bra = cursor; - // delete, line 339 - slice_del(); - // call stem_suffix_chain_before_ki, line 339 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_5; - break lab6; - } + // or, line 345 + lab9: do { + v_7 = limit - cursor; + lab10: do { + // (, line 344 + // call mark_lArI, line 344 + if (!r_mark_lArI()) + { + break lab10; + } + // ], line 344 + bra = cursor; + // delete, line 344 + slice_del(); + break lab9; + } while (false); + cursor = limit - v_7; + lab11: do { + // (, line 346 + // [, line 346 + ket = cursor; + // or, line 346 + lab12: do { + v_8 = limit - cursor; + lab13: do { + // call mark_possessives, line 346 + if (!r_mark_possessives()) + { + break lab13; + } + break lab12; + } while (false); + cursor = limit - v_8; + // call mark_sU, line 346 + if (!r_mark_sU()) + { + break lab11; + } + } while (false); + // ], line 346 + bra = cursor; + // delete, line 346 + slice_del(); + // try, line 346 + v_9 = limit - cursor; + lab14: do { + // (, line 346 + // [, line 346 + ket = cursor; + // call mark_lAr, line 346 + if (!r_mark_lAr()) + { + cursor = limit - v_9; + break lab14; + } + // ], line 346 + bra = cursor; + // delete, line 346 + slice_del(); + // call stem_suffix_chain_before_ki, line 346 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_9; + break lab14; + } + } while (false); + break lab9; + } while (false); + cursor = limit - v_7; + // (, line 348 + // call stem_suffix_chain_before_ki, line 348 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_6; + break lab8; + } + } while (false); } while (false); + break lab0; } while (false); - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab7: do { - // (, line 343 - // call mark_nUn, line 343 - if (!r_mark_nUn()) - { - break lab7; - } - // ], line 343 - bra = cursor; - // delete, line 343 - slice_del(); - // try, line 343 - v_6 = limit - cursor; - lab8: do { - // (, line 343 - // [, line 343 - ket = cursor; - // or, line 345 - lab9: do { - v_7 = limit - cursor; - lab10: do { - // (, line 344 - // call mark_lArI, line 344 + cursor = limit - v_1; + // (, line 351 + // call mark_ndA, line 351 + if (!r_mark_ndA()) + { + return false; + } + // (, line 351 + // or, line 353 + lab15: do { + v_10 = limit - cursor; + lab16: do { + // (, line 352 + // call mark_lArI, line 352 if (!r_mark_lArI()) { - break lab10; + break lab16; } - // ], line 344 + // ], line 352 bra = cursor; - // delete, line 344 + // delete, line 352 slice_del(); - break lab9; + break lab15; } while (false); - cursor = limit - v_7; - lab11: do { - // (, line 346 - // [, line 346 - ket = cursor; - // or, line 346 - lab12: do { - v_8 = limit - cursor; - lab13: do { - // call mark_possessives, line 346 - if (!r_mark_possessives()) - { - break lab13; - } - break lab12; - } while (false); - cursor = limit - v_8; - // call mark_sU, line 346 - if (!r_mark_sU()) - { - break lab11; - } - } while (false); - // ], line 346 + cursor = limit - v_10; + lab17: do { + // (, line 354 + // (, line 354 + // call mark_sU, line 354 + if (!r_mark_sU()) + { + break lab17; + } + // ], line 354 bra = cursor; - // delete, line 346 + // delete, line 354 
slice_del(); - // try, line 346 - v_9 = limit - cursor; - lab14: do { - // (, line 346 - // [, line 346 + // try, line 354 + v_11 = limit - cursor; + lab18: do { + // (, line 354 + // [, line 354 ket = cursor; - // call mark_lAr, line 346 + // call mark_lAr, line 354 if (!r_mark_lAr()) { - cursor = limit - v_9; - break lab14; + cursor = limit - v_11; + break lab18; } - // ], line 346 + // ], line 354 bra = cursor; - // delete, line 346 + // delete, line 354 slice_del(); - // call stem_suffix_chain_before_ki, line 346 + // call stem_suffix_chain_before_ki, line 354 if (!r_stem_suffix_chain_before_ki()) { - cursor = limit - v_9; - break lab14; + cursor = limit - v_11; + break lab18; } } while (false); - break lab9; + break lab15; } while (false); - cursor = limit - v_7; - // (, line 348 - // call stem_suffix_chain_before_ki, line 348 + cursor = limit - v_10; + // (, line 356 + // call stem_suffix_chain_before_ki, line 356 if (!r_stem_suffix_chain_before_ki()) { - cursor = limit - v_6; - break lab8; + return false; } } while (false); } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 351 - // call mark_ndA, line 351 - if (!r_mark_ndA()) - { - return false; + return true; } - // (, line 351 - // or, line 353 - lab15: do { - v_10 = limit - cursor; - lab16: do { - // (, line 352 - // call mark_lArI, line 352 - if (!r_mark_lArI()) - { - break lab16; - } - // ], line 352 - bra = cursor; - // delete, line 352 - slice_del(); - break lab15; - } while (false); - cursor = limit - v_10; - lab17: do { - // (, line 354 - // (, line 354 - // call mark_sU, line 354 - if (!r_mark_sU()) - { - break lab17; - } - // ], line 354 - bra = cursor; - // delete, line 354 - slice_del(); - // try, line 354 - v_11 = limit - cursor; - lab18: do { - // (, line 354 - // [, line 354 - ket = cursor; - // call mark_lAr, line 354 - if (!r_mark_lAr()) - { - cursor = limit - v_11; - break lab18; - } - // ], line 354 - bra = cursor; - // delete, line 354 - slice_del(); - // call stem_suffix_chain_before_ki, line 354 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_11; - break lab18; - } - } while (false); - break lab15; - } while (false); - cursor = limit - v_10; - // (, line 356 - // call stem_suffix_chain_before_ki, line 356 - if (!r_stem_suffix_chain_before_ki()) - { - return false; - } - } while (false); - } while (false); - return true; - } - private boolean r_stem_noun_suffixes() { + private boolean r_stem_noun_suffixes() { int v_1; int v_2; int v_3; @@ -1929,739 +1936,739 @@ int v_25; int v_26; int v_27; - // (, line 361 - // or, line 363 - lab0: do { - v_1 = limit - cursor; - lab1: do { - // (, line 362 - // [, line 362 - ket = cursor; - // call mark_lAr, line 362 - if (!r_mark_lAr()) - { - break lab1; - } - // ], line 362 - bra = cursor; - // delete, line 362 - slice_del(); - // try, line 362 - v_2 = limit - cursor; - lab2: do { - // (, line 362 - // call stem_suffix_chain_before_ki, line 362 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_2; - break lab2; - } - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab3: do { - // (, line 364 - // [, line 364 - ket = cursor; - // call mark_ncA, line 364 - if (!r_mark_ncA()) - { - break lab3; - } - // ], line 364 - bra = cursor; - // delete, line 364 - slice_del(); - // try, line 365 - v_3 = limit - cursor; - lab4: do { - // (, line 365 - // or, line 367 - lab5: do { - v_4 = limit - cursor; - lab6: do { - // (, line 366 - // [, line 366 - ket = cursor; - // call mark_lArI, line 366 
- if (!r_mark_lArI()) + // (, line 361 + // or, line 363 + lab0: do { + v_1 = limit - cursor; + lab1: do { + // (, line 362 + // [, line 362 + ket = cursor; + // call mark_lAr, line 362 + if (!r_mark_lAr()) + { + break lab1; + } + // ], line 362 + bra = cursor; + // delete, line 362 + slice_del(); + // try, line 362 + v_2 = limit - cursor; + lab2: do { + // (, line 362 + // call stem_suffix_chain_before_ki, line 362 + if (!r_stem_suffix_chain_before_ki()) { - break lab6; + cursor = limit - v_2; + break lab2; } - // ], line 366 - bra = cursor; - // delete, line 366 - slice_del(); - break lab5; } while (false); - cursor = limit - v_4; - lab7: do { - // (, line 368 - // [, line 368 - ket = cursor; - // or, line 368 - lab8: do { - v_5 = limit - cursor; - lab9: do { - // call mark_possessives, line 368 - if (!r_mark_possessives()) + break lab0; + } while (false); + cursor = limit - v_1; + lab3: do { + // (, line 364 + // [, line 364 + ket = cursor; + // call mark_ncA, line 364 + if (!r_mark_ncA()) + { + break lab3; + } + // ], line 364 + bra = cursor; + // delete, line 364 + slice_del(); + // try, line 365 + v_3 = limit - cursor; + lab4: do { + // (, line 365 + // or, line 367 + lab5: do { + v_4 = limit - cursor; + lab6: do { + // (, line 366 + // [, line 366 + ket = cursor; + // call mark_lArI, line 366 + if (!r_mark_lArI()) { - break lab9; + break lab6; } - break lab8; + // ], line 366 + bra = cursor; + // delete, line 366 + slice_del(); + break lab5; } while (false); - cursor = limit - v_5; - // call mark_sU, line 368 - if (!r_mark_sU()) - { - break lab7; - } - } while (false); - // ], line 368 - bra = cursor; - // delete, line 368 - slice_del(); - // try, line 368 - v_6 = limit - cursor; - lab10: do { - // (, line 368 - // [, line 368 + cursor = limit - v_4; + lab7: do { + // (, line 368 + // [, line 368 + ket = cursor; + // or, line 368 + lab8: do { + v_5 = limit - cursor; + lab9: do { + // call mark_possessives, line 368 + if (!r_mark_possessives()) + { + break lab9; + } + break lab8; + } while (false); + cursor = limit - v_5; + // call mark_sU, line 368 + if (!r_mark_sU()) + { + break lab7; + } + } while (false); + // ], line 368 + bra = cursor; + // delete, line 368 + slice_del(); + // try, line 368 + v_6 = limit - cursor; + lab10: do { + // (, line 368 + // [, line 368 + ket = cursor; + // call mark_lAr, line 368 + if (!r_mark_lAr()) + { + cursor = limit - v_6; + break lab10; + } + // ], line 368 + bra = cursor; + // delete, line 368 + slice_del(); + // call stem_suffix_chain_before_ki, line 368 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_6; + break lab10; + } + } while (false); + break lab5; + } while (false); + cursor = limit - v_4; + // (, line 370 + // [, line 370 ket = cursor; - // call mark_lAr, line 368 + // call mark_lAr, line 370 if (!r_mark_lAr()) { - cursor = limit - v_6; - break lab10; + cursor = limit - v_3; + break lab4; } - // ], line 368 + // ], line 370 bra = cursor; - // delete, line 368 + // delete, line 370 slice_del(); - // call stem_suffix_chain_before_ki, line 368 + // call stem_suffix_chain_before_ki, line 370 if (!r_stem_suffix_chain_before_ki()) { - cursor = limit - v_6; - break lab10; + cursor = limit - v_3; + break lab4; } } while (false); - break lab5; } while (false); - cursor = limit - v_4; - // (, line 370 - // [, line 370 - ket = cursor; - // call mark_lAr, line 370 - if (!r_mark_lAr()) - { - cursor = limit - v_3; - break lab4; - } - // ], line 370 - bra = cursor; - // delete, line 370 - slice_del(); - // call 
stem_suffix_chain_before_ki, line 370 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_3; - break lab4; - } + break lab0; } while (false); - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab11: do { - // (, line 374 - // [, line 374 - ket = cursor; - // (, line 374 - // or, line 374 - lab12: do { - v_7 = limit - cursor; - lab13: do { - // call mark_ndA, line 374 - if (!r_mark_ndA()) - { - break lab13; - } - break lab12; - } while (false); - cursor = limit - v_7; - // call mark_nA, line 374 - if (!r_mark_nA()) - { - break lab11; - } - } while (false); - // (, line 375 - // or, line 377 - lab14: do { - v_8 = limit - cursor; - lab15: do { - // (, line 376 - // call mark_lArI, line 376 - if (!r_mark_lArI()) - { - break lab15; - } - // ], line 376 - bra = cursor; - // delete, line 376 - slice_del(); - break lab14; - } while (false); - cursor = limit - v_8; - lab16: do { - // (, line 378 - // call mark_sU, line 378 - if (!r_mark_sU()) - { - break lab16; - } - // ], line 378 - bra = cursor; - // delete, line 378 - slice_del(); - // try, line 378 - v_9 = limit - cursor; - lab17: do { - // (, line 378 - // [, line 378 - ket = cursor; - // call mark_lAr, line 378 - if (!r_mark_lAr()) + cursor = limit - v_1; + lab11: do { + // (, line 374 + // [, line 374 + ket = cursor; + // (, line 374 + // or, line 374 + lab12: do { + v_7 = limit - cursor; + lab13: do { + // call mark_ndA, line 374 + if (!r_mark_ndA()) + { + break lab13; + } + break lab12; + } while (false); + cursor = limit - v_7; + // call mark_nA, line 374 + if (!r_mark_nA()) { - cursor = limit - v_9; - break lab17; + break lab11; } - // ], line 378 - bra = cursor; - // delete, line 378 - slice_del(); - // call stem_suffix_chain_before_ki, line 378 + } while (false); + // (, line 375 + // or, line 377 + lab14: do { + v_8 = limit - cursor; + lab15: do { + // (, line 376 + // call mark_lArI, line 376 + if (!r_mark_lArI()) + { + break lab15; + } + // ], line 376 + bra = cursor; + // delete, line 376 + slice_del(); + break lab14; + } while (false); + cursor = limit - v_8; + lab16: do { + // (, line 378 + // call mark_sU, line 378 + if (!r_mark_sU()) + { + break lab16; + } + // ], line 378 + bra = cursor; + // delete, line 378 + slice_del(); + // try, line 378 + v_9 = limit - cursor; + lab17: do { + // (, line 378 + // [, line 378 + ket = cursor; + // call mark_lAr, line 378 + if (!r_mark_lAr()) + { + cursor = limit - v_9; + break lab17; + } + // ], line 378 + bra = cursor; + // delete, line 378 + slice_del(); + // call stem_suffix_chain_before_ki, line 378 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_9; + break lab17; + } + } while (false); + break lab14; + } while (false); + cursor = limit - v_8; + // (, line 380 + // call stem_suffix_chain_before_ki, line 380 if (!r_stem_suffix_chain_before_ki()) { - cursor = limit - v_9; - break lab17; + break lab11; } } while (false); - break lab14; + break lab0; } while (false); - cursor = limit - v_8; - // (, line 380 - // call stem_suffix_chain_before_ki, line 380 - if (!r_stem_suffix_chain_before_ki()) - { - break lab11; - } - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab18: do { - // (, line 384 - // [, line 384 - ket = cursor; - // (, line 384 - // or, line 384 - lab19: do { - v_10 = limit - cursor; - lab20: do { - // call mark_ndAn, line 384 - if (!r_mark_ndAn()) - { - break lab20; - } - break lab19; - } while (false); - cursor = limit - v_10; - // call mark_nU, line 384 - if (!r_mark_nU()) - { - break 
lab18; - } - } while (false); - // (, line 384 - // or, line 384 - lab21: do { - v_11 = limit - cursor; - lab22: do { + cursor = limit - v_1; + lab18: do { // (, line 384 - // call mark_sU, line 384 - if (!r_mark_sU()) - { - break lab22; - } - // ], line 384 - bra = cursor; - // delete, line 384 - slice_del(); - // try, line 384 - v_12 = limit - cursor; - lab23: do { - // (, line 384 - // [, line 384 - ket = cursor; - // call mark_lAr, line 384 - if (!r_mark_lAr()) + // [, line 384 + ket = cursor; + // (, line 384 + // or, line 384 + lab19: do { + v_10 = limit - cursor; + lab20: do { + // call mark_ndAn, line 384 + if (!r_mark_ndAn()) + { + break lab20; + } + break lab19; + } while (false); + cursor = limit - v_10; + // call mark_nU, line 384 + if (!r_mark_nU()) { - cursor = limit - v_12; - break lab23; + break lab18; } - // ], line 384 - bra = cursor; - // delete, line 384 - slice_del(); - // call stem_suffix_chain_before_ki, line 384 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_12; - break lab23; - } } while (false); - break lab21; - } while (false); - cursor = limit - v_11; - // (, line 384 - // call mark_lArI, line 384 - if (!r_mark_lArI()) - { - break lab18; - } - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab24: do { - // (, line 386 - // [, line 386 - ket = cursor; - // call mark_DAn, line 386 - if (!r_mark_DAn()) - { - break lab24; - } - // ], line 386 - bra = cursor; - // delete, line 386 - slice_del(); - // try, line 386 - v_13 = limit - cursor; - lab25: do { - // (, line 386 - // [, line 386 - ket = cursor; - // (, line 387 - // or, line 389 - lab26: do { - v_14 = limit - cursor; - lab27: do { - // (, line 388 - // call mark_possessives, line 388 - if (!r_mark_possessives()) - { - break lab27; - } - // ], line 388 - bra = cursor; - // delete, line 388 - slice_del(); - // try, line 388 - v_15 = limit - cursor; - lab28: do { - // (, line 388 - // [, line 388 - ket = cursor; - // call mark_lAr, line 388 - if (!r_mark_lAr()) + // (, line 384 + // or, line 384 + lab21: do { + v_11 = limit - cursor; + lab22: do { + // (, line 384 + // call mark_sU, line 384 + if (!r_mark_sU()) { - cursor = limit - v_15; - break lab28; + break lab22; } - // ], line 388 + // ], line 384 bra = cursor; - // delete, line 388 + // delete, line 384 slice_del(); - // call stem_suffix_chain_before_ki, line 388 + // try, line 384 + v_12 = limit - cursor; + lab23: do { + // (, line 384 + // [, line 384 + ket = cursor; + // call mark_lAr, line 384 + if (!r_mark_lAr()) + { + cursor = limit - v_12; + break lab23; + } + // ], line 384 + bra = cursor; + // delete, line 384 + slice_del(); + // call stem_suffix_chain_before_ki, line 384 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_12; + break lab23; + } + } while (false); + break lab21; + } while (false); + cursor = limit - v_11; + // (, line 384 + // call mark_lArI, line 384 + if (!r_mark_lArI()) + { + break lab18; + } + } while (false); + break lab0; + } while (false); + cursor = limit - v_1; + lab24: do { + // (, line 386 + // [, line 386 + ket = cursor; + // call mark_DAn, line 386 + if (!r_mark_DAn()) + { + break lab24; + } + // ], line 386 + bra = cursor; + // delete, line 386 + slice_del(); + // try, line 386 + v_13 = limit - cursor; + lab25: do { + // (, line 386 + // [, line 386 + ket = cursor; + // (, line 387 + // or, line 389 + lab26: do { + v_14 = limit - cursor; + lab27: do { + // (, line 388 + // call mark_possessives, line 388 + if (!r_mark_possessives()) + { + break lab27; + } + 
// ], line 388 + bra = cursor; + // delete, line 388 + slice_del(); + // try, line 388 + v_15 = limit - cursor; + lab28: do { + // (, line 388 + // [, line 388 + ket = cursor; + // call mark_lAr, line 388 + if (!r_mark_lAr()) + { + cursor = limit - v_15; + break lab28; + } + // ], line 388 + bra = cursor; + // delete, line 388 + slice_del(); + // call stem_suffix_chain_before_ki, line 388 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_15; + break lab28; + } + } while (false); + break lab26; + } while (false); + cursor = limit - v_14; + lab29: do { + // (, line 390 + // call mark_lAr, line 390 + if (!r_mark_lAr()) + { + break lab29; + } + // ], line 390 + bra = cursor; + // delete, line 390 + slice_del(); + // try, line 390 + v_16 = limit - cursor; + lab30: do { + // (, line 390 + // call stem_suffix_chain_before_ki, line 390 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_16; + break lab30; + } + } while (false); + break lab26; + } while (false); + cursor = limit - v_14; + // (, line 392 + // call stem_suffix_chain_before_ki, line 392 if (!r_stem_suffix_chain_before_ki()) { - cursor = limit - v_15; - break lab28; + cursor = limit - v_13; + break lab25; } } while (false); - break lab26; } while (false); - cursor = limit - v_14; - lab29: do { - // (, line 390 - // call mark_lAr, line 390 - if (!r_mark_lAr()) + break lab0; + } while (false); + cursor = limit - v_1; + lab31: do { + // (, line 396 + // [, line 396 + ket = cursor; + // or, line 396 + lab32: do { + v_17 = limit - cursor; + lab33: do { + // call mark_nUn, line 396 + if (!r_mark_nUn()) + { + break lab33; + } + break lab32; + } while (false); + cursor = limit - v_17; + // call mark_ylA, line 396 + if (!r_mark_ylA()) { - break lab29; + break lab31; } - // ], line 390 - bra = cursor; - // delete, line 390 - slice_del(); - // try, line 390 - v_16 = limit - cursor; - lab30: do { - // (, line 390 - // call stem_suffix_chain_before_ki, line 390 + } while (false); + // ], line 396 + bra = cursor; + // delete, line 396 + slice_del(); + // try, line 397 + v_18 = limit - cursor; + lab34: do { + // (, line 397 + // or, line 399 + lab35: do { + v_19 = limit - cursor; + lab36: do { + // (, line 398 + // [, line 398 + ket = cursor; + // call mark_lAr, line 398 + if (!r_mark_lAr()) + { + break lab36; + } + // ], line 398 + bra = cursor; + // delete, line 398 + slice_del(); + // call stem_suffix_chain_before_ki, line 398 + if (!r_stem_suffix_chain_before_ki()) + { + break lab36; + } + break lab35; + } while (false); + cursor = limit - v_19; + lab37: do { + // (, line 400 + // [, line 400 + ket = cursor; + // or, line 400 + lab38: do { + v_20 = limit - cursor; + lab39: do { + // call mark_possessives, line 400 + if (!r_mark_possessives()) + { + break lab39; + } + break lab38; + } while (false); + cursor = limit - v_20; + // call mark_sU, line 400 + if (!r_mark_sU()) + { + break lab37; + } + } while (false); + // ], line 400 + bra = cursor; + // delete, line 400 + slice_del(); + // try, line 400 + v_21 = limit - cursor; + lab40: do { + // (, line 400 + // [, line 400 + ket = cursor; + // call mark_lAr, line 400 + if (!r_mark_lAr()) + { + cursor = limit - v_21; + break lab40; + } + // ], line 400 + bra = cursor; + // delete, line 400 + slice_del(); + // call stem_suffix_chain_before_ki, line 400 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_21; + break lab40; + } + } while (false); + break lab35; + } while (false); + cursor = limit - v_19; + // call stem_suffix_chain_before_ki, line 402 if 
(!r_stem_suffix_chain_before_ki()) { - cursor = limit - v_16; - break lab30; + cursor = limit - v_18; + break lab34; } } while (false); - break lab26; } while (false); - cursor = limit - v_14; - // (, line 392 - // call stem_suffix_chain_before_ki, line 392 - if (!r_stem_suffix_chain_before_ki()) + break lab0; + } while (false); + cursor = limit - v_1; + lab41: do { + // (, line 406 + // [, line 406 + ket = cursor; + // call mark_lArI, line 406 + if (!r_mark_lArI()) { - cursor = limit - v_13; - break lab25; + break lab41; } + // ], line 406 + bra = cursor; + // delete, line 406 + slice_del(); + break lab0; } while (false); - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab31: do { - // (, line 396 - // [, line 396 - ket = cursor; - // or, line 396 - lab32: do { - v_17 = limit - cursor; - lab33: do { - // call mark_nUn, line 396 - if (!r_mark_nUn()) + cursor = limit - v_1; + lab42: do { + // (, line 408 + // call stem_suffix_chain_before_ki, line 408 + if (!r_stem_suffix_chain_before_ki()) { - break lab33; + break lab42; } - break lab32; + break lab0; } while (false); - cursor = limit - v_17; - // call mark_ylA, line 396 - if (!r_mark_ylA()) - { - break lab31; - } - } while (false); - // ], line 396 - bra = cursor; - // delete, line 396 - slice_del(); - // try, line 397 - v_18 = limit - cursor; - lab34: do { - // (, line 397 - // or, line 399 - lab35: do { - v_19 = limit - cursor; - lab36: do { - // (, line 398 - // [, line 398 - ket = cursor; - // call mark_lAr, line 398 - if (!r_mark_lAr()) + cursor = limit - v_1; + lab43: do { + // (, line 410 + // [, line 410 + ket = cursor; + // or, line 410 + lab44: do { + v_22 = limit - cursor; + lab45: do { + // call mark_DA, line 410 + if (!r_mark_DA()) + { + break lab45; + } + break lab44; + } while (false); + cursor = limit - v_22; + lab46: do { + // call mark_yU, line 410 + if (!r_mark_yU()) + { + break lab46; + } + break lab44; + } while (false); + cursor = limit - v_22; + // call mark_yA, line 410 + if (!r_mark_yA()) { - break lab36; + break lab43; } - // ], line 398 - bra = cursor; - // delete, line 398 - slice_del(); - // call stem_suffix_chain_before_ki, line 398 - if (!r_stem_suffix_chain_before_ki()) - { - break lab36; - } - break lab35; } while (false); - cursor = limit - v_19; - lab37: do { - // (, line 400 - // [, line 400 + // ], line 410 + bra = cursor; + // delete, line 410 + slice_del(); + // try, line 410 + v_23 = limit - cursor; + lab47: do { + // (, line 410 + // [, line 410 ket = cursor; - // or, line 400 - lab38: do { - v_20 = limit - cursor; - lab39: do { - // call mark_possessives, line 400 + // (, line 410 + // or, line 410 + lab48: do { + v_24 = limit - cursor; + lab49: do { + // (, line 410 + // call mark_possessives, line 410 if (!r_mark_possessives()) { - break lab39; + break lab49; } - break lab38; + // ], line 410 + bra = cursor; + // delete, line 410 + slice_del(); + // try, line 410 + v_25 = limit - cursor; + lab50: do { + // (, line 410 + // [, line 410 + ket = cursor; + // call mark_lAr, line 410 + if (!r_mark_lAr()) + { + cursor = limit - v_25; + break lab50; + } + } while (false); + break lab48; } while (false); - cursor = limit - v_20; - // call mark_sU, line 400 - if (!r_mark_sU()) + cursor = limit - v_24; + // call mark_lAr, line 410 + if (!r_mark_lAr()) { - break lab37; + cursor = limit - v_23; + break lab47; } } while (false); - // ], line 400 + // ], line 410 bra = cursor; - // delete, line 400 + // delete, line 410 slice_del(); - // try, line 400 - v_21 = limit - cursor; - 
lab40: do { - // (, line 400 - // [, line 400 - ket = cursor; - // call mark_lAr, line 400 - if (!r_mark_lAr()) - { - cursor = limit - v_21; - break lab40; - } - // ], line 400 - bra = cursor; - // delete, line 400 - slice_del(); - // call stem_suffix_chain_before_ki, line 400 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_21; - break lab40; - } - } while (false); - break lab35; + // [, line 410 + ket = cursor; + // call stem_suffix_chain_before_ki, line 410 + if (!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_23; + break lab47; + } } while (false); - cursor = limit - v_19; - // call stem_suffix_chain_before_ki, line 402 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_18; - break lab34; - } + break lab0; } while (false); - } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - lab41: do { - // (, line 406 - // [, line 406 - ket = cursor; - // call mark_lArI, line 406 - if (!r_mark_lArI()) - { - break lab41; - } - // ], line 406 - bra = cursor; - // delete, line 406 - slice_del(); - break lab0; - } while (false); - cursor = limit - v_1; - lab42: do { - // (, line 408 - // call stem_suffix_chain_before_ki, line 408 - if (!r_stem_suffix_chain_before_ki()) - { - break lab42; - } - break lab0; - } while (false); - cursor = limit - v_1; - lab43: do { - // (, line 410 - // [, line 410 - ket = cursor; - // or, line 410 - lab44: do { - v_22 = limit - cursor; - lab45: do { - // call mark_DA, line 410 - if (!r_mark_DA()) - { - break lab45; - } - break lab44; - } while (false); - cursor = limit - v_22; - lab46: do { - // call mark_yU, line 410 - if (!r_mark_yU()) - { - break lab46; - } - break lab44; - } while (false); - cursor = limit - v_22; - // call mark_yA, line 410 - if (!r_mark_yA()) - { - break lab43; - } - } while (false); - // ], line 410 - bra = cursor; - // delete, line 410 - slice_del(); - // try, line 410 - v_23 = limit - cursor; - lab47: do { - // (, line 410 - // [, line 410 + cursor = limit - v_1; + // (, line 412 + // [, line 412 ket = cursor; - // (, line 410 - // or, line 410 - lab48: do { - v_24 = limit - cursor; - lab49: do { - // (, line 410 - // call mark_possessives, line 410 + // or, line 412 + lab51: do { + v_26 = limit - cursor; + lab52: do { + // call mark_possessives, line 412 if (!r_mark_possessives()) { - break lab49; + break lab52; } - // ], line 410 - bra = cursor; - // delete, line 410 - slice_del(); - // try, line 410 - v_25 = limit - cursor; - lab50: do { - // (, line 410 - // [, line 410 - ket = cursor; - // call mark_lAr, line 410 - if (!r_mark_lAr()) - { - cursor = limit - v_25; - break lab50; - } - } while (false); - break lab48; + break lab51; } while (false); - cursor = limit - v_24; - // call mark_lAr, line 410 - if (!r_mark_lAr()) + cursor = limit - v_26; + // call mark_sU, line 412 + if (!r_mark_sU()) { - cursor = limit - v_23; - break lab47; + return false; } } while (false); - // ], line 410 + // ], line 412 bra = cursor; - // delete, line 410 + // delete, line 412 slice_del(); - // [, line 410 - ket = cursor; - // call stem_suffix_chain_before_ki, line 410 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_23; - break lab47; - } + // try, line 412 + v_27 = limit - cursor; + lab53: do { + // (, line 412 + // [, line 412 + ket = cursor; + // call mark_lAr, line 412 + if (!r_mark_lAr()) + { + cursor = limit - v_27; + break lab53; + } + // ], line 412 + bra = cursor; + // delete, line 412 + slice_del(); + // call stem_suffix_chain_before_ki, line 412 + if 
(!r_stem_suffix_chain_before_ki()) + { + cursor = limit - v_27; + break lab53; + } + } while (false); } while (false); - break lab0; - } while (false); - cursor = limit - v_1; - // (, line 412 - // [, line 412 - ket = cursor; - // or, line 412 - lab51: do { - v_26 = limit - cursor; - lab52: do { - // call mark_possessives, line 412 - if (!r_mark_possessives()) - { - break lab52; - } - break lab51; - } while (false); - cursor = limit - v_26; - // call mark_sU, line 412 - if (!r_mark_sU()) + return true; + } + + private boolean r_post_process_last_consonants() { + int among_var; + // (, line 415 + // [, line 416 + ket = cursor; + // substring, line 416 + among_var = find_among_b(a_23, 4); + if (among_var == 0) { return false; } - } while (false); - // ], line 412 - bra = cursor; - // delete, line 412 - slice_del(); - // try, line 412 - v_27 = limit - cursor; - lab53: do { - // (, line 412 - // [, line 412 - ket = cursor; - // call mark_lAr, line 412 - if (!r_mark_lAr()) - { - cursor = limit - v_27; - break lab53; - } - // ], line 412 + // ], line 416 bra = cursor; - // delete, line 412 - slice_del(); - // call stem_suffix_chain_before_ki, line 412 - if (!r_stem_suffix_chain_before_ki()) - { - cursor = limit - v_27; - break lab53; + switch(among_var) { + case 0: + return false; + case 1: + // (, line 417 + // <-, line 417 + slice_from("p"); + break; + case 2: + // (, line 418 + // <-, line 418 + slice_from("\u00E7"); + break; + case 3: + // (, line 419 + // <-, line 419 + slice_from("t"); + break; + case 4: + // (, line 420 + // <-, line 420 + slice_from("k"); + break; } - } while (false); - } while (false); - return true; - } + return true; + } - private boolean r_post_process_last_consonants() { - int among_var; - // (, line 415 - // [, line 416 - ket = cursor; - // substring, line 416 - among_var = find_among_b(a_23, 4); - if (among_var == 0) - { - return false; - } - // ], line 416 - bra = cursor; - switch(among_var) { - case 0: - return false; - case 1: - // (, line 417 - // <-, line 417 - slice_from("p"); - break; - case 2: - // (, line 418 - // <-, line 418 - slice_from("\u00E7"); - break; - case 3: - // (, line 419 - // <-, line 419 - slice_from("t"); - break; - case 4: - // (, line 420 - // <-, line 420 - slice_from("k"); - break; - } - return true; - } - - private boolean r_append_U_to_stems_ending_with_d_or_g() { + private boolean r_append_U_to_stems_ending_with_d_or_g() { int v_1; int v_2; int v_3; @@ -2677,454 +2684,467 @@ int v_13; int v_14; int v_15; - // (, line 430 - // test, line 431 - v_1 = limit - cursor; - // (, line 431 - // or, line 431 - lab0: do { - v_2 = limit - cursor; - lab1: do { - // literal, line 431 - if (!(eq_s_b(1, "d"))) - { - break lab1; - } - break lab0; - } while (false); - cursor = limit - v_2; - // literal, line 431 - if (!(eq_s_b(1, "g"))) - { - return false; - } - } while (false); - cursor = limit - v_1; - // or, line 433 - lab2: do { - v_3 = limit - cursor; - lab3: do { - // (, line 432 - // test, line 432 - v_4 = limit - cursor; - // (, line 432 - // (, line 432 - // goto, line 432 - golab4: while(true) - { - v_5 = limit - cursor; - lab5: do { - if (!(in_grouping_b(g_vowel, 97, 305))) + // (, line 430 + // test, line 431 + v_1 = limit - cursor; + // (, line 431 + // or, line 431 + lab0: do { + v_2 = limit - cursor; + lab1: do { + // literal, line 431 + if (!(eq_s_b(1, "d"))) { - break lab5; + break lab1; } - cursor = limit - v_5; - break golab4; + break lab0; } while (false); - cursor = limit - v_5; - if (cursor <= limit_backward) + cursor = limit - 
v_2; + // literal, line 431 + if (!(eq_s_b(1, "g"))) { - break lab3; + return false; } - cursor--; - } - // or, line 432 - lab6: do { - v_6 = limit - cursor; - lab7: do { - // literal, line 432 - if (!(eq_s_b(1, "a"))) + } while (false); + cursor = limit - v_1; + // or, line 433 + lab2: do { + v_3 = limit - cursor; + lab3: do { + // (, line 432 + // test, line 432 + v_4 = limit - cursor; + // (, line 432 + // (, line 432 + // goto, line 432 + golab4: while(true) { - break lab7; + v_5 = limit - cursor; + lab5: do { + if (!(in_grouping_b(g_vowel, 97, 305))) + { + break lab5; + } + cursor = limit - v_5; + break golab4; + } while (false); + cursor = limit - v_5; + if (cursor <= limit_backward) + { + break lab3; + } + cursor--; } - break lab6; - } while (false); - cursor = limit - v_6; - // literal, line 432 - if (!(eq_s_b(1, "\u0131"))) - { - break lab3; - } - } while (false); - cursor = limit - v_4; - // <+, line 432 - { - int c = cursor; - insert(cursor, cursor, "\u0131"); - cursor = c; - } - break lab2; - } while (false); - cursor = limit - v_3; - lab8: do { - // (, line 434 - // test, line 434 - v_7 = limit - cursor; - // (, line 434 - // (, line 434 - // goto, line 434 - golab9: while(true) - { - v_8 = limit - cursor; - lab10: do { - if (!(in_grouping_b(g_vowel, 97, 305))) + // or, line 432 + lab6: do { + v_6 = limit - cursor; + lab7: do { + // literal, line 432 + if (!(eq_s_b(1, "a"))) + { + break lab7; + } + break lab6; + } while (false); + cursor = limit - v_6; + // literal, line 432 + if (!(eq_s_b(1, "\u0131"))) + { + break lab3; + } + } while (false); + cursor = limit - v_4; + // <+, line 432 { - break lab10; + int c = cursor; + insert(cursor, cursor, "\u0131"); + cursor = c; } - cursor = limit - v_8; - break golab9; + break lab2; } while (false); - cursor = limit - v_8; - if (cursor <= limit_backward) - { - break lab8; - } - cursor--; - } - // or, line 434 - lab11: do { - v_9 = limit - cursor; - lab12: do { - // literal, line 434 - if (!(eq_s_b(1, "e"))) + cursor = limit - v_3; + lab8: do { + // (, line 434 + // test, line 434 + v_7 = limit - cursor; + // (, line 434 + // (, line 434 + // goto, line 434 + golab9: while(true) { - break lab12; + v_8 = limit - cursor; + lab10: do { + if (!(in_grouping_b(g_vowel, 97, 305))) + { + break lab10; + } + cursor = limit - v_8; + break golab9; + } while (false); + cursor = limit - v_8; + if (cursor <= limit_backward) + { + break lab8; + } + cursor--; } - break lab11; + // or, line 434 + lab11: do { + v_9 = limit - cursor; + lab12: do { + // literal, line 434 + if (!(eq_s_b(1, "e"))) + { + break lab12; + } + break lab11; + } while (false); + cursor = limit - v_9; + // literal, line 434 + if (!(eq_s_b(1, "i"))) + { + break lab8; + } + } while (false); + cursor = limit - v_7; + // <+, line 434 + { + int c = cursor; + insert(cursor, cursor, "i"); + cursor = c; + } + break lab2; } while (false); - cursor = limit - v_9; - // literal, line 434 - if (!(eq_s_b(1, "i"))) - { - break lab8; - } - } while (false); - cursor = limit - v_7; - // <+, line 434 - { - int c = cursor; - insert(cursor, cursor, "i"); - cursor = c; - } - break lab2; - } while (false); - cursor = limit - v_3; - lab13: do { - // (, line 436 - // test, line 436 - v_10 = limit - cursor; - // (, line 436 - // (, line 436 - // goto, line 436 - golab14: while(true) - { - v_11 = limit - cursor; - lab15: do { - if (!(in_grouping_b(g_vowel, 97, 305))) + cursor = limit - v_3; + lab13: do { + // (, line 436 + // test, line 436 + v_10 = limit - cursor; + // (, line 436 + // (, line 436 + // goto, 
line 436 + golab14: while(true) { - break lab15; + v_11 = limit - cursor; + lab15: do { + if (!(in_grouping_b(g_vowel, 97, 305))) + { + break lab15; + } + cursor = limit - v_11; + break golab14; + } while (false); + cursor = limit - v_11; + if (cursor <= limit_backward) + { + break lab13; + } + cursor--; } - cursor = limit - v_11; - break golab14; + // or, line 436 + lab16: do { + v_12 = limit - cursor; + lab17: do { + // literal, line 436 + if (!(eq_s_b(1, "o"))) + { + break lab17; + } + break lab16; + } while (false); + cursor = limit - v_12; + // literal, line 436 + if (!(eq_s_b(1, "u"))) + { + break lab13; + } + } while (false); + cursor = limit - v_10; + // <+, line 436 + { + int c = cursor; + insert(cursor, cursor, "u"); + cursor = c; + } + break lab2; } while (false); - cursor = limit - v_11; - if (cursor <= limit_backward) + cursor = limit - v_3; + // (, line 438 + // test, line 438 + v_13 = limit - cursor; + // (, line 438 + // (, line 438 + // goto, line 438 + golab18: while(true) { - break lab13; + v_14 = limit - cursor; + lab19: do { + if (!(in_grouping_b(g_vowel, 97, 305))) + { + break lab19; + } + cursor = limit - v_14; + break golab18; + } while (false); + cursor = limit - v_14; + if (cursor <= limit_backward) + { + return false; + } + cursor--; } - cursor--; - } - // or, line 436 - lab16: do { - v_12 = limit - cursor; - lab17: do { - // literal, line 436 - if (!(eq_s_b(1, "o"))) + // or, line 438 + lab20: do { + v_15 = limit - cursor; + lab21: do { + // literal, line 438 + if (!(eq_s_b(1, "\u00F6"))) + { + break lab21; + } + break lab20; + } while (false); + cursor = limit - v_15; + // literal, line 438 + if (!(eq_s_b(1, "\u00FC"))) { - break lab17; + return false; } - break lab16; } while (false); - cursor = limit - v_12; - // literal, line 436 - if (!(eq_s_b(1, "u"))) + cursor = limit - v_13; + // <+, line 438 { - break lab13; + int c = cursor; + insert(cursor, cursor, "\u00FC"); + cursor = c; } } while (false); - cursor = limit - v_10; - // <+, line 436 + return true; + } + + private boolean r_more_than_one_syllable_word() { + int v_1; + int v_3; + // (, line 445 + // test, line 446 + v_1 = cursor; + // (, line 446 + // atleast, line 446 { - int c = cursor; - insert(cursor, cursor, "u"); - cursor = c; - } - break lab2; - } while (false); - cursor = limit - v_3; - // (, line 438 - // test, line 438 - v_13 = limit - cursor; - // (, line 438 - // (, line 438 - // goto, line 438 - golab18: while(true) - { - v_14 = limit - cursor; - lab19: do { - if (!(in_grouping_b(g_vowel, 97, 305))) + int v_2 = 2; + // atleast, line 446 + replab0: while(true) { - break lab19; + v_3 = cursor; + lab1: do { + // (, line 446 + // gopast, line 446 + golab2: while(true) + { + lab3: do { + if (!(in_grouping(g_vowel, 97, 305))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + break lab1; + } + cursor++; + } + v_2--; + continue replab0; + } while (false); + cursor = v_3; + break replab0; } - cursor = limit - v_14; - break golab18; - } while (false); - cursor = limit - v_14; - if (cursor <= limit_backward) - { - return false; - } - cursor--; - } - // or, line 438 - lab20: do { - v_15 = limit - cursor; - lab21: do { - // literal, line 438 - if (!(eq_s_b(1, "\u00F6"))) + if (v_2 > 0) { - break lab21; + return false; } - break lab20; - } while (false); - cursor = limit - v_15; - // literal, line 438 - if (!(eq_s_b(1, "\u00FC"))) - { - return false; } - } while (false); - cursor = limit - v_13; - // <+, line 438 - { - int c = cursor; - insert(cursor, cursor, 
"\u00FC"); - cursor = c; + cursor = v_1; + return true; } - } while (false); - return true; - } - private boolean r_more_than_one_syllable_word() { + private boolean r_is_reserved_word() { int v_1; - int v_3; - // (, line 445 - // test, line 446 - v_1 = cursor; - // (, line 446 - // atleast, line 446 - { - int v_2 = 2; - // atleast, line 446 - replab0: while(true) - { - v_3 = cursor; - lab1: do { - // (, line 446 - // gopast, line 446 - golab2: while(true) + int v_2; + int v_4; + // (, line 449 + // or, line 451 + lab0: do { + v_1 = cursor; + lab1: do { + // test, line 450 + v_2 = cursor; + // (, line 450 + // gopast, line 450 + golab2: while(true) + { + lab3: do { + // literal, line 450 + if (!(eq_s(2, "ad"))) + { + break lab3; + } + break golab2; + } while (false); + if (cursor >= limit) + { + break lab1; + } + cursor++; + } + // (, line 450 + I_strlen = 2; + // (, line 450 + if (!(I_strlen == limit)) + { + break lab1; + } + cursor = v_2; + break lab0; + } while (false); + cursor = v_1; + // test, line 452 + v_4 = cursor; + // (, line 452 + // gopast, line 452 + golab4: while(true) { - lab3: do { - if (!(in_grouping(g_vowel, 97, 305))) + lab5: do { + // literal, line 452 + if (!(eq_s(5, "soyad"))) { - break lab3; + break lab5; } - break golab2; + break golab4; } while (false); if (cursor >= limit) { - break lab1; + return false; } cursor++; } - v_2--; - continue replab0; + // (, line 452 + I_strlen = 5; + // (, line 452 + if (!(I_strlen == limit)) + { + return false; + } + cursor = v_4; } while (false); - cursor = v_3; - break replab0; + return true; } - if (v_2 > 0) - { - return false; - } - } - cursor = v_1; - return true; - } - private boolean r_is_reserved_word() { + private boolean r_postlude() { int v_1; int v_2; - int v_4; - // (, line 449 - // or, line 451 - lab0: do { - v_1 = cursor; - lab1: do { - // test, line 450 - v_2 = cursor; - // (, line 450 - // gopast, line 450 - golab2: while(true) + int v_3; + // (, line 455 + // not, line 456 { - lab3: do { - // literal, line 450 - if (!(eq_s(2, "ad"))) + v_1 = cursor; + lab0: do { + // (, line 456 + // call is_reserved_word, line 456 + if (!r_is_reserved_word()) { - break lab3; + break lab0; } - break golab2; + return false; } while (false); - if (cursor >= limit) + cursor = v_1; + } + // backwards, line 457 + limit_backward = cursor; cursor = limit; + // (, line 457 + // do, line 458 + v_2 = limit - cursor; + lab1: do { + // call append_U_to_stems_ending_with_d_or_g, line 458 + if (!r_append_U_to_stems_ending_with_d_or_g()) { break lab1; } - cursor++; - } - // (, line 450 - I_strlen = 2; - // (, line 450 - if (!(I_strlen == limit)) + } while (false); + cursor = limit - v_2; + // do, line 459 + v_3 = limit - cursor; + lab2: do { + // call post_process_last_consonants, line 459 + if (!r_post_process_last_consonants()) + { + break lab2; + } + } while (false); + cursor = limit - v_3; + cursor = limit_backward; return true; + } + + @Override + public boolean stem() { + int v_1; + int v_2; + // (, line 464 + // (, line 465 + // call more_than_one_syllable_word, line 465 + if (!r_more_than_one_syllable_word()) { - break lab1; + return false; } - cursor = v_2; - break lab0; - } while (false); - cursor = v_1; - // test, line 452 - v_4 = cursor; - // (, line 452 - // gopast, line 452 - golab4: while(true) - { - lab5: do { - // literal, line 452 - if (!(eq_s(5, "soyad"))) + // (, line 466 + // backwards, line 467 + limit_backward = cursor; cursor = limit; + // (, line 467 + // do, line 468 + v_1 = limit - cursor; + lab0: do { + // call 
stem_nominal_verb_suffixes, line 468 + if (!r_stem_nominal_verb_suffixes()) { - break lab5; + break lab0; } - break golab4; } while (false); - if (cursor >= limit) + cursor = limit - v_1; + // Boolean test continue_stemming_noun_suffixes, line 469 + if (!(B_continue_stemming_noun_suffixes)) { return false; } - cursor++; - } - // (, line 452 - I_strlen = 5; - // (, line 452 - if (!(I_strlen == limit)) - { - return false; - } - cursor = v_4; - } while (false); - return true; - } - - private boolean r_postlude() { - int v_1; - int v_2; - int v_3; - // (, line 455 - // not, line 456 - { - v_1 = cursor; - lab0: do { - // (, line 456 - // call is_reserved_word, line 456 - if (!r_is_reserved_word()) + // do, line 470 + v_2 = limit - cursor; + lab1: do { + // call stem_noun_suffixes, line 470 + if (!r_stem_noun_suffixes()) + { + break lab1; + } + } while (false); + cursor = limit - v_2; + cursor = limit_backward; // call postlude, line 473 + if (!r_postlude()) { - break lab0; + return false; } - return false; - } while (false); - cursor = v_1; - } - // backwards, line 457 - limit_backward = cursor; cursor = limit; - // (, line 457 - // do, line 458 - v_2 = limit - cursor; - lab1: do { - // call append_U_to_stems_ending_with_d_or_g, line 458 - if (!r_append_U_to_stems_ending_with_d_or_g()) - { - break lab1; + return true; } - } while (false); - cursor = limit - v_2; - // do, line 459 - v_3 = limit - cursor; - lab2: do { - // call post_process_last_consonants, line 459 - if (!r_post_process_last_consonants()) - { - break lab2; - } - } while (false); - cursor = limit - v_3; - cursor = limit_backward; return true; + + @Override + public boolean equals( Object o ) { + return o instanceof TurkishStemmer; } - public boolean stem() { - int v_1; - int v_2; - // (, line 464 - // (, line 465 - // call more_than_one_syllable_word, line 465 - if (!r_more_than_one_syllable_word()) - { - return false; - } - // (, line 466 - // backwards, line 467 - limit_backward = cursor; cursor = limit; - // (, line 467 - // do, line 468 - v_1 = limit - cursor; - lab0: do { - // call stem_nominal_verb_suffixes, line 468 - if (!r_stem_nominal_verb_suffixes()) - { - break lab0; - } - } while (false); - cursor = limit - v_1; - // Boolean test continue_stemming_noun_suffixes, line 469 - if (!(B_continue_stemming_noun_suffixes)) - { - return false; - } - // do, line 470 - v_2 = limit - cursor; - lab1: do { - // call stem_noun_suffixes, line 470 - if (!r_stem_noun_suffixes()) - { - break lab1; - } - } while (false); - cursor = limit - v_2; - cursor = limit_backward; // call postlude, line 473 - if (!r_postlude()) - { - return false; - } - return true; + @Override + public int hashCode() { + return TurkishStemmer.class.getName().hashCode(); } + + } Fisheye: Tag 1.1 refers to a dead (removed) revision in file `3rdParty_sources/lucene/org/tartarus/snowball/ext/package.html'. Fisheye: No comparison available. Pass `N' to diff?
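Note (editorial, not part of the patch): the class diffed above is machine-generated by the Snowball compiler, so its control flow is not meant to be read directly. The sketch below shows how such a generated stemmer is typically driven in this source tree. It assumes the org.tartarus.snowball.SnowballProgram base class exposes setCurrent(String), stem() and getCurrent(), which is how Lucene's SnowballFilter invokes these classes in this era of the code; the example words and class name TurkishStemDemo are illustrative only, and the exact signatures should be checked against the SnowballProgram revision in this tree.

import org.tartarus.snowball.ext.TurkishStemmer;

public class TurkishStemDemo {
    public static void main(String[] args) {
        // Each generated stemmer is a stateful SnowballProgram; reuse one
        // instance per thread rather than allocating per word.
        TurkishStemmer stemmer = new TurkishStemmer();

        // Hypothetical sample inputs; any lower-cased Turkish surface forms work.
        String[] words = { "kitaplar", "evlerinden", "ad" };

        for (String word : words) {
            stemmer.setCurrent(word);          // load the word into the stemmer's buffer
            boolean changedOrAccepted = stemmer.stem();  // run the generated stem() shown in the diff
            if (changedOrAccepted) {
                System.out.println(word + " -> " + stemmer.getCurrent());
            } else {
                // stem() returns false e.g. for one-syllable or reserved words,
                // in which case the buffer is left as-is.
                System.out.println(word + " (left unstemmed)");
            }
        }
    }
}

In Lucene itself this class is normally not called directly; SnowballFilter wraps it in a TokenFilter and feeds it per-token character buffers, but the setCurrent/stem/getCurrent cycle above is the same underlying contract.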