--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/Makefile Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,74 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+#
+
+include ../../make-rules/shared-macros.mk
+
+COMPONENT_NAME = indri
+COMPONENT_VERSION = 5.4
+COMPONENT_PROJECT_URL = http://lemurproject.org/indri.php
+COMPONENT_SRC = $(COMPONENT_NAME)-$(COMPONENT_VERSION)
+COMPONENT_ARCHIVE = $(COMPONENT_SRC).tar.gz
+COMPONENT_ARCHIVE_HASH = \
+ sha256:b1d27f6da4cb15776cee0121c9511ed0e998d47564d785a2bb41a44c654e3e3f
+COMPONENT_ARCHIVE_URL = http://sourceforge.net/projects/lemur/files/lemur/$(COMPONENT_SRC)/$(COMPONENT_SRC).tar.gz/download
+COMPONENT_BUGDB = library/indri
+
+include ../../make-rules/prep.mk
+include ../../make-rules/configure.mk
+include ../../make-rules/ips.mk
+
+COMPONENT_PRE_CONFIGURE_ACTION = $(CLONEY) $(SOURCE_DIR) $(@D)
+COMPILER = gcc
+CONFIGURE_OPTIONS += "CFLAGS=$(CFLAGS)"
+CONFIGURE_OPTIONS += "CXXFLAGS=$(CXXFLAGS)"
+CONFIGURE_OPTIONS += "LDFLAGS=$(LDFLAGS)"
+COMPONENT_INSTALL_ARGS = "includedir=$(PROTOUSRINCDIR)"
+COMPONENT_INSTALL_ARGS += "datarootdir=$(PROTOUSRSHAREDIR)"
+
+$(BUILD_DIR_32)/.installed: COMPONENT_INSTALL_ARGS += "bindir=$(PROTOUSRBINDIR)"
+$(BUILD_DIR_32)/.installed: COMPONENT_INSTALL_ARGS += "libdir=$(PROTOUSRLIBDIR)"
+$(BUILD_DIR_64)/.installed: COMPONENT_INSTALL_ARGS += "bindir=$(PROTOUSRBINDIR64)"
+$(BUILD_DIR_64)/.installed: COMPONENT_INSTALL_ARGS += "libdir=$(PROTOUSRLIBDIR64)"
+# Because of 18041236
+$(BUILD_DIR_64)/.built: LD_B_DIRECT =
+
+COMPONENT_POST_BUILD_ACTION = \
+ (cd $(@D) ; $(ENV) $(COMPONENT_BUILD_ENV) \
+ $(GMAKE) -f Makefile.app)
+
+GPATCH_FLAGS += -E # remove empty files
+
+ASLR_MODE = $(ASLR_ENABLE)
+
+# common targets
+build: $(BUILD_32_and_64)
+
+install: $(INSTALL_32_and_64)
+
+test: $(NO_TESTS)
+
+BUILD_PKG_DEPENDENCIES = $(BUILD_TOOLS)
+
+include ../../make-rules/depend.mk
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/indri.p5m Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,322 @@
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+# Copyright (c) 2014, Oracle and/or its affiliates. All rights reserved.
+#
+
+set name=pkg.fmri \
+ value=pkg:/library/indri@$(IPS_COMPONENT_VERSION),$(BUILD_VERSION)
+set name=pkg.summary value="Text search engine"
+set name=pkg.description \
+ value="Indri is a search engine that provides state-of-the-art text search and a rich structured query language for text collections of up to 50 million documents (single machine) or 500 million documents (distributed search). Available for Linux, Solaris, Windows and Mac OSX."
+set name=com.oracle.info.description value="the Indri search engine"
+set name=com.oracle.info.tpno value=13668
+set name=info.classification \
+ value=org.opensolaris.category.2008:System/Libraries
+set name=info.source-url value=$(COMPONENT_ARCHIVE_URL)
+set name=info.upstream-url value=$(COMPONENT_PROJECT_URL)
+set name=org.opensolaris.arc-caseid value=PSARC/2013/232
+set name=org.opensolaris.consolidation value=$(CONSOLIDATION)
+
+# The usr/lib/indri directory does not inherit the usr/bin defaults, so we
+# have to name these actions explicitly here
+file usr/bin/$(MACH64)/IndriBuildIndex path=usr/lib/indri/IndriBuildIndex group=bin owner=root mode=0555
+file usr/bin/$(MACH64)/IndriRunQuery path=usr/lib/indri/IndriRunQuery group=bin owner=root mode=0555
+
+# Clashes with userland antlr
+# dir path=usr/include/antlr
+# ...
+
+# usr/include
+dir path=usr/include/indri
+file path=usr/include/indri/AnchorTextAnnotator.hpp
+file path=usr/include/indri/AnchorTextHarvester.hpp
+file path=usr/include/indri/AnchorTextWriter.hpp
+file path=usr/include/indri/Annotator.hpp
+file path=usr/include/indri/Appliers.hpp
+file path=usr/include/indri/ArabicStemmerTransformation.hpp
+file path=usr/include/indri/Arabic_Stemmer_utf8.hpp
+file path=usr/include/indri/AttributeValuePair.hpp
+file path=usr/include/indri/BeliefNode.hpp
+file path=usr/include/indri/BooleanAndNode.hpp
+file path=usr/include/indri/Buffer.hpp
+file path=usr/include/indri/BulkTree.hpp
+file path=usr/include/indri/CachedFrequencyBeliefNode.hpp
+file path=usr/include/indri/Collection.hpp
+file path=usr/include/indri/CombinedVocabularyIterator.hpp
+file path=usr/include/indri/Combiner.hpp
+file path=usr/include/indri/CompressedCollection.hpp
+file path=usr/include/indri/ConditionVariable.hpp
+file path=usr/include/indri/Conflater.hpp
+file path=usr/include/indri/ConflationPattern.hpp
+file path=usr/include/indri/ContextCountAccumulator.hpp
+file path=usr/include/indri/ContextCountGraphCopier.hpp
+file path=usr/include/indri/ContextCountGraphExtractor.hpp
+file path=usr/include/indri/ContextInclusionAndNode.hpp
+file path=usr/include/indri/ContextSimpleCountAccumulator.hpp
+file path=usr/include/indri/ContextSimpleCountCollectorCopier.hpp
+file path=usr/include/indri/Copier.hpp
+file path=usr/include/indri/CorpusStatistics.hpp
+file path=usr/include/indri/DagCopier.hpp
+file path=usr/include/indri/DateFieldAnnotator.hpp
+file path=usr/include/indri/DateParse.hpp
+file path=usr/include/indri/DeletedDocumentList.hpp
+file path=usr/include/indri/DirectoryIterator.hpp
+file path=usr/include/indri/DirichletTermScoreFunction.hpp
+file path=usr/include/indri/DiskDocExtentListIterator.hpp
+file path=usr/include/indri/DiskDocListFileIterator.hpp
+file path=usr/include/indri/DiskDocListIterator.hpp
+file path=usr/include/indri/DiskDocumentDataIterator.hpp
+file path=usr/include/indri/DiskFrequentVocabularyIterator.hpp
+file path=usr/include/indri/DiskIndex.hpp
+file path=usr/include/indri/DiskKeyfileVocabularyIterator.hpp
+file path=usr/include/indri/DiskTermData.hpp
+file path=usr/include/indri/DiskTermListFileIterator.hpp
+file path=usr/include/indri/DocExtentListIterator.hpp
+file path=usr/include/indri/DocExtentListMemoryBuilder.hpp
+file path=usr/include/indri/DocListFileIterator.hpp
+file path=usr/include/indri/DocListIterator.hpp
+file path=usr/include/indri/DocListIteratorNode.hpp
+file path=usr/include/indri/DocListMemoryBuilder.hpp
+file path=usr/include/indri/DocumentCount.hpp
+file path=usr/include/indri/DocumentData.hpp
+file path=usr/include/indri/DocumentDataIterator.hpp
+file path=usr/include/indri/DocumentIterator.hpp
+file path=usr/include/indri/DocumentIteratorFactory.hpp
+file path=usr/include/indri/DocumentStructure.hpp
+file path=usr/include/indri/DocumentStructureHolderNode.hpp
+file path=usr/include/indri/DocumentVector.hpp
+file path=usr/include/indri/EvaluatorNode.hpp
+file path=usr/include/indri/Extent.hpp
+file path=usr/include/indri/ExtentAndNode.hpp
+file path=usr/include/indri/ExtentChildNode.hpp
+file path=usr/include/indri/ExtentDescendantNode.hpp
+file path=usr/include/indri/ExtentEnforcementNode.hpp
+file path=usr/include/indri/ExtentInsideNode.hpp
+file path=usr/include/indri/ExtentOrNode.hpp
+file path=usr/include/indri/ExtentParentNode.hpp
+file path=usr/include/indri/ExtentRestrictionModelAnnotatorCopier.hpp
+file path=usr/include/indri/ExtentRestrictionNode.hpp
+file path=usr/include/indri/FieldBelowWalker.hpp
+file path=usr/include/indri/FieldBetweenNode.hpp
+file path=usr/include/indri/FieldEqualsNode.hpp
+file path=usr/include/indri/FieldExtent.hpp
+file path=usr/include/indri/FieldGreaterNode.hpp
+file path=usr/include/indri/FieldIteratorNode.hpp
+file path=usr/include/indri/FieldLessNode.hpp
+file path=usr/include/indri/FieldListIterator.hpp
+file path=usr/include/indri/FieldStatistics.hpp
+file path=usr/include/indri/FieldWildcardNode.hpp
+file path=usr/include/indri/File.hpp
+file path=usr/include/indri/FileClassEnvironment.hpp
+file path=usr/include/indri/FileClassEnvironmentFactory.hpp
+file path=usr/include/indri/FileTreeIterator.hpp
+file path=usr/include/indri/FilterNode.hpp
+file path=usr/include/indri/FilterRejectNode.hpp
+file path=usr/include/indri/FilterRequireNode.hpp
+file path=usr/include/indri/FixedPassageNode.hpp
+file path=usr/include/indri/FrequencyListCopier.hpp
+file path=usr/include/indri/HTMLParser.hpp
+file path=usr/include/indri/HashTable.hpp
+file path=usr/include/indri/Index.hpp
+file path=usr/include/indri/IndexEnvironment.hpp
+file path=usr/include/indri/IndexWriter.hpp
+file path=usr/include/indri/IndriParser.hpp
+file path=usr/include/indri/IndriTimer.hpp
+file path=usr/include/indri/IndriTokenizer.hpp
+file path=usr/include/indri/InferenceNetwork.hpp
+file path=usr/include/indri/InferenceNetworkBuilder.hpp
+file path=usr/include/indri/InferenceNetworkNode.hpp
+file path=usr/include/indri/InternalFileBuffer.hpp
+file path=usr/include/indri/JelinekMercerTermScoreFunction.hpp
+file path=usr/include/indri/KrovetzStemmer.hpp
+file path=usr/include/indri/KrovetzStemmerTransformation.hpp
+file path=usr/include/indri/LengthPriorNode.hpp
+file path=usr/include/indri/ListAccumulator.hpp
+file path=usr/include/indri/ListBeliefNode.hpp
+file path=usr/include/indri/ListCache.hpp
+file path=usr/include/indri/ListIteratorNode.hpp
+file path=usr/include/indri/LocalQueryServer.hpp
+file path=usr/include/indri/Lockable.hpp
+file path=usr/include/indri/MaxNode.hpp
+file path=usr/include/indri/MboxDocumentIterator.hpp
+file path=usr/include/indri/MemoryDocumentDataIterator.hpp
+file path=usr/include/indri/MemoryIndex.hpp
+file path=usr/include/indri/MemoryIndexDocListFileIterator.hpp
+file path=usr/include/indri/MemoryIndexTermListFileIterator.hpp
+file path=usr/include/indri/MemoryIndexVocabularyIterator.hpp
+file path=usr/include/indri/MetadataPair.hpp
+file path=usr/include/indri/Mutex.hpp
+file path=usr/include/indri/NestedExtentInsideNode.hpp
+file path=usr/include/indri/NestedListBeliefNode.hpp
+file path=usr/include/indri/NetworkListener.hpp
+file path=usr/include/indri/NetworkMessageStream.hpp
+file path=usr/include/indri/NetworkServerProxy.hpp
+file path=usr/include/indri/NetworkServerStub.hpp
+file path=usr/include/indri/NetworkStream.hpp
+file path=usr/include/indri/NexiLexer.hpp
+file path=usr/include/indri/NexiLexerTokenTypes.hpp
+file path=usr/include/indri/NexiParser.hpp
+file path=usr/include/indri/NormalDistribution.hpp
+file path=usr/include/indri/NormalizationTransformation.hpp
+file path=usr/include/indri/NotNode.hpp
+file path=usr/include/indri/NullListNode.hpp
+file path=usr/include/indri/NullScorerNode.hpp
+file path=usr/include/indri/NumericFieldAnnotator.hpp
+file path=usr/include/indri/ObjectHandler.hpp
+file path=usr/include/indri/OfficeHelper.hpp
+file path=usr/include/indri/OffsetAnnotationAnnotator.hpp
+file path=usr/include/indri/OffsetMetadataAnnotator.hpp
+file path=usr/include/indri/OrNode.hpp
+file path=usr/include/indri/OrderedWindowNode.hpp
+file path=usr/include/indri/Packer.hpp
+file path=usr/include/indri/PageRank.hpp
+file path=usr/include/indri/Parameters.hpp
+file path=usr/include/indri/ParsedDocument.hpp
+file path=usr/include/indri/ParserFactory.hpp
+file path=usr/include/indri/Path.hpp
+file path=usr/include/indri/PlusNode.hpp
+file path=usr/include/indri/PonteExpander.hpp
+file path=usr/include/indri/PorterStemmerTransformation.hpp
+file path=usr/include/indri/Porter_Stemmer.hpp
+file path=usr/include/indri/PowerPointDocumentExtractor.hpp
+file path=usr/include/indri/PriorFactory.hpp
+file path=usr/include/indri/PriorListIterator.hpp
+file path=usr/include/indri/PriorNode.hpp
+file path=usr/include/indri/QueryAnnotation.hpp
+file path=usr/include/indri/QueryEnvironment.hpp
+file path=usr/include/indri/QueryExpander.hpp
+file path=usr/include/indri/QueryLexer.hpp
+file path=usr/include/indri/QueryLexerTokenTypes.hpp
+file path=usr/include/indri/QueryParser.hpp
+file path=usr/include/indri/QueryParserFactory.hpp
+file path=usr/include/indri/QueryResponsePacker.hpp
+file path=usr/include/indri/QueryResponseUnpacker.hpp
+file path=usr/include/indri/QueryServer.hpp
+file path=usr/include/indri/QuerySpec.hpp
+file path=usr/include/indri/QueryStopper.hpp
+file path=usr/include/indri/QueryTFWalker.hpp
+file path=usr/include/indri/RMExpander.hpp
+file path=usr/include/indri/RVLCompressStream.hpp
+file path=usr/include/indri/RVLDecompressStream.hpp
+file path=usr/include/indri/RawScorerNodeExtractor.hpp
+file path=usr/include/indri/RawTextParser.hpp
+file path=usr/include/indri/ReaderLockable.hpp
+file path=usr/include/indri/ReadersWritersLock.hpp
+file path=usr/include/indri/ReformulateQuery.hpp
+file path=usr/include/indri/RegionAllocator.hpp
+file path=usr/include/indri/RelevanceModel.hpp
+file path=usr/include/indri/Repository.hpp
+file path=usr/include/indri/RepositoryLoadThread.hpp
+file path=usr/include/indri/RepositoryMaintenanceThread.hpp
+file path=usr/include/indri/ScopedLock.hpp
+file path=usr/include/indri/ScopedMonitor.hpp
+file path=usr/include/indri/ScoredExtentAccumulator.hpp
+file path=usr/include/indri/ScoredExtentResult.hpp
+file path=usr/include/indri/SequentialReadBuffer.hpp
+file path=usr/include/indri/SequentialWriteBuffer.hpp
+file path=usr/include/indri/ShrinkageBeliefNode.hpp
+file path=usr/include/indri/SimpleCopier.hpp
+file path=usr/include/indri/SkippingCapableNode.hpp
+file path=usr/include/indri/SmoothingAnnotatorWalker.hpp
+file path=usr/include/indri/SnippetBuilder.hpp
+file path=usr/include/indri/StemmerFactory.hpp
+file path=usr/include/indri/StopStructureRemover.hpp
+file path=usr/include/indri/StopperTransformation.hpp
+file path=usr/include/indri/SumNode.hpp
+file path=usr/include/indri/TFIDFExpander.hpp
+file path=usr/include/indri/TFIDFTermScoreFunction.hpp
+file path=usr/include/indri/Tag.hpp
+file path=usr/include/indri/TagDocumentIterator.hpp
+file path=usr/include/indri/TagEvent.hpp
+file path=usr/include/indri/TagExtent.hpp
+file path=usr/include/indri/TagList.hpp
+file path=usr/include/indri/TaggedDocumentIterator.hpp
+file path=usr/include/indri/TaggedTextParser.hpp
+file path=usr/include/indri/TermBitmap.hpp
+file path=usr/include/indri/TermData.hpp
+file path=usr/include/indri/TermExtent.hpp
+file path=usr/include/indri/TermFieldStatistics.hpp
+file path=usr/include/indri/TermFrequencyBeliefNode.hpp
+file path=usr/include/indri/TermList.hpp
+file path=usr/include/indri/TermListFileIterator.hpp
+file path=usr/include/indri/TermRecorder.hpp
+file path=usr/include/indri/TermScoreFunction.hpp
+file path=usr/include/indri/TermScoreFunctionFactory.hpp
+file path=usr/include/indri/TermTranslator.hpp
+file path=usr/include/indri/TextDocumentExtractor.hpp
+file path=usr/include/indri/TextParser.hpp
+file path=usr/include/indri/TextTokenizer.hpp
+file path=usr/include/indri/TextTokenizerPIA.hpp
+file path=usr/include/indri/Thread.hpp
+file path=usr/include/indri/TokenizedDocument.hpp
+file path=usr/include/indri/TokenizerFactory.hpp
+file path=usr/include/indri/Transformation.hpp
+file path=usr/include/indri/TreePrinterWalker.hpp
+file path=usr/include/indri/TwoStageTermScoreFunction.hpp
+file path=usr/include/indri/URLTextAnnotator.hpp
+file path=usr/include/indri/UTF8CaseNormalizationTransformation.hpp
+file path=usr/include/indri/UTF8Transcoder.hpp
+file path=usr/include/indri/UnnecessaryNodeRemoverCopier.hpp
+file path=usr/include/indri/UnorderedWindowNode.hpp
+file path=usr/include/indri/Unpacker.hpp
+file path=usr/include/indri/UnparsedDocument.hpp
+file path=usr/include/indri/UtilityThread.hpp
+file path=usr/include/indri/VocabularyIterator.hpp
+file path=usr/include/indri/WARCDocumentIterator.hpp
+file path=usr/include/indri/WPlusNode.hpp
+file path=usr/include/indri/Walker.hpp
+file path=usr/include/indri/WeightFoldingCopier.hpp
+file path=usr/include/indri/WeightedAndNode.hpp
+file path=usr/include/indri/WeightedExtentOrNode.hpp
+file path=usr/include/indri/WeightedSumNode.hpp
+file path=usr/include/indri/WordDocumentExtractor.hpp
+file path=usr/include/indri/WriterLockable.hpp
+file path=usr/include/indri/XMLNode.hpp
+file path=usr/include/indri/XMLReader.hpp
+file path=usr/include/indri/XMLWriter.hpp
+file path=usr/include/indri/atomic.hpp
+file path=usr/include/indri/count_iterator
+file path=usr/include/indri/delete_range.hpp
+file path=usr/include/indri/greedy_vector
+file path=usr/include/indri/indri-platform.h
+file path=usr/include/indri/ref_ptr.hpp
+file path=usr/include/indri/uint64comp.hpp
+
+# usr/lib
+link path=usr/lib/$(MACH64)/libindri.so target=libindri.so.1
+file usr/lib/$(MACH64)/libindri.so.1 path=usr/lib/$(MACH64)/libindri.so.1
+link path=usr/lib/$(MACH64)/libpia_wrapper.so target=libpia_wrapper.so.1
+file usr/lib/$(MACH64)/libpia_wrapper.so.1 \
+ path=usr/lib/$(MACH64)/libpia_wrapper.so.1
+link path=usr/lib/libindri.so target=libindri.so.1
+file usr/lib/libindri.so.1 path=usr/lib/libindri.so.1
+link path=usr/lib/libpia_wrapper.so target=libpia_wrapper.so.1
+file usr/lib/libpia_wrapper.so.1 path=usr/lib/libpia_wrapper.so.1
+
+# LICENSE.txt is taken from indri sources
+license LICENSE.txt license="Indri license"
+
+# not relevant for our usage as it is obsoleted
+# http://lemurproject.org/lemur.php
+# dir path=usr/include/lemur
+
+# not relevant for our usage
+# dir path=usr/share/indri
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/patches/64bit.patch Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,13 @@
+Add support for a 64-bit build
+
+--- indri-5.4/site-search/cgi/Makefile 2013-09-17 05:38:33.511459071 -0700
++++ indri-5.4/site-search/cgi/Makefile 2013-09-17 05:37:32.444587645 -0700
+@@ -27,7 +27,7 @@
+ all: $(PROG)
+
+ $(PROG): $(OBJS) $(LIBDEPS)
+- $(CXX) -o $@ $(OBJS) $(LDFLAGS)
++ $(CXX) $(CXXFLAGS) -o $@ $(OBJS) $(LDFLAGS)
+
+ clean:
+ rm -f $(PROG) $(OBJS)
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/patches/bigendian.patch Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,12 @@
+Indri tries to define htonll and ntohll functions, which clash with the Solaris definitions
+--- indri-5.4/contrib/lemur/include/lemur/lemur-compat.hpp 2013-10-08 00:25:53.513213629 +0200
++++ indri-5.4/contrib/lemur/include/lemur/lemur-compat.hpp 2013-10-08 00:25:34.165330285 +0200
+@@ -212,6 +212,8 @@
+ #endif
+
+ #if defined(WORDS_BIGENDIAN)
++#undef htonll
++#undef ntohll
+ inline UINT64 htonll( UINT64 native ) {
+ return native;
+ }
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/patches/build_also_shared.patch Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,19 @@
+Add support for building shared libraries
+--- indri-5.4/src/Makefile 2013-09-04 06:16:24.212280233 -0700
++++ indri-5.4/src/Makefile 2013-09-04 06:15:35.997458620 -0700
+@@ -17,6 +17,7 @@
+ # how to make a library from object files
+ $(OBJLIB): $(SPECHEADERS) $(OBJ) $(SPECOBJS)
+ rm -f $@; $(AR) -rs $@ $(OBJ)
++ $(CC) $(CFLAGS) -shared -o $(OBJLIB:.a=.so).1 -h $(OBJLIB:.a=.so).1 $(OBJ) -Wl,-z -Wl,allextract ../contrib/lemur/obj/liblemur.a -Wl,-z -Wl,allextract ../contrib/antlr/obj/libantlr.a
+
+ #check this.
+ $(SPECHEADERS): $(QUERYSPEC)
+@@ -40,6 +41,7 @@
+ install:
+ $(INSTALL_DATA) $(ALLHEADER) $(pkgincludedir)
+ $(AR) -rs $(libdir)/$(INDRILIB) $(OBJ)
++ cp $(OBJLIB:.a=.so).1 $(libdir)
+
+ stamp:
+ awk -f ../src/version-stamper "stamp=`date`" ../include/indri/indri-platform.h > ../include/indri/indri-platform.h2
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/patches/pia.patch Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,1028 @@
+Add our PIA wrapper to indri sources. This patch does several things:
+ - Add pia wrapper sources to indri source tree
+ - Add new tokenizer which does not treat '_' as a separator
+ - The TextTokenizerPIA.l differs from TextTokenizer.l only in single character
+ -[a-zA-Z0-9']+ { byte_position += tokleng; return ASCII_TOKEN; }
+ +[a-zA-Z0-9_']+ { byte_position += tokleng; return ASCII_TOKEN; }
+ - plus many symbol renames so that the parsers can coexist (toktext -> piatoktext etc.)
+ - TextTokenizerPIA.hpp contains only symbol renames
+ - Rest are modifications to make indri build PIA wrapper
+
+
+--- indri-5.4/pia_wrapper.cpp po črc 15 14:30:41 2013
++++ indri-5.4/pia_wrapper.cpp po črc 15 14:29:09 2013
+@@ -0,0 +1,222 @@
++/*
++ * TO compile :
++ * g++ -o libpia_wrapper.so -shared -fPIC -I../vlad-libs/sparc/usr/include/ -L../vlad-libs/sparc/usr/lib/ -lclucene-core -lnvpair pia_wrapper.cc
++ *
++ */
++
++#include <sys/stat.h>
++#include <strings.h>
++#include <stdio.h>
++#include <libnvpair.h>
++
++#include <iostream>
++#include <string>
++#include <sstream>
++#include <fstream>
++
++#include <vector>
++#include "indri/QueryEnvironment.hpp"
++#include "indri/SnippetBuilder.hpp"
++#include "indri/Repository.hpp"
++
++using namespace std;
++
++using namespace indri::api;
++
++#define MAX_RESULTS 3
++#define PIA_DATABASE "/var/db/piadb"
++#define PIA_DATABASE_STORAGE PIA_DATABASE "/collection/storage"
++
++indri::collection::Repository repository;
++
++std::string
++getFieldText(int documentID, std::string field) {
++ std::string ret_val = "";
++ indri::collection::Repository::index_state repIndexState = repository.indexes();
++ indri::index::Index *thisIndex=(*repIndexState)[0];
++ int fieldID=thisIndex->field(field);
++
++ if (fieldID < 1) {
++ return "";
++ }
++
++ const indri::index::TermList *termList=thisIndex->termList(documentID);
++
++ if (!termList) {
++ return "";
++ }
++
++ indri::utility::greedy_vector< indri::index::FieldExtent > fieldVec=termList->fields();
++ indri::utility::greedy_vector< indri::index::FieldExtent >::iterator fIter=fieldVec.begin();
++ while (fIter!=fieldVec.end()) {
++
++ if ((*fIter).id==fieldID) {
++ int beginTerm=(*fIter).begin;
++ int endTerm=(*fIter).end;
++
++ /*
++ * note that the text is inclusive of the beginning
++ * but exclusive of the ending
++ */
++ for (int t=beginTerm; t < endTerm; t++) {
++ int thisTermID=termList->terms()[t];
++ ret_val = ret_val + thisIndex->term(thisTermID) + " ";
++ }
++ }
++
++ fIter++;
++ }
++
++ delete termList;
++ termList=NULL;
++ return ret_val;
++}
++
++/*
++ * Returns NULL on failure
++ * nvlist *
++ * search(
++ * nvlist_t *search_params,
++ * char **errmsg // Similar to pia_index()
++ * );
++ */
++nvlist *
++search (nvlist_t *search_params, char **errmsg) {
++
++ char *index_path = PIA_DATABASE;
++ nvlist_t **nvl_list_result;
++ nvlist_t *nvl_return;
++ nvlist_t *nvl_result;
++ nvlist_t *results = NULL;
++
++ if (nvlist_alloc(&results, NV_UNIQUE_NAME, 0) != 0) {
++ *errmsg = strdup("nvlist_alloc failed\n");
++ return NULL;
++ }
++
++ try {
++ std::string query;
++ char *panicstack;
++ (void) nvlist_lookup_string(search_params, "stack", &panicstack);
++
++ QueryEnvironment indriEnvironment;
++ indriEnvironment.addIndex(index_path);
++
++ /* Create Indri query */
++ query = "#combine (" + std::string(panicstack) + ")";
++
++ QueryAnnotation *QAresults=indriEnvironment.runAnnotatedQuery(query.c_str(), MAX_RESULTS);
++
++ std::vector<indri::api::ScoredExtentResult> resultVector=QAresults->getResults();
++
++ int totalNumResults=resultVector.size();
++
++ /* Get Parsed document of the results */
++ std::vector<ParsedDocument*> parsedDocs=indriEnvironment.documents(resultVector);
++
++ int results_to_return = 0;
++ for ( size_t i=0; i < totalNumResults && i < MAX_RESULTS; i++ ) {
++ results_to_return++;
++ }
++
++ /* Open Repository */
++ repository.openRead(index_path);
++
++ nvl_list_result = (nvlist_t **) malloc(results_to_return * sizeof(nvlist_t *));
++
++ for ( size_t i=0; i < results_to_return; i++ ) {
++
++ std::string ret="";
++
++ int thisResultDocID=resultVector[i].document;
++
++ if (nvlist_alloc(&nvl_list_result[i], NV_UNIQUE_NAME, 0) != 0) {
++ *errmsg = strdup("nvlist_alloc failed\n");
++ return NULL;
++ }
++
++ if ((ret = getFieldText(thisResultDocID, "bug")) == "") {
++ *errmsg = strdup("Lookup of bugid failed\n");
++ return NULL;
++ } else if (nvlist_add_string(nvl_list_result[i], "pia-bugid", ret.c_str())) {
++ *errmsg = strdup("nvlist_add bugid failed\n");
++ return NULL;
++ }
++
++ if ((ret = getFieldText(thisResultDocID, "stack")) == "") {
++ *errmsg = strdup("Lookup of stack failed\n");
++ return NULL;
++ } else if (nvlist_add_string(nvl_list_result[i], "pia-stack", ret.c_str())) {
++ *errmsg = strdup("nvlist_add stack failed\n");
++ return NULL;
++ }
++
++ if ((ret = getFieldText(thisResultDocID, "signature")) == "") {
++ *errmsg = strdup("Lookup of signature failed\n");
++ return NULL;
++ } else if (nvlist_add_string(nvl_list_result[i], "pia-signature", ret.c_str())) {
++ *errmsg = strdup("nvlist_add signature failed\n");
++ return NULL;
++ }
++
++ int indri_score = 1000 + (int)resultVector[i].score*1000;
++ if (nvlist_add_int32(nvl_list_result[i], "pia-score", indri_score)) {
++ *errmsg = strdup("nvlist_add score failed\n");
++ return NULL;
++ }
++ }
++ repository.close();
++
++ nvlist_add_nvlist_array(results, "results", nvl_list_result, results_to_return);
++
++ for (int i=0; i<results_to_return; i++) {
++ nvlist_free(nvl_list_result[i]);
++ }
++
++ return results;
++
++ } catch(...){
++ nvl_list_result = (nvlist_t **) malloc(1 * sizeof(nvlist_t **));
++
++ if (nvlist_alloc(&nvl_result, NV_UNIQUE_NAME, 0) != 0) {
++ *errmsg = strdup("nvlist_alloc failed\n");
++ return NULL;
++ }
++
++ if (nvlist_add_string(nvl_result, "error", "Indri Error")) {
++ *errmsg = strdup("nvlist_add error failed\n");
++ return NULL;
++ }
++
++ nvlist_dup(nvl_result, &nvl_list_result[0], 0);
++ nvlist_free(nvl_result);
++ nvlist_add_nvlist_array(results, "results", nvl_list_result, 1);
++
++ return results;
++ }
++}
++
++extern "C" nvlist*
++pia_search (nvlist_t *search_params, char **errmsg) {
++
++ return search (search_params, errmsg);
++
++}
++
++int
++init () {
++
++ struct stat sb;
++ if (stat(PIA_DATABASE_STORAGE, &sb) != 0) {
++ return 1;
++ }
++
++ return 0;
++}
++
++extern "C" int
++pia_init () {
++
++ return init ();
++
++}
+--- indri-5.4/src/TextTokenizerPIA.l po črc 15 14:38:12 2013
++++ indri-5.4/src/TextTokenizerPIA.l po črc 15 14:36:55 2013
+@@ -0,0 +1,588 @@
++%option noyywrap
++%option never-interactive
++%option prefix="piatok"
++
++%{
++
++/*==========================================================================
++ * Copyright (c) 2004 University of Massachusetts. All Rights Reserved.
++ *
++ * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
++ * is subject to the terms of the software license set forth in the LICENSE
++ * file included with this software, and also available at
++ * http://www.lemurproject.org/license.html
++ *
++ *==========================================================================
++ */
++
++//
++// TextTokenizerPIA
++//
++// 15 September 2005 -- mwb
++//
++
++#include <string.h>
++#include <ctype.h>
++#include "indri/TextTokenizerPIA.hpp"
++#include "indri/TermExtent.hpp"
++#include "indri/TagEvent.hpp"
++#include "indri/TokenizedDocument.hpp"
++#include "indri/UnparsedDocument.hpp"
++#include "indri/UTF8Transcoder.hpp"
++#include "indri/AttributeValuePair.hpp"
++
++static long byte_position;
++
++#define ZAP 1
++#define TAG 2
++#define ASCII_TOKEN 3
++#define UTF8_TOKEN 4
++
++%}
++%start COMMENT
++%%
++
++"<!--" { BEGIN(COMMENT); byte_position += piatokleng; return ZAP; }
++<COMMENT>[^-]+ { byte_position += piatokleng; return ZAP; }
++<COMMENT>"-->" { BEGIN(INITIAL); byte_position += piatokleng; return ZAP; }
++<COMMENT>"-" { byte_position += piatokleng; return ZAP; }
++"<!"[^\>]*">" { byte_position += piatokleng; return ZAP; }
++\<[a-zA-Z/][^\>]*\> { byte_position += piatokleng; return TAG; }
++[&]([a-zA-Z]+|[#]([0-9]+|[xX][a-fA-F0-9]+))[;] { byte_position += piatokleng; return ZAP; /* symbols */ }
++[A-Z0-9]"."([A-Z0-9]".")* { byte_position += piatokleng; return ASCII_TOKEN; }
++[a-zA-Z0-9_']+ { byte_position += piatokleng; return ASCII_TOKEN; }
++"-"[0-9]+("."[0-9]+)? { byte_position += piatokleng; return ASCII_TOKEN; }
++[a-zA-Z0-9\x80-\xFD]+ { byte_position += piatokleng; return UTF8_TOKEN; }
++
++[\n] { byte_position += piatokleng; return ZAP; }
++. { byte_position += piatokleng; return ZAP; }
++
++%%
++
++indri::parse::TokenizedDocument* indri::parse::TextTokenizerPIA::tokenize( indri::parse::UnparsedDocument* document ) {
++
++ _termBuffer.clear();
++ if ( _tokenize_entire_words)
++ _termBuffer.grow( document->textLength * 4);
++ else
++ _termBuffer.grow( document->textLength * 8 ); // extra null per char.
++
++ _document.terms.clear();
++ _document.tags.clear();
++ _document.positions.clear();
++
++ _document.metadata = document->metadata;
++ _document.text = document->text;
++ _document.textLength = document->textLength;
++ _document.content = document->content;
++ _document.contentLength = document->contentLength;
++
++ // byte offset
++ byte_position = document->content - document->text;
++
++ piatok_scan_bytes( document->content, document->contentLength );
++
++ // Main Tokenizer loop
++
++ int type;
++
++ while ( type = piatoklex() ) {
++
++ switch ( type ) {
++
++ case ASCII_TOKEN: processASCIIToken(); break;
++
++ case UTF8_TOKEN: processUTF8Token(); break;
++
++ case TAG: if ( _tokenize_markup ) processTag(); break;
++
++ default:
++ case ZAP:
++ break;
++
++ }
++
++ }
++
++ piatok_delete_buffer( YY_CURRENT_BUFFER );
++
++ return &_document;
++}
++
++// Member functions for processing tokenization events as dispatched
++// from the main tokenizer loop
++
++void indri::parse::TextTokenizerPIA::processTag() {
++
++ // Here, we parse the tag in a fashion that is relatively robust to
++ // malformed markup. toktext matches this pattern: <[^>]+>
++
++ if ( piatoktext[1] == '?' || piatoktext[1] == '!' ) {
++
++ // XML declaration like <? ... ?> and <!DOCTYPE ... >
++ return; // ignore
++
++ } else if ( piatoktext[1] == '/' ) { // close tag, eg. </FOO>
++
++ // Downcase the tag name.
++
++ int len = 0;
++
++ for ( char *c = piatoktext + 2;
++#ifndef WIN32
++ isalnum( *c ) || *c == '-' || *c == '_' || *c == ':' ; c++ ) {
++#else
++ ((*c >= 0) && isalnum( *c )) || *c == '-' || *c == '_' || *c == ':' ; c++ ) {
++#endif
++
++ *c = tolower( *c );
++ if ( *c == ':' ) *c = '_'; /* replace colon (from namespaces) */
++ len++;
++ }
++
++ TagEvent te;
++
++ te.open_tag = false;
++
++ // We need to write len characters, plus a NULL
++ char* write_loc = _termBuffer.write( len + 1 );
++ strncpy( write_loc, piatoktext + 2, len );
++ write_loc[len] = '\0';
++ te.name = write_loc;
++
++ // token position of tag event w/r/t token string
++ te.pos = _document.terms.size();
++
++ te.begin = byte_position - piatokleng;
++ te.end = byte_position;
++
++ _document.tags.push_back( te );
++
++#ifndef WIN32
++ } else if ( isalpha( piatoktext[1] ) ) {
++#else
++ } else if ( (piatoktext[1] >= 0) && (isalpha( piatoktext[1] ) )) {
++#endif
++
++ // Try to extract the tag name:
++
++ char* c = piatoktext + 1;
++ int i = 0;
++ int offset = 1; // current offset w/r/t byte_position - piatokleng
++ // it starts at one because it is incremented when c is, and c starts at one.
++ char* write_loc;
++
++#ifndef WIN32
++ while ( isalnum( c[i] ) || c[i] == '-' || c[i] == '_' || c[i] == ':' ) i++;
++#else
++ while ( ( (c[i] >= 0) && isalnum( c[i] )) || c[i] == '-' || c[i] == '_' || c[i] == ':' ) i++;
++#endif
++ if ( c[i] == '>' ) {
++
++ // open tag with no attributes, eg. <title>
++
++ // Ensure tag name is downcased
++ for ( int j = 0; j < i; j++ ) {
++ c[j] = tolower( c[j] );
++ if ( c[j] == ':' ) c[j] = '_'; /* replace colon (from namespaces) */
++ }
++
++ TagEvent te;
++
++ te.open_tag = true;
++
++ // need to write i characters, plus a NULL
++ char* write_loc = _termBuffer.write( i + 1 );
++ strncpy( write_loc, c, i );
++ write_loc[i] = '\0';
++ te.name = write_loc;
++
++ te.pos = _document.terms.size();
++
++ te.begin = byte_position - piatokleng;
++ te.end = byte_position;
++
++ _document.tags.push_back( te );
++
++#ifndef WIN32
++ } else if ( isspace( c[i] ) ) {
++#else
++ } else if ( (c[i] >= 0) && (isspace( c[i] ) )) {
++#endif
++
++ // open tag with attributes, eg. <A HREF="www.foo.com/bar">
++
++ TagEvent te;
++
++ te.open_tag = true;
++
++ // Ensure tag name is downcased
++ for ( int j = 0; j < i; j++ ) {
++ c[j] = tolower( c[j] );
++ if ( c[j] == ':' ) c[j] = '_'; /* replace colon (from namespaces) */
++ }
++
++ // need to write i characters, plus a NULL
++ char* write_loc = _termBuffer.write( i + 1 );
++ strncpy( write_loc, c, i );
++ write_loc[i] = '\0';
++ te.name = write_loc;
++ c += i;
++ offset += i;
++
++#ifndef WIN32
++ while ( isspace( *c ) ) { c++; offset++; }
++#else
++ while (((*c) >=0) && isspace( *c )) { c++; offset++; }
++#endif
++
++ te.pos = _document.terms.size();
++
++ te.begin = byte_position - piatokleng;
++ te.end = byte_position;
++
++ // Now search for attributes:
++
++ while ( *c != '>' && *c != '\0' ) {
++
++ AttributeValuePair avp;
++
++ // Try to extract attribute name:
++
++ i = 0;
++#ifndef WIN32
++ while ( isalnum( c[i] ) || c[i] == '-' || c[i] == '_' ) i++;
++#else
++ while ( (c[i] >= 0) && isalnum( c[i] ) || c[i] == '-' || c[i] == '_') i++;
++#endif
++
++ if ( i == 0 ) break;
++
++ // Ensure attribute name is downcased
++ for ( int j = 0; j < i; j++ )
++ c[j] = tolower( c[j] );
++
++ // need to write i characters, plus a NULL
++ write_loc = _termBuffer.write( i + 1 );
++ strncpy( write_loc, c, i );
++ write_loc[i] = '\0';
++ avp.attribute = write_loc;
++ c += i;
++ offset += i;
++
++ // attributes can be foo\s*=\s*"bar[">] or foo\s*=\s*bar
++
++ // ignore any spaces
++#ifndef WIN32
++ while ( isspace( *c ) ) { c++; offset++; }
++#else
++ while (((*c) >=0) && isspace( *c )) { c++; offset++; }
++#endif
++
++ if ( *c == '=' ) {
++
++ c++; // get past the '=' sign.
++ offset++;
++
++#ifndef WIN32
++ while ( isspace( *c ) ) { c++; offset++; }
++#else
++ while (((*c) >=0) && isspace( *c )) { c++; offset++; }
++#endif
++
++ if ( *c == '>' ) {
++
++ // common malformed markup <a href=>
++
++ // Insert empty attribute value
++ // need to write a single NULL
++ write_loc = _termBuffer.write( 1 );
++ write_loc[0] = '\0';
++ avp.value = write_loc;
++ avp.begin = byte_position - piatokleng + offset;
++ avp.end = byte_position - piatokleng + offset;
++
++ } else {
++
++ bool quoted = true;
++ char quote_char;
++ if ( *c == '"' || *c =='\'' ) { quote_char = *c; c++; offset++; }
++ else quoted = false;
++
++ // Attribute value starts here.
++
++ i = 0;
++// make sure the opening and closing quote character match...
++ if ( quoted )
++// while ( c[i] != '"' && c[i] != '>' && c[i] !='\'') i++;
++ while ( c[i] != quote_char && c[i] != '>') i++;
++ else
++#ifndef WIN32
++ while ( ! isspace( c[i] ) && c[i] != '>' ) i++;
++#else
++ while ( ((c[i] >= 0) && ! isspace( c[i] ) ) && c[i] != '>' ) i++;
++#endif
++
++ // need to write i characters, plus a NULL
++ write_loc = _termBuffer.write( i + 1 );
++ strncpy( write_loc, c, i );
++ write_loc[i] = '\0';
++ avp.value = write_loc;
++ avp.begin = byte_position - piatokleng + offset;
++ avp.end = byte_position - piatokleng + offset + i;
++ c += i;
++ offset += i;
++
++ }
++ } else {
++
++ // Insert empty attribute value
++ // need to write a single NULL
++ write_loc = _termBuffer.write( 1 );
++ write_loc[0] = '\0';
++ avp.value = write_loc;
++ avp.begin = byte_position - piatokleng + offset;
++ avp.end = byte_position - piatokleng + offset;
++ }
++#ifndef WIN32
++ while ( isspace( *c ) || *c == '"' ) { c++; offset++; }
++#else
++ while ( ((*c >= 0) && isspace( *c )) || *c == '"' ) { c++; offset++; }
++#endif
++
++ te.attributes.push_back( avp );
++ }
++
++ _document.tags.push_back( te );
++
++ }
++
++ // One of the cases that is ignored is this common malformed
++ // markup <foo=bar> with no tag name. Another is the case
++ // of an email address <[email protected]>
++
++
++ }
++}
++
++void indri::parse::TextTokenizerPIA::processUTF8Token() {
++
++ // A UTF-8 token, as recognized by flex, could actually be
++ // a mixed ASCII/UTF-8 string containing any number of
++ // UTF-8 characters, so we re-tokenize it here.
++
++ indri::utility::HashTable<UINT64,const int>& unicode = _transcoder.unicode();
++
++ int len = strlen( piatoktext );
++
++ UINT64* unicode_chars = new UINT64[len + 1];
++ int* offsets = new int[len + 1];
++ int* lengths = new int[len + 1];
++ _transcoder.utf8_decode( piatoktext, &unicode_chars, NULL, NULL,
++ &offsets, &lengths );
++
++ const int* p;
++ int cls; // Character class of current UTF-8 character
++ // offset of current UTF-8 character w/r/t toktext stored in offsets[i]
++ // byte length of current UTF-8 character stored in lengths[i]
++
++ int offset = 0; // Position of start of current *token* (not character) w/r/t toktext
++ int extent = 0; // Extent for this *token* including trailing punct
++ int piatoken_len = 0; // Same as above, minus the trailing punctuation
++
++ char buf[64];
++
++ // If this flag is true, we have punctuation symbols at the end of a
++ // token, so do not attach another letter to this token.
++ bool no_letter = false;
++
++ // In case there are malformed characters preceding the good
++ // characters:
++ offset = offsets[0];
++
++ for ( int i = 0; unicode_chars[i] != 0; i++ ) {
++
++ p = unicode.find( unicode_chars[i] );
++ cls = p ? *p : 0;
++
++ if ( ! _tokenize_entire_words ) { // Tokenize by character
++
++ if ( cls != 0 && cls != 3 && cls != 5 && cls != 9 ) {
++
++ writeToken( piatoktext + offsets[i], lengths[i],
++ byte_position - piatokleng + offsets[i],
++ byte_position - piatokleng + offsets[i] + lengths[i] );
++ }
++ continue;
++ }
++
++ // If this is not the first time through this loop, we need
++ // to check to see if any bytes in toktext were skipped
++ // during the UTF-8 analysis:
++
++ if ( i != 0 && offset + piatoken_len != offsets[i] ) {
++
++ // Write out the token we are working on, if any:
++
++ if ( piatoken_len > 0 ) {
++
++ writeToken( piatoktext + offset, piatoken_len,
++ byte_position - piatokleng + offset,
++ byte_position - piatokleng + offset + extent );
++ }
++
++ extent = 0;
++ piatoken_len = 0;
++ no_letter = false;
++ offset = offsets[i];
++ }
++
++ // Tokenize by word:
++
++ switch ( cls ) {
++
++ case 4: // Currency symbol: always extracted alone
++ // Action: write the token we are working on,
++ // and write this symbol as a separate token
++ writeToken( piatoktext + offset, extent,
++ byte_position - piatokleng + offset,
++ byte_position - piatokleng + offset + extent );
++
++ offset += extent;
++
++ writeToken( piatoktext + offset, lengths[i],
++ byte_position - piatokleng + offset,
++ byte_position - piatokleng + offset + lengths[i] );
++
++ offset += lengths[i];
++ piatoken_len = 0;
++ extent = 0;
++ no_letter = false;
++ break;
++
++ case 1: // Apostrophe
++ case 10: // Decimal separator
++ case 6: // Letter
++ case 7: // Digit
++ // Action: add this character to the end of the token we are
++ // working on
++ if ( no_letter ) { // This is a token boundary
++ writeToken( piatoktext + offset, piatoken_len,
++ byte_position - piatokleng + offset,
++ byte_position - piatokleng + offset + extent );
++
++ offset += extent;
++ extent = 0;
++ piatoken_len = 0;
++ no_letter = false;
++
++ }
++
++ extent += lengths[i];
++ piatoken_len += lengths[i];
++ break;
++
++ case 2: // Percent
++ case 8: // Punctuation
++ case 12: // Thousands separator
++ case 11: // Hyphen
++ // Action: These characters are included in the extent of the
++ // token we are working on.
++ no_letter = true;
++ extent += lengths[i];
++ break;
++
++ case 0: // No character class!
++ case 3: // Control character
++ case 5: // Non-punctuation symbol
++ case 9: // Whitespace
++ default:
++ // Action: write the token we are working on. Do not include
++ // this character in any future token.
++ writeToken( piatoktext + offset, piatoken_len,
++ byte_position - piatokleng + offset,
++ byte_position - piatokleng + offset + extent );
++
++ offset += (extent + lengths[i]); // Include current character
++ extent = 0;
++ piatoken_len = 0;
++ no_letter = false;
++
++ break;
++ }
++ }
++
++ // Write out last token
++ if ( piatoken_len > 0 )
++ writeToken( piatoktext + offset, piatoken_len,
++ byte_position - piatokleng + offset,
++ byte_position - piatokleng + offset + extent );
++
++ delete[] unicode_chars;
++ delete[] offsets;
++ delete[] lengths;
++}
++
++void indri::parse::TextTokenizerPIA::processASCIIToken() {
++
++ int piatoken_len = strlen( piatoktext );
++
++ // token_len here is the length of the token without
++ // any trailing punctuation.
++
++ for ( int i = piatoken_len - 1; i > 0; i-- ) {
++
++ if ( ! ispunct( piatoktext[i] ) )
++ break;
++ else
++ piatoken_len--;
++ }
++
++ if ( _tokenize_entire_words ) {
++
++ writeToken( piatoktext, piatoken_len, byte_position - piatokleng, byte_position );
++
++ } else {
++
++ for ( int i = 0; i < piatoken_len; i++ )
++ writeToken( piatoktext + i, 1, byte_position - piatokleng + i,
++ byte_position - piatokleng + i + 1 );
++ }
++}
++
++
++// ObjectHandler implementation
++
++void indri::parse::TextTokenizerPIA::handle( indri::parse::UnparsedDocument* document ) {
++
++ _handler->handle( tokenize( document ) );
++}
++
++void indri::parse::TextTokenizerPIA::setHandler( ObjectHandler<indri::parse::TokenizedDocument>& h ) {
++
++ _handler = &h;
++}
++
++void indri::parse::TextTokenizerPIA::writeToken( char* token, int piatoken_len,
++ int extent_begin, int extent_end ) {
++
++
++ // The TermExtent for a token will include trailing punctuation.
++ // The purpose for this is that it makes for a nicer display when a
++ // sequence of tokens (say, a sentence) is retrieved and shown to
++ // the user.
++
++ TermExtent extent;
++ extent.begin = extent_begin;
++ extent.end = extent_end;
++ _document.positions.push_back( extent );
++
++ // The terms entry for a token won't include the punctuation.
++
++ char* write_loc = _termBuffer.write( piatoken_len + 1 );
++ strncpy( write_loc, token, piatoken_len );
++ write_loc[piatoken_len] = '\0';
++ _document.terms.push_back( write_loc );
++}
++
++
+--- indri-5.4/include/indri/TextTokenizerPIA.hpp po črc 15 14:38:50 2013
++++ indri-5.4/include/indri/TextTokenizerPIA.hpp po črc 15 14:36:54 2013
+@@ -0,0 +1,73 @@
++/*==========================================================================
++ * Copyright (c) 2003-2005 University of Massachusetts. All Rights Reserved.
++ *
++ * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
++ * is subject to the terms of the software license set forth in the LICENSE
++ * file included with this software, and also available at
++ * http://www.lemurproject.org/license.html
++ *
++ *==========================================================================
++ */
++
++//
++// TextTokenizerPIA
++//
++// 15 September 2005 -- mwb
++//
++
++#ifndef INDRI_TEXTTOKENIZERPIA_HPP
++#define INDRI_TEXTTOKENIZERPIA_HPP
++
++#include <stdio.h>
++#include <string>
++#include <map>
++
++#include "indri/IndriTokenizer.hpp"
++#include "indri/Buffer.hpp"
++#include "indri/TagEvent.hpp"
++#include "indri/UnparsedDocument.hpp"
++#include "indri/TokenizedDocument.hpp"
++#include "indri/UTF8Transcoder.hpp"
++
++namespace indri {
++ namespace parse {
++
++ class TextTokenizerPIA : public Tokenizer {
++
++ public:
++ TextTokenizerPIA( bool tokenize_markup = true, bool tokenize_entire_words = true ) : _handler(0) {
++
++ _tokenize_markup = tokenize_markup;
++ _tokenize_entire_words = tokenize_entire_words;
++ }
++
++ ~TextTokenizerPIA() {}
++
++ TokenizedDocument* tokenize( UnparsedDocument* document );
++
++ void handle( UnparsedDocument* document );
++ void setHandler( ObjectHandler<TokenizedDocument>& h );
++
++ protected:
++ void processASCIIToken();
++ void processUTF8Token();
++ void processTag();
++
++ indri::utility::Buffer _termBuffer;
++ UTF8Transcoder _transcoder;
++
++ bool _tokenize_markup;
++ bool _tokenize_entire_words;
++
++ private:
++ ObjectHandler<TokenizedDocument>* _handler;
++ TokenizedDocument _document;
++
++ void writeToken( char* token, int token_len, int extent_begin,
++ int extent_end );
++ };
++ }
++}
++
++#endif // INDRI_TEXTTOKENIZERPIA_HPP
++
+--- indri-5.4/src/TokenizerFactory.cpp po črc 15 14:39:30 2013
++++ indri-5.4/src/TokenizerFactory.cpp po črc 15 14:29:11 2013
+@@ -22,6 +22,7 @@
+
+ #include "indri/TextTokenizer.hpp"
+ // Add an #include for your Tokenizer here.
++#include "indri/TextTokenizerPIA.hpp"
+
+
+ #define TOKENIZER_WORD ("Word")
+@@ -29,6 +30,8 @@
+ #define TOKENIZER_CHAR ("Char")
+ #define TOKENIZER_CHAR_NO_MARKUP ("Char without Markup")
+ // Add a #define for your Tokenizer here.
++#define TOKENIZER_PIA ("PIA")
++#define TOKENIZER_PIA_NO_MARKUP ("PIA without Markup")
+
+
+ //
+@@ -78,8 +81,23 @@
+ // got "char"
+ return TOKENIZER_CHAR;
+
++ } else if ( ( name[0] == 'p' || name[0] == 'P' ) &&
++ ( name[1] == 'i' || name[1] == 'I' ) &&
++ ( name[2] == 'a' || name[2] == 'A' ) ) {
++
++ if ( name[3] == '-' &&
++ ( name[4] == 'n' || name[4] == 'N' ) &&
++ ( name[5] == 'o' || name[5] == 'O' ) ) {
++
++ // got "pia-nomarkup"
++ return TOKENIZER_PIA_NO_MARKUP;
++ }
++
++ // got "pia"
++ return TOKENIZER_PIA;
+ }
+
++
+ return "";
+ }
+
+@@ -105,6 +123,14 @@
+
+ tokenizer = new indri::parse::TextTokenizer( false, false );
+
++ } else if ( preferred == TOKENIZER_PIA ) {
++
++ tokenizer = new indri::parse::TextTokenizerPIA();
++
++ } else if ( preferred == TOKENIZER_PIA_NO_MARKUP ) {
++
++ tokenizer = new indri::parse::TextTokenizerPIA( false );
++
+ } else {
+
+ LEMUR_THROW( LEMUR_RUNTIME_ERROR, name + " is not a known tokenizer." );
+--- indri-5.4/src/FileClassEnvironmentFactory.cpp po črc 15 14:40:19 2013
++++ indri-5.4/src/FileClassEnvironmentFactory.cpp po črc 15 14:29:12 2013
+@@ -189,6 +189,20 @@
+ trec_conflations // conflations
+ },
+ {
++ "trecpia", // name
++ "xml", // parser
++ "pia", // tokenizer
++ "tagged", // iterator
++ "<DOC>", // startDocTag
++ "</DOC>", // endDocTag
++ NULL, // endMetadataTag
++ trec_include_tags, // includeTags
++ NULL, // excludeTags
++ trec_index_tags, // indexTags
++ trec_metadata_tags, // metadataTags
++ trec_conflations // conflations
++ },
++ {
+ "trecchar", // name
+ "xml", // parser
+ "char", // tokenizer
+--- indri-5.4/Makefile.app.in 2013-09-04 06:31:06.740210927 -0700
++++ indri-5.4/Makefile.app.in 2013-09-04 06:27:24.857989779 -0700
+@@ -1,22 +1,26 @@
++include MakeDefns
++
+ ## your application name here
+-APP=
++APP=pia_wrapper
+ SRC=$(APP).cpp
+ ## extra object files for your app here
+ OBJ=
++OUTPUT=lib$(APP).so.1
+
+ prefix = @prefix@
+ exec_prefix = ${prefix}
+ libdir = @libdir@
+ includedir = @includedir@
+-INCPATH=-I$(includedir)
+-LIBPATH=-L$(libdir)
++INCPATH=-Iinclude -Icontrib/lemur/include
++LIBPATH=-Lobj
+ CXXFLAGS=@DEFS@ @CPPFLAGS@ @CXXFLAGS@ $(INCPATH)
+-CPPLDFLAGS = @LDFLAGS@ -lindri @LIBS@
++CPPLDFLAGS = @LDFLAGS@ -lnvpair -lindri @LIBS@
+
+ all:
+- $(CXX) $(CXXFLAGS) $(SRC) -o $(APP) $(OBJ) $(LIBPATH) $(CPPLDFLAGS)
++ $(CXX) $(CXXFLAGS) $(SRC) -fpic -shared -static-libgcc -h $(OUTPUT) -o $(OUTPUT) $(OBJ) $(LIBPATH) $(CPPLDFLAGS)
+
+ clean:
+ rm -f $(APP)
+
+-
++install:
++ cp $(OUTPUT) $(libdir)
+--- indri-5.4/Makefile 2013-09-12 07:39:16.027125829 -0700
++++ indri-5.4/Makefile 2013-09-12 07:38:44.720450641 -0700
+@@ -73,5 +73,6 @@
+ $(MAKE) install -C doc
+ $(MAKE) -C site-search install
+ $(INSTALL_DATA) Makefile.app $(pkgdatadir)
++ $(MAKE) -f Makefile.app install
+
+ test:
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/patches/remove_xpdf.patch Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,358 @@
+Remove xpdf support from the build.
+--- indri-5.4/MakeDefns.in čt črc 4 15:01:17 2013
++++ indri-5.4/MakeDefns.in čt črc 4 15:00:40 2013
+@@ -48,7 +48,7 @@
+ PHPINCLUDE = @PHPINCLUDE@
+ MCS=@MCS@
+
+-DEPENDENCIES = lemur xpdf
++DEPENDENCIES = lemur
+ ifeq ($(NEED_ANTLR), 1)
+ DEPENDENCIES += antlr
+ endif
+--- indri-5.4/src/PDFDocumentExtractor.cpp čt črc 4 15:08:46 2013
++++ indri-5.4/src/PDFDocumentExtractor.cpp čt črc 4 15:08:28 2013
+@@ -1,214 +1,0 @@
+-/*==========================================================================
+- * Copyright (c) 2003-2004 University of Massachusetts. All Rights Reserved.
+- *
+- * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
+- * is subject to the terms of the software license set forth in the LICENSE
+- * file included with this software, and also available at
+- * http://www.lemurproject.org/license.html
+- *
+- *==========================================================================
+-*/
+-
+-
+-//
+-// PDFDocumentExtractor
+-//
+-// 25 June 2004 -- tds
+-//
+-
+-#include "indri/PDFDocumentExtractor.hpp"
+-#include "indri/Buffer.hpp"
+-
+-#include "GString.h"
+-#include "TextOutputDev.h"
+-#include "PDFDoc.h"
+-
+-#include "Object.h"
+-#include "Stream.h"
+-#include "Array.h"
+-#include "Dict.h"
+-#include "XRef.h"
+-#include "Page.h"
+-#include "CharTypes.h"
+-#include "GlobalParams.h"
+-#include "lemur/Exception.hpp"
+-
+-static void buffer_write( void* stream, char* text, int len ) {
+- indri::utility::Buffer* buffer = (indri::utility::Buffer*) stream;
+-
+- if( buffer->position() ) {
+- buffer->unwrite(1);
+- }
+-
+- memcpy( buffer->write(len), text, len );
+- if( text[len-1] != 0 )
+- *buffer->write(1) = 0;
+-}
+-
+-indri::parse::PDFDocumentExtractor::PDFDocumentExtractor() {
+- globalParams = new GlobalParams(0);
+- _title="";
+- _author="";
+-}
+-
+-indri::parse::PDFDocumentExtractor::~PDFDocumentExtractor() {
+- delete globalParams;
+- globalParams = 0;
+-}
+-
+-
+-void indri::parse::PDFDocumentExtractor::seekValue(indri::xml::XMLNode* node, std::string &metaTag) {
+- if (node == NULL) {
+- return;
+- }
+-
+- const std::vector<indri::xml::XMLNode*>& children = node->getChildren();
+- for( size_t i=0; i<children.size(); i++ ) {
+- indri::xml::XMLNode* child = children[i];
+- metaTag = child->getValue();
+- if(metaTag.length()==0)
+- seekValue(child,metaTag);
+- else
+- return;
+- }
+-
+-}
+-
+-void indri::parse::PDFDocumentExtractor::appendPdfMetaData(indri::xml::XMLNode* node) {
+- indri::xml::XMLNode* current = 0;
+-
+- if (node == NULL) {
+- return;
+- }
+-
+- const std::vector<indri::xml::XMLNode*>& children = node->getChildren();
+-
+- for( size_t i=0; i<children.size(); i++ ) {
+- indri::xml::XMLNode* child = children[i];
+- std::string name = child->getName();
+- if(name=="dccreator")
+- {
+- seekValue(child,_author);
+- }
+- if(name=="dctitle")
+- {
+- seekValue(child,_title);
+- }
+- appendPdfMetaData(child);
+-
+- }
+-
+-
+-
+-}
+-
+-void indri::parse::PDFDocumentExtractor::open( const std::string& filename ) {
+- _documentTextBuffer.clear();
+- _documentPath = filename;
+-}
+-
+-void indri::parse::PDFDocumentExtractor::close() {
+- _documentPath = "";
+-}
+-
+-indri::parse::UnparsedDocument* indri::parse::PDFDocumentExtractor::nextDocument() {
+- if( !_documentPath.length() )
+- return 0;
+-
+- PDFDoc* doc = 0;
+- TextOutputDev* textOut = 0;
+- GString* gfilename = new GString(_documentPath.c_str());
+- doc = new PDFDoc( gfilename );
+- // if the doc is not ok, or ok to copy, it
+- // will be a document of length 0.
+- if( doc->isOk() && doc->okToCopy() ) {
+- void* stream = &_documentTextBuffer;
+- textOut = new TextOutputDev( buffer_write, stream, gFalse, gFalse);
+- if ( textOut->isOk() ) {
+- int firstPage = 1;
+- int lastPage = doc->getNumPages();
+- double hDPI=72.0;
+- double vDPI=72.0;
+- int rotate=0;
+- GBool useMediaBox=gFalse;
+- GBool crop=gTrue;
+- GBool printing=gFalse;
+- if(doc->readMetadata()!=NULL)
+- {
+- GString rawMetaData = doc->readMetadata();
+- GString preparedMetaData="";
+-
+- //zoek <rdf:RDF en eindig bij </rdf:RDF>!!
+- for(int x=0; x<rawMetaData.getLength(); x++) {
+- if(rawMetaData.getChar(x)!='?' && rawMetaData.getChar(x)!=':') {
+- //skip characters which the XMLReader doesn't understand
+- preparedMetaData.append(rawMetaData.getChar(x));
+- }
+- }
+- std::string metaData(preparedMetaData.getCString());
+- int startbegin = metaData.find("<rdf");
+- int stopend = metaData.find(">", metaData.rfind("</rdf") );
+- metaData = metaData.substr(startbegin, (stopend-startbegin)+1 );
+-
+-
+- indri::xml::XMLReader reader;
+-
+- try {
+- std::auto_ptr<indri::xml::XMLNode> result( reader.read( metaData.c_str() ) );
+- appendPdfMetaData( result.get() );
+- } catch( lemur::api::Exception& e ) {
+- LEMUR_RETHROW( e, "Had trouble reading PDF metadata" );
+- }
+- if( _author.length()>0 || _title.length()>0 )
+- {
+- std::string createdPdfHeader;
+- createdPdfHeader="<head>\n";
+- if(_title.length()>0) {
+- createdPdfHeader+="<title>";
+- createdPdfHeader+=_title;
+- createdPdfHeader+="</title>\n";
+- }
+- if(_author.length()>0) {
+- createdPdfHeader+="<author>";
+- createdPdfHeader+=_author;
+- createdPdfHeader+="</author>\n";
+- }
+- createdPdfHeader+="</head>\n";
+- char *metastream = _documentTextBuffer.write( createdPdfHeader.length()+1 );
+- strcpy(metastream, createdPdfHeader.c_str());
+- }
+- }
+- doc->displayPages(textOut, firstPage, lastPage, hDPI, vDPI, rotate, useMediaBox, crop, printing);
+- }
+- }
+-
+-
+- delete textOut;
+- delete doc;
+-
+- _unparsedDocument.textLength = _documentTextBuffer.position();
+- _unparsedDocument.contentLength = _unparsedDocument.textLength ? _documentTextBuffer.position() - 1 : 0 ; // no null 0 if text is empty.
+- char* docnoPoint = _documentTextBuffer.write( _documentPath.length()+1 );
+- strcpy( docnoPoint, _documentPath.c_str() );
+- _unparsedDocument.text = _documentTextBuffer.front();
+- _unparsedDocument.content = _documentTextBuffer.front();
+- _unparsedDocument.metadata.clear();
+-
+- indri::parse::MetadataPair pair;
+-
+- pair.key = "path";
+- pair.value = docnoPoint;
+- pair.valueLength = _documentPath.length()+1;
+- _unparsedDocument.metadata.push_back( pair );
+-
+- _docnostring.assign(_documentPath.c_str() );
+- cleanDocno();
+- pair.value = _docnostring.c_str();
+- pair.valueLength = _docnostring.length()+1;
+- pair.key = "docno";
+- _unparsedDocument.metadata.push_back( pair );
+-
+- _documentPath = "";
+-
+- return &_unparsedDocument;
+-}
+--- indri-5.4/include/indri/PDFDocumentExtractor.hpp čt črc 4 15:16:04 2013
++++ indri-5.4/include/indri/PDFDocumentExtractor.hpp čt črc 4 15:15:00 2013
+@@ -1,57 +1,0 @@
+-/*==========================================================================
+- * Copyright (c) 2003-2004 University of Massachusetts. All Rights Reserved.
+- *
+- * Use of the Lemur Toolkit for Language Modeling and Information Retrieval
+- * is subject to the terms of the software license set forth in the LICENSE
+- * file included with this software, and also available at
+- * http://www.lemurproject.org/license.html
+- *
+- *==========================================================================
+- */
+-
+-
+-//
+-// PDFDocumentExtractor
+-//
+-// 25 June 2004 -- tds
+-//
+-
+-#ifndef INDRI_PDFDOCUMENTEXTRACTOR_HPP
+-#define INDRI_PDFDOCUMENTEXTRACTOR_HPP
+-
+-#include "lemur/lemur-compat.hpp"
+-#include "indri/Buffer.hpp"
+-#include "indri/UnparsedDocument.hpp"
+-#include "indri/DocumentIterator.hpp"
+-#include "indri/XMLReader.hpp"
+-#include "indri/XMLNode.hpp"
+-#include "indri/XMLWriter.hpp"
+-#include <string>
+-namespace indri
+-{
+- namespace parse
+- {
+-
+- class PDFDocumentExtractor : public DocumentIterator {
+- indri::utility::Buffer _documentTextBuffer;
+- UnparsedDocument _unparsedDocument;
+- std::string _documentPath;
+-
+- public:
+- PDFDocumentExtractor();
+- ~PDFDocumentExtractor();
+-
+- void open( const std::string& filename );
+- UnparsedDocument* nextDocument();
+- void appendPdfMetaData(indri::xml::XMLNode* node);
+- void seekValue(indri::xml::XMLNode* node, std::string &metaTag);
+- void close();
+- private:
+- std::string _title;
+- std::string _author;
+-
+- };
+- }
+-}
+-
+-#endif // INDRI_PDFDOCUMENTEXTRACTOR_HPP
+--- indri-5.4/src/DocumentIteratorFactory.cpp čt črc 4 15:24:24 2013
++++ indri-5.4/src/DocumentIteratorFactory.cpp čt črc 4 15:23:27 2013
+@@ -18,7 +18,6 @@
+
+ #include "indri/DocumentIteratorFactory.hpp"
+
+-#include "indri/PDFDocumentExtractor.hpp"
+ #include "indri/TaggedDocumentIterator.hpp"
+ #include "indri/WARCDocumentIterator.hpp"
+ #include "indri/TextDocumentExtractor.hpp"
+@@ -36,7 +35,6 @@
+
+ #define TYPE_TAGGED ( "Tagged Document Collection" )
+ #define TYPE_WARC ( "WARC Document Collection" )
+-#define TYPE_PDF ( "Adobe PDF" )
+ #define TYPE_WORD ( "Microsoft Word" )
+ #define TYPE_PPT ( "Microsoft PowerPoint" )
+ #define TYPE_MBOX ( "Mailbox" )
+@@ -53,8 +51,6 @@
+ result = iter;
+ } else if( preferred == TYPE_WARC ) {
+ result = new indri::parse::WARCDocumentIterator();
+- } else if( preferred == TYPE_PDF ) {
+- result = new indri::parse::PDFDocumentExtractor();
+ } else if( preferred == TYPE_TEXT ) {
+ result = new indri::parse::TextDocumentExtractor();
+ } else if( preferred == TYPE_MBOX ) {
+@@ -83,8 +79,6 @@
+ return TYPE_TAGGED;
+ } else if( type == "warc" || type == TYPE_WARC ) {
+ return TYPE_WARC;
+- } else if( type == "pdf" || type == "adobe pdf" || type == TYPE_PDF ) {
+- return TYPE_PDF;
+ } else if( type == "doc" || type == "msword" || type == "word" || type == "microsoft word" || type == TYPE_WORD ) {
+ return TYPE_WORD;
+ } else if( type == "ppt" || type == "powerpoint" || type == "msppt" || type == "microsoft powerpoint" || type == TYPE_PPT ) {
+--- indri-5.4/src/FileClassEnvironmentFactory.cpp čt črc 4 15:33:56 2013
++++ indri-5.4/src/FileClassEnvironmentFactory.cpp čt črc 4 15:33:20 2013
+@@ -55,8 +55,6 @@
+ // case. Values specified here can be in mixed case, since values are
+ // matched in a case-sensitive manner.
+
+-static const char* pdf_index_tags[] = { "title", "author", 0 };
+-static const char* pdf_metadata_tags[] = { "title", "author", 0 };
+ static const char* html_index_tags[] = { "title", "author", "h1", "h2", "h3", "h4", 0 };
+ static const char* html_metadata_tags[] = { "title", "author", 0 };
+ //static const char* html_conflations[] = { "h1", NULL, NULL, "heading", "h2", NULL, NULL, "heading", "h3", NULL, NULL, "heading", "h4", NULL, NULL, "heading", "bloghpno", NULL, NULL, "docno", 0, 0, 0, 0 };
+@@ -279,21 +277,6 @@
+ #endif
+
+ {
+- "pdf", // name
+- "html", // parser
+- "word", // tokenizer
+- "pdf", // iterator
+- NULL, // startDocTag
+- NULL, // endDocTag
+- NULL, // endMetadataTag
+- NULL, // includeTags
+- NULL, // excludeTags
+- pdf_index_tags, // indexTags
+- pdf_metadata_tags, // metadataTags
+- NULL // conflations
+- },
+-
+- {
+ "txt", // name
+ "text", // parser
+ "word", // tokenizer
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/indri/resolve.deps Tue Jan 07 12:31:30 2014 +0100
@@ -0,0 +1,5 @@
+library/zlib
+system/library
+system/library/gcc/gcc-c++-runtime
+system/library/gcc/gcc-c-runtime
+system/library/math