Index: lams_build/build.xml
===================================================================
diff -u -rf90ca89942ca1a4ebca88351343904dcfa0a654e -r8e753b5d4147c553bdb1336b44d67fd9cae827ad
--- lams_build/build.xml (.../build.xml) (revision f90ca89942ca1a4ebca88351343904dcfa0a654e)
+++ lams_build/build.xml (.../build.xml) (revision 8e753b5d4147c553bdb1336b44d67fd9cae827ad)
@@ -452,11 +452,12 @@
+
-
+
Index: lams_build/lib/apache-tika/juniversalchardet-2.4.0.jar
===================================================================
diff -u
Binary files differ
Index: lams_build/lib/apache-tika/tika.module.xml
===================================================================
diff -u -rf90ca89942ca1a4ebca88351343904dcfa0a654e -r8e753b5d4147c553bdb1336b44d67fd9cae827ad
--- lams_build/lib/apache-tika/tika.module.xml (.../tika.module.xml) (revision f90ca89942ca1a4ebca88351343904dcfa0a654e)
+++ lams_build/lib/apache-tika/tika.module.xml (.../tika.module.xml) (revision 8e753b5d4147c553bdb1336b44d67fd9cae827ad)
@@ -27,6 +27,7 @@
+
Index: lams_common/src/java/org/lamsfoundation/lams/util/FileUtil.java
===================================================================
diff -u -rdd71ad672752285a596518b851aa4d1ecd687bd5 -r8e753b5d4147c553bdb1336b44d67fd9cae827ad
--- lams_common/src/java/org/lamsfoundation/lams/util/FileUtil.java (.../FileUtil.java) (revision dd71ad672752285a596518b851aa4d1ecd687bd5)
+++ lams_common/src/java/org/lamsfoundation/lams/util/FileUtil.java (.../FileUtil.java) (revision 8e753b5d4147c553bdb1336b44d67fd9cae827ad)
@@ -32,8 +32,10 @@
import org.apache.log4j.Logger;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
+import org.apache.tika.parser.AutoDetectParser;
+import org.apache.tika.parser.EmptyParser;
import org.apache.tika.parser.ParseContext;
-import org.apache.tika.parser.pdf.PDFParser;
+import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.hibernate.id.Configurable;
import org.hibernate.id.IdentifierGenerator;
@@ -959,17 +961,26 @@
FileUtils.deleteQuietly(uploadDir);
}
- public static String getPDFContents(File file) throws TikaException, IOException, SAXException {
+ public static String getTextFileContents(File file) throws TikaException, IOException, SAXException {
BodyContentHandler handler = new BodyContentHandler(-1);
- ParseContext pcontext = new ParseContext();
+ ParseContext parseContext = new ParseContext();
+ parseContext.set(Parser.class, new EmptyParser());
Metadata metadata = new Metadata();
- PDFParser pdfparser = new PDFParser();
+ AutoDetectParser parser = new AutoDetectParser();
+ String contents = null;
try (InputStream inputStream = new FileInputStream(file)) {
- pdfparser.parse(inputStream, handler, metadata, pcontext);
+ parser.parse(inputStream, handler, metadata, parseContext);
+ } catch (Exception e) {
+ contents = handler.toString().strip();
+ if (StringUtils.isBlank(contents)) {
+ throw e;
+ }
}
- String contents = handler.toString().strip();
+ if (contents == null) {
+ contents = handler.toString().strip();
+ }
if (log.isDebugEnabled()) {
- log.debug("PDF contents:\n" + contents);
+ log.debug("File contents:\n" + contents);
}
return contents;