Index: lams_build/build.xml =================================================================== diff -u -rf90ca89942ca1a4ebca88351343904dcfa0a654e -r8e753b5d4147c553bdb1336b44d67fd9cae827ad --- lams_build/build.xml (.../build.xml) (revision f90ca89942ca1a4ebca88351343904dcfa0a654e) +++ lams_build/build.xml (.../build.xml) (revision 8e753b5d4147c553bdb1336b44d67fd9cae827ad) @@ -452,11 +452,12 @@ + - + Index: lams_build/lib/apache-tika/juniversalchardet-2.4.0.jar =================================================================== diff -u Binary files differ Index: lams_build/lib/apache-tika/tika.module.xml =================================================================== diff -u -rf90ca89942ca1a4ebca88351343904dcfa0a654e -r8e753b5d4147c553bdb1336b44d67fd9cae827ad --- lams_build/lib/apache-tika/tika.module.xml (.../tika.module.xml) (revision f90ca89942ca1a4ebca88351343904dcfa0a654e) +++ lams_build/lib/apache-tika/tika.module.xml (.../tika.module.xml) (revision 8e753b5d4147c553bdb1336b44d67fd9cae827ad) @@ -27,6 +27,7 @@ + Index: lams_common/src/java/org/lamsfoundation/lams/util/FileUtil.java =================================================================== diff -u -rdd71ad672752285a596518b851aa4d1ecd687bd5 -r8e753b5d4147c553bdb1336b44d67fd9cae827ad --- lams_common/src/java/org/lamsfoundation/lams/util/FileUtil.java (.../FileUtil.java) (revision dd71ad672752285a596518b851aa4d1ecd687bd5) +++ lams_common/src/java/org/lamsfoundation/lams/util/FileUtil.java (.../FileUtil.java) (revision 8e753b5d4147c553bdb1336b44d67fd9cae827ad) @@ -32,8 +32,10 @@ import org.apache.log4j.Logger; import org.apache.tika.exception.TikaException; import org.apache.tika.metadata.Metadata; +import org.apache.tika.parser.AutoDetectParser; +import org.apache.tika.parser.EmptyParser; import org.apache.tika.parser.ParseContext; -import org.apache.tika.parser.pdf.PDFParser; +import org.apache.tika.parser.Parser; import org.apache.tika.sax.BodyContentHandler; import org.hibernate.id.Configurable; import org.hibernate.id.IdentifierGenerator; @@ -959,17 +961,26 @@ FileUtils.deleteQuietly(uploadDir); } - public static String getPDFContents(File file) throws TikaException, IOException, SAXException { + public static String getTextFileContents(File file) throws TikaException, IOException, SAXException { BodyContentHandler handler = new BodyContentHandler(-1); - ParseContext pcontext = new ParseContext(); + ParseContext parseContext = new ParseContext(); + parseContext.set(Parser.class, new EmptyParser()); Metadata metadata = new Metadata(); - PDFParser pdfparser = new PDFParser(); + AutoDetectParser parser = new AutoDetectParser(); + String contents = null; try (InputStream inputStream = new FileInputStream(file)) { - pdfparser.parse(inputStream, handler, metadata, pcontext); + parser.parse(inputStream, handler, metadata, parseContext); + } catch (Exception e) { + contents = handler.toString().strip(); + if (StringUtils.isBlank(contents)) { + throw e; + } } - String contents = handler.toString().strip(); + if (contents == null) { + contents = handler.toString().strip(); + } if (log.isDebugEnabled()) { - log.debug("PDF contents:\n" + contents); + log.debug("File contents:\n" + contents); } return contents;