diff --git a/flint-core/src/main/java/uk/bl/dpt/qa/flint/Flint.java b/flint-core/src/main/java/uk/bl/dpt/qa/flint/Flint.java index 664b812..12817b0 100644 --- a/flint-core/src/main/java/uk/bl/dpt/qa/flint/Flint.java +++ b/flint-core/src/main/java/uk/bl/dpt/qa/flint/Flint.java @@ -38,7 +38,7 @@ import java.util.Map; import java.util.Set; -import static uk.bl.dpt.qa.flint.wrappers.TikaWrapper.getMimetype; +import uk.bl.dpt.qa.flint.wrappers.TikaWrapper; import static uk.bl.dpt.utils.util.FileUtil.traverse; /** @@ -168,7 +168,7 @@ public List check(File pFile) { boolean checked = false; - String mimetype = getMimetype(pFile); + String mimetype = new TikaWrapper().getMimetype(pFile); List results = new ArrayList(); diff --git a/flint-core/src/main/java/uk/bl/dpt/qa/flint/checks/TimedValidation.java b/flint-core/src/main/java/uk/bl/dpt/qa/flint/checks/TimedValidation.java index 20b89d0..105aafa 100644 --- a/flint-core/src/main/java/uk/bl/dpt/qa/flint/checks/TimedValidation.java +++ b/flint-core/src/main/java/uk/bl/dpt/qa/flint/checks/TimedValidation.java @@ -38,8 +38,6 @@ public class TimedValidation { private static Logger LOGGER = LoggerFactory.getLogger(TimedValidation.class); - protected static ExecutorService executor; - private TimedValidation(){} /** @@ -49,7 +47,7 @@ private TimedValidation(){} * @return output from the TimedTask */ public static LinkedHashMap validate(TimedTask task, File contentFile) { - executor = Executors.newSingleThreadExecutor(); + ExecutorService executor = Executors.newSingleThreadExecutor(); task.setContentFile(contentFile); LinkedHashMap cMap = new LinkedHashMap(); Future> future = executor.submit(task); diff --git a/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/PolicyValidation.java b/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/PolicyValidation.java index 2665784..5ddfc1b 100644 --- a/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/PolicyValidation.java +++ 
b/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/PolicyValidation.java @@ -38,7 +38,7 @@ public PolicyValidation(long timeout, Set patternFilter) { @Override public LinkedHashMap call() throws Exception { logger.info("Performing a policy validation on {}", contentFile); - StreamSource outputXml = EpubCheckWrapper.check(contentFile); + StreamSource outputXml = new EpubCheckWrapper().check(contentFile); return PolicyAware.policyValidationResult(outputXml, new StreamSource(EPUBFormat.getPolicyStatically()), patternFilter); } diff --git a/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/Wellformedness.java b/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/Wellformedness.java index 8c055c4..6df3838 100644 --- a/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/Wellformedness.java +++ b/flint-epub/src/main/java/uk/bl/dpt/qa/flint/epub/checks/Wellformedness.java @@ -39,9 +39,10 @@ public LinkedHashMap call() throws Exception { LinkedHashMap cmap = new LinkedHashMap(); if (patternFilter == null || patternFilter.contains(catName) ) { CheckCategory cc = new CheckCategory(catName); - if (CalibreWrapper.calibreIsAvailable()) { + CalibreWrapper calibreWrapper = new CalibreWrapper(); + if (calibreWrapper.calibreIsAvailable()) { try { - cc.add(new CheckCheck("isValidCalibre", CalibreWrapper.isValid(contentFile), null)); + cc.add(new CheckCheck("isValidCalibre", calibreWrapper.isValid(contentFile), null)); logger.debug(cc.get("isValidCalibre").toString()); } catch (Exception e) { logger.error(e.getMessage()); diff --git a/flint-fx/flint-fx-direct/src/main/java/uk/bl/dpt/qa/flint/Controller.java b/flint-fx/flint-fx-direct/src/main/java/uk/bl/dpt/qa/flint/Controller.java index 3061534..3282436 100644 --- a/flint-fx/flint-fx-direct/src/main/java/uk/bl/dpt/qa/flint/Controller.java +++ b/flint-fx/flint-fx-direct/src/main/java/uk/bl/dpt/qa/flint/Controller.java @@ -30,7 +30,7 @@ import java.util.Map; import java.util.Set; -import static 
uk.bl.dpt.qa.flint.wrappers.TikaWrapper.getMimetype; +import uk.bl.dpt.qa.flint.wrappers.TikaWrapper; /** * A controller for the flint-fx GUI that directly runs the flint classes. @@ -86,7 +86,7 @@ public void askForValidation() { protected Collection getAvailableFormats() { Collection formats = null; try { - formats = Flint.getAvailableFormats(getMimetype(inputFile)).keySet(); + formats = Flint.getAvailableFormats(new TikaWrapper().getMimetype(inputFile)).keySet(); } catch (IllegalAccessException | InstantiationException e) { logger.error(e.getMessage()); popupError(e); diff --git a/flint-mobi/pom.xml b/flint-mobi/pom.xml new file mode 100644 index 0000000..0257520 --- /dev/null +++ b/flint-mobi/pom.xml @@ -0,0 +1,28 @@ + + 4.0.0 + + + flint + uk.bl.dpt.qa + 0.9.0-SNAPSHOT + + + flint-mobi + + + + + ${project.groupId} + flint-core + ${project.version} + + + + + junit + junit + + + + + \ No newline at end of file diff --git a/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/formats/MobiBook.java b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/formats/MobiBook.java new file mode 100644 index 0000000..6991f8c --- /dev/null +++ b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/formats/MobiBook.java @@ -0,0 +1,499 @@ +package uk.bl.dpt.qa.flint.formats; + + +import java.io.ByteArrayOutputStream; +import java.io.DataInputStream; +import java.io.EOFException; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; +import java.nio.charset.Charset; +import java.util.ArrayList; +import java.util.Iterator; +import java.util.List; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + + +/** + * A simple parser for MobiBook files to allow the extraction of metadata. + * + * Based on http://wiki.mobileread.com/wiki/MOBI. 
+ */ +public class MobiBook { + + private static Logger log = LoggerFactory.getLogger(MobiBook.class); + + private boolean valid = true; + + /** the Palm Database header */ + private PalmDatabaseHeader header = null; + + /** the list of records in the database */ + private List records = new ArrayList(); + + /** + * Constructor. + * @param file the mobi book file + * @throws FileNotFoundException if the file was not found + */ + public MobiBook(File file) throws FileNotFoundException { + this(new FileInputStream(file)); + } + + /** + * Constructor. + * @param in the mobi book file as a stream + */ + public MobiBook(InputStream in) { + DataInputStream is = null; + try { + is = new DataInputStream(in); + + // read the database header + header = new PalmDatabaseHeader(is); + if (!isMobiFormat() || header.getNumberOfRecords() < 2) { + valid = false; + return; + } + + // read the directory + for (int i = 0; i < header.getNumberOfRecords(); i++) { + records.add(createRecord(is, i)); + } + + Iterator it = records.iterator(); + Record currentEntry = it.next(); + Record nextEntry = it.hasNext() ? it.next() : null; + + // seek to the first record + int pos = 78 + (8 * header.getNumberOfRecords()); + while (pos++ < currentEntry.getRecordDataOffset()) { + is.readByte(); + } + + // read all the records + try { + while (true) { + byte b = is.readByte(); + pos++; + + if (nextEntry != null && pos == nextEntry.getRecordDataOffset()) { + currentEntry.close(); + currentEntry = nextEntry; + nextEntry = it.hasNext() ? 
it.next() : null; + } + + currentEntry.append(b); + } + } catch (EOFException e) { + currentEntry.close(); + } + + + } catch (FileNotFoundException e) { + valid = false; + e.printStackTrace(); + log.info("Unable to open file", e); + } catch (IOException e) { + valid = false; + e.printStackTrace(); + log.info("Unable to read file", e); + } finally { + try { + if (is != null) + is.close(); + } catch (IOException e) { + log.info("Unable to close input stream", e); + } + } + } + + /** + * True if the database could be read and it contains a mobi book. + * @return true if the stream was read without an I/O error and holds a mobi book with at least two records + */ + public boolean isValid() { + return valid; + } + + /** + * True if this database is a mobi book. + */ + public boolean isMobiFormat() { + return header != null && header.getType().equalsIgnoreCase("BOOK") && header.getCreator().equalsIgnoreCase("MOBI"); + } + + public PalmDatabaseHeader getPalmDatabaseHeader() { + return header; + } + + /** + * Get the Mobi header from position 0 in the database. + */ + public MobiHeader getMobiHeader() { + if (!records.isEmpty()) { + Record record = records.get(0); + + return record instanceof MobiHeader ? (MobiHeader) record : null; + } + + return null; + } + + /** + * Create a new Palm Database record. + * @param is the Palm Database contents as a stream + * @param position the position of the record in the database + * @return either a specific type of record (e.g. MobiHeader) or a generic record. + * @throws IOException if an error occurs reading the database + */ + private Record createRecord(DataInputStream is, int position) throws IOException { + if (position == 0) { + return new MobiHeader(is, position); + } else { + return new Record(is, position); + } + } + + /** + * The Palm Database header. + * See http://wiki.mobileread.com/wiki/PDB#Palm_Database_Format + */ + public class PalmDatabaseHeader { + + private ByteBuffer palmDocHeader = null; + + /** + * Constructor. 
+ * @param is the input file as a stream + * @throws IOException if an error occurred reading the 78 bytes of the header + */ + public PalmDatabaseHeader(DataInputStream is) throws IOException { + byte[] buf = new byte[78]; + is.readFully(buf); + + palmDocHeader = ByteBuffer.wrap(buf); + palmDocHeader.order(ByteOrder.BIG_ENDIAN); + palmDocHeader.mark(); + } + + /** + * The type of database. + */ + public String getType() { + byte[] buf = new byte[4]; + + palmDocHeader.reset().position(60); + palmDocHeader.get(buf).reset(); + + return new String(buf, Charset.forName("ASCII")); + } + + /** + * The program that uses this database + */ + public String getCreator() { + byte[] buf = new byte[4]; + + palmDocHeader.reset().position(64); + palmDocHeader.get(buf).reset(); + + return new String(buf, Charset.forName("ASCII")); + } + + /** + * The number of records in the database + */ + public short getNumberOfRecords() { + return palmDocHeader.getShort(76); + } + } + + public class MobiHeader extends Record { + + private Charset textEncoding = Charset.forName("UTF-8"); + + private int headerLength = 24; + + /** + * Constructor. + * @param is the palm database file as a stream + * @param position the position of the record in the database + * @throws IOException if the header could not be read + */ + public MobiHeader(DataInputStream is, int position) throws IOException { + super(is, position); + } + + /** + * True if the encryption type is set or the database has a DRM offset set. + */ + public boolean hasDRM() { + return getEncryptionType() > 0 || getDRMOffset() < 0xFFFFFFFF; + } + + /** + * Compression: 1 == no compression, 2 = PalmDOC compression, 17480 = HUFF/CDIC compression + */ + public short getCompression() { + return record.getShort(0); + } + + /** + * Uncompressed length of the entire text of the book. + */ + public int getTextLength() { + return record.getInt(4); + } + + /** + * Number of PDB records used for the text of the book. 
+ */ + public short getRecordCount() { + return record.getShort(8); + } + + /** + * Maximum size of each record containing text, always 4096 + */ + public short getRecordSize() { + return record.getShort(10); + } + + /** + * Encryption Type: 0 == no encryption, 1 = Old Mobipocket Encryption, 2 = Mobipocket Encryption + */ + public short getEncryptionType() { + return record.getShort(12); + } + + /** + * The characters M O B I + */ + public String getIdentifier() { + return getString(16, 4); + } + + /** + * Length of the mobi header. + */ + public int getHeaderLength() { + return record.getInt(20); + } + + /** + * The kind of Mobipocket file this is: 2 Mobipocket Book, 3 PalmDoc Book, 4 Audio, 232 mobipocket? generated by kindlegen1.2, + * 248 KF8: generated by kindlegen2, 257 News, 258 News_Feed, 259 News_Magazine, 513 PICS, 514 WORD, 515 XLS, 516 PPT, 517 TEXT, 518 HTML + */ + public int getMobiType() { + return getInt(24); + } + + /** + * 1252 = CP1252 (WinLatin1); 65001 = UTF-8 + */ + public int getTextEncoding() { + return getInt(28); + } + + /** + * Some kind of unique ID number (random?) + */ + public int getUniqueID() { + return getInt(32); + } + + /** + * Version of the Mobipocket format used in this file. + */ + public int getFileVersion() { + return getInt(36); + } + + /** + * Offset in record 0 (not from start of file) of the full name of the book + */ + public int getFullNameOffset() { + return getInt(84); + } + + /** + * Length in bytes of the full name of the book + */ + public int getFullNameLength() { + return getInt(88); + } + + /** + * The full name of the book + */ + public String getFullName() { + return getString(getFullNameOffset(), getFullNameLength()); + } + + /** + * Offset to DRM key info in DRMed files. 0xFFFFFFFF if no DRM + */ + public int getDRMOffset() { + return getInt(168); + } + + /** + * Number of entries in DRM info. 
0xFFFFFFFF if no DRM + */ + public int getDRMCount() { + return getInt(172); + } + + /** + * Number of bytes in DRM info. + */ + public int getDRMSize() { + return getInt(176); + } + + /** + * Some flags concerning the DRM info. + */ + public int getDRMFlags() { + return getInt(180); + } + + + /** + * Close the record and verify that it is a mobi header. + */ + @Override + void close() throws IOException { + super.close(); + + String identifier = getIdentifier(); + if (!identifier.equalsIgnoreCase("MOBI")) + throw new IOException("Not a mobi header"); + + headerLength = record.capacity(); + + int encoding = getTextEncoding(); + if (encoding == 1252) { + textEncoding = Charset.forName("ASCII"); + } + } + + /** + * Read a short from the record + * @param pos the position of the short in the record + */ + private short getShort(int pos) { + if (headerLength < pos + 2) throw new ArrayIndexOutOfBoundsException(pos); + return record.getShort(pos); + } + + /** + * Read an int from the record + * @param pos the position of the int in the record + */ + private int getInt(int pos) { + if (headerLength < pos + 4) throw new ArrayIndexOutOfBoundsException(pos); + return record.getInt(pos); + } + + /** + * Read a string from the record + * @param pos the start position of the string in the record + * @param size the length of the string + * @return the string parsed using the text encoding. 
+ */ + private String getString(int pos, int size) { + if (headerLength < pos + size) throw new ArrayIndexOutOfBoundsException(pos); + + byte[] buf = new byte[size]; + + record.reset().position(pos); + record.get(buf).reset(); + + return new String(buf, textEncoding); + } + } + + /** + * A container for a generic Palm Database Record + */ + public class Record { + + /** the contents of the record */ + protected ByteBuffer record = null; + + /** the offset of the record data in the database */ + private int recordDataOffset; + + /** the position of the record in the database */ + private int position; + + /** + * Constructor. + * @param is the palm database file as a stream + * @param position the position of the record in the database + * @throws IOException if an error occurs while reading the record + */ + public Record(DataInputStream is, int position) throws IOException { + byte[] buf = new byte[8]; + is.readFully(buf); + + ByteBuffer palmDirectoryEntry = ByteBuffer.wrap(buf); + palmDirectoryEntry.order(ByteOrder.BIG_ENDIAN); + palmDirectoryEntry.mark(); + + this.position = position; + recordDataOffset = palmDirectoryEntry.getInt(0); + } + + /** + * The offset of the record data in the database + */ + public int getRecordDataOffset() { + return recordDataOffset; + } + + /** + * The position of this record in the database + */ + public int getPosition() { + return position; + } + + /** + * The raw data of the record + */ + public byte[] getData() { + return record.array(); + } + + private ByteArrayOutputStream baos = new ByteArrayOutputStream(); + + /** + * Append a byte to this record + */ + void append(byte b) { + baos.write(b); + } + + /** + * Close + */ + void close() throws IOException { + baos.close(); + + record = ByteBuffer.wrap(baos.toByteArray()); + record.order(ByteOrder.BIG_ENDIAN); + record.mark(); + } + + } + +} diff --git a/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/formats/MobiFormat.java 
b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/formats/MobiFormat.java new file mode 100644 index 0000000..8898c9c --- /dev/null +++ b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/formats/MobiFormat.java @@ -0,0 +1,118 @@ +package uk.bl.dpt.qa.flint.formats; + + +import java.io.File; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeMap; +import java.util.TreeSet; + +import uk.bl.dpt.qa.flint.checks.CheckResult; +import uk.bl.dpt.qa.flint.checks.TimedValidation; +import uk.bl.dpt.qa.flint.mobi.checks.FixedCategories; +import uk.bl.dpt.qa.flint.mobi.checks.SpecificDrmChecks; + + +public class MobiFormat extends PolicyAware implements Format { + + private final static String SCH_POLICY = "/mobicheck-policy-validation/minimal.sch"; + + // when does a wrapper's task timeout [seconds] + private final static long WRAPPER_TIMEOUT = 10 * 60; + + @Override + @SuppressWarnings("serial") + public Map>> getFixedCategories() { + final Set noDRM = new TreeSet() {{ + add("checkForEncryption"); + }}; + final Map> noDRMMap = new TreeMap>() {{ + put(FixedCategories.NO_DRM_ENCRYPTION.toString(), noDRM); + }}; + final Set wellFormed = new TreeSet() {{ + add("isValid"); + }}; + final Map> wellFormedMap = new TreeMap>() {{ + put(FixedCategories.WELL_FORMED.toString(), wellFormed); + }}; + return new TreeMap>>() {{ + put(FixedCategories.NO_DRM_ENCRYPTION.toString(), noDRMMap); + put(FixedCategories.WELL_FORMED.toString(), wellFormedMap); + }}; + } + + @Override + public Collection getAllCategoryNames() throws Exception { + Collection cats = new ArrayList(); + + cats.addAll(getFixedCategories().keySet()); + // add a potential policy validation error to all category names + cats.add(FixedCategories.POLICY_VALIDATION.name()); + // cats.addAll(requestPolicyPatternNames(new StreamSource(this.getPolicy()))); + return cats; + } + + @Override + public CheckResult 
validationResult(File contentFile) { + CheckResult checkResult; + try { + checkResult = new CheckResult(contentFile.getName(), this.getFormatName(), this.getVersion(), getAllCategoryNames()); + } catch (Exception e) { + throw new RuntimeException("could not initialise check-result! reason: {}", e); + } + + Long startTime = System.currentTimeMillis(); + + checkResult.addAll(TimedValidation.validate(new SpecificDrmChecks(WRAPPER_TIMEOUT, patternFilter), contentFile)); + + checkResult.setTime(System.currentTimeMillis() - startTime); + logger.info("all checks done for {}", this.getFormatName()); + + return checkResult; + } + + @Override + public boolean canCheck(File pFile, String pMimetype) { + return (canCheck(pMimetype) || + //simple check + pFile.getName().toLowerCase().endsWith(".mobi") + || pFile.getName().toLowerCase().endsWith(".azw") + || pFile.getName().toLowerCase().endsWith(".azw3") + || pFile.getName().toLowerCase().endsWith(".prc") + ); + } + + @Override + public boolean canCheck(String pMimetype) { + return acceptedMimeTypes().contains(pMimetype); + } + + @Override + @SuppressWarnings("serial") + public Collection acceptedMimeTypes() { + return new HashSet() {{ + add("application/x-mobipocket-ebook"); + add("application/vnd.amazon.ebook"); + }}; + } + + @Override + public String getFormatName() { + return "MOBI"; + } + + @Override + public String getVersion() { + return "0.1.0"; + } + + @Override + public InputStream getPolicy() { + return getClass().getResourceAsStream(SCH_POLICY); + } + +} diff --git a/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/mobi/checks/FixedCategories.java b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/mobi/checks/FixedCategories.java new file mode 100644 index 0000000..2abbdbe --- /dev/null +++ b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/mobi/checks/FixedCategories.java @@ -0,0 +1,27 @@ +package uk.bl.dpt.qa.flint.mobi.checks; + + +/** + * There are two types of Check Categories: + * (1) dynamically defined ones from within a 
(possibly changing) schematron + * policy (aka schematron patterns) + * (2) additional static ones that require specific check methods and are added + * 'manually' to the dynamic list; these ones are the FixedCategories. + */ +public enum FixedCategories { + + WELL_FORMED("Well formed"), + NO_DRM_ENCRYPTION("DRM check"), + POLICY_VALIDATION("Overall error indicator for policy validation"); + + private final String cat; + + FixedCategories(String cat) { + this.cat = cat; + } + + public String toString() { + return cat; + } + +} diff --git a/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/mobi/checks/SpecificDrmChecks.java b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/mobi/checks/SpecificDrmChecks.java new file mode 100644 index 0000000..e9ad76e --- /dev/null +++ b/flint-mobi/src/main/java/uk/bl/dpt/qa/flint/mobi/checks/SpecificDrmChecks.java @@ -0,0 +1,70 @@ +package uk.bl.dpt.qa.flint.mobi.checks; + + +import java.io.File; +import java.util.LinkedHashMap; +import java.util.Set; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import uk.bl.dpt.qa.flint.checks.CheckCategory; +import uk.bl.dpt.qa.flint.checks.CheckCheck; +import uk.bl.dpt.qa.flint.checks.TimedTask; +import uk.bl.dpt.qa.flint.formats.MobiBook; +import uk.bl.dpt.qa.flint.mobi.checks.FixedCategories; + + +/** + * Wrapper around additional specific DRM checks that produces an error message + * if the checks time out after MobiFormat#WRAPPER_TIMEOUT seconds. 
+ */ +public class SpecificDrmChecks extends TimedTask { + + public static final String CHECK_FOR_ENCRYPTION = "checkForEncryption"; + + private Logger logger; + private Set patternFilter; + + + /** + * Create a SpecificDrmChecks object that times out if calls take longer than expected + * + * @param timeout timeout to use + * @param patternFilter names of the categories to check; if null, the DRM check always runs + */ + public SpecificDrmChecks(long timeout, Set patternFilter) { + super(FixedCategories.NO_DRM_ENCRYPTION.toString(), timeout); + + this.patternFilter = patternFilter; + this.logger = LoggerFactory.getLogger(this.getClass()); + } + + @Override + public LinkedHashMap call() throws Exception { + String catName = FixedCategories.NO_DRM_ENCRYPTION.toString(); + LinkedHashMap cmap = new LinkedHashMap(); + + if (patternFilter == null || patternFilter.contains(catName)) { + CheckCategory cc = new CheckCategory(catName); + cc.add(new CheckCheck(CHECK_FOR_ENCRYPTION, !checkForEncryption(contentFile), null)); + cmap.put(cc.getName(), cc); + + logger.debug(cc.get(CHECK_FOR_ENCRYPTION).toString()); + } + + return cmap; + } + + private boolean checkForEncryption(File contentFile) throws Exception { + boolean result = true; + + MobiBook mobiBook = new MobiBook(contentFile); + if (!mobiBook.isValid()) throw new Exception("Not a mobibook formatted file"); + + result = mobiBook.getMobiHeader().hasDRM(); + + return result; + } + +} diff --git a/flint-mobi/src/test/java/uk/bl/dpt/qa/flint/FlintMobiTest.java b/flint-mobi/src/test/java/uk/bl/dpt/qa/flint/FlintMobiTest.java new file mode 100644 index 0000000..5767367 --- /dev/null +++ b/flint-mobi/src/test/java/uk/bl/dpt/qa/flint/FlintMobiTest.java @@ -0,0 +1,38 @@ +package uk.bl.dpt.qa.flint; + + +import java.io.File; + +import static org.junit.Assert.*; +import org.junit.Before; +import org.junit.Test; + +import uk.bl.dpt.qa.flint.checks.CheckResult; + + +public class FlintMobiTest { + + private Flint flint; + + private static 
String DRM_MOBI_CHECK = "DRM check"; + + @Before + public void setUp() throws Exception { + flint = new Flint(); + } + + @Test + public final void testMobiNoDRM() { + File toTest = new File(FlintMobiTest.class.getResource("/mobisamples/lorem-ipsum.mobi").getPath()); + CheckResult result = flint.check(toTest).get(0); + assertTrue("DRM should not be found", result.get(DRM_MOBI_CHECK).isHappy()); + assertTrue("DRM should not be found", result.get(DRM_MOBI_CHECK).get("checkForEncryption").isHappy()); + + toTest = new File(FlintMobiTest.class.getResource("/mobisamples/lorem-ipsum.azw3").getPath()); + result = flint.check(toTest).get(0); + assertTrue("DRM should not be found", result.get(DRM_MOBI_CHECK).isHappy()); + assertTrue("DRM should not be found", result.get(DRM_MOBI_CHECK).get("checkForEncryption").isHappy()); + + } + +} diff --git a/flint-mobi/src/test/java/uk/bl/dpt/qa/flint/formats/MobiBookTest.java b/flint-mobi/src/test/java/uk/bl/dpt/qa/flint/formats/MobiBookTest.java new file mode 100644 index 0000000..b00d459 --- /dev/null +++ b/flint-mobi/src/test/java/uk/bl/dpt/qa/flint/formats/MobiBookTest.java @@ -0,0 +1,77 @@ +package uk.bl.dpt.qa.flint.formats; + + +import static org.junit.Assert.*; + +import org.junit.Ignore; +import org.junit.Test; + +import uk.bl.dpt.qa.flint.formats.MobiBook.MobiHeader; +import uk.bl.dpt.qa.flint.formats.MobiBook.PalmDatabaseHeader; + + +public class MobiBookTest { + + private MobiBook mobiBook = new MobiBook(MobiBookTest.class.getResourceAsStream("/mobisamples/lorem-ipsum.mobi")); + + @Test + public void testMobiBook() { + assertTrue("Should be a valid Palm Database File", mobiBook.isValid()); + assertTrue("Should be a mobi book", mobiBook.isMobiFormat()); + } + + @Test + public void testPalmDocHeader() { + PalmDatabaseHeader header = mobiBook.getPalmDatabaseHeader(); + + assertEquals("BOOK", header.getType()); + assertEquals("MOBI", header.getCreator()); + assertEquals(7, header.getNumberOfRecords()); + } + + @Test + public 
void testMobiHeader() { + MobiHeader header = mobiBook.getMobiHeader(); + + assertFalse(header.hasDRM()); + assertEquals(2, header.getCompression()); + assertEquals(0, header.getEncryptionType()); + assertEquals(2, header.getMobiType()); + assertEquals(0xFFFFFFFF, header.getDRMOffset()); + assertEquals(0, header.getDRMCount()); + assertEquals(" ", header.getFullName()); + } + + @Test + public void testNoDRMMobiHeader() { + MobiBook mobiBook = new MobiBook(MobiBookTest.class.getResourceAsStream("/mobisamples/lorem-ipsum.azw3")); + MobiHeader header = mobiBook.getMobiHeader(); + + assertFalse(header.hasDRM()); + assertEquals(2, header.getCompression()); + assertEquals(0, header.getEncryptionType()); + assertEquals(2, header.getMobiType()); + assertEquals(0xFFFFFFFF, header.getDRMOffset()); + assertEquals(0, header.getDRMCount()); + assertEquals("Unknown", header.getFullName()); + } + + /* + * test ignored because we don't have permission to distribute the test file. + */ + @Test @Ignore + public void testDRMMobiHeader() { + MobiBook mobiBook = new MobiBook(MobiBookTest.class.getResourceAsStream("/mobisamples/B00968FPDE_EBOK.prc")); + MobiHeader header = mobiBook.getMobiHeader(); + + assertTrue(header.hasDRM()); + assertEquals(17480, header.getCompression()); + assertEquals(2, header.getEncryptionType()); + assertEquals(2, header.getMobiType()); + assertEquals(1320, header.getDRMOffset()); + assertEquals(1, header.getDRMCount()); + assertEquals("The Definitive Guide to Customer Relationship Management (Collection)", header.getFullName()); + } + + +} diff --git a/flint-mobi/src/test/resources/mobisamples/lorem-ipsum.azw3 b/flint-mobi/src/test/resources/mobisamples/lorem-ipsum.azw3 new file mode 100644 index 0000000..e6869f2 Binary files /dev/null and b/flint-mobi/src/test/resources/mobisamples/lorem-ipsum.azw3 differ diff --git a/flint-mobi/src/test/resources/mobisamples/lorem-ipsum.mobi b/flint-mobi/src/test/resources/mobisamples/lorem-ipsum.mobi new file mode 100644 
index 0000000..2ded667 Binary files /dev/null and b/flint-mobi/src/test/resources/mobisamples/lorem-ipsum.mobi differ diff --git a/flint-pdf/pom.xml b/flint-pdf/pom.xml index 0f2c2cb..99391f6 100644 --- a/flint-pdf/pom.xml +++ b/flint-pdf/pom.xml @@ -29,7 +29,7 @@ net.sf.saxon Saxon-HE - 9.5.1-5 + 9.5.1-8 diff --git a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/PolicyValidation.java b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/PolicyValidation.java index 572d97d..4963091 100644 --- a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/PolicyValidation.java +++ b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/PolicyValidation.java @@ -57,7 +57,7 @@ public PolicyValidation(long timeout, Set patternFilter) { @Override public LinkedHashMap call() throws Exception { logger.info("Performing a policy validation on {}", contentFile); - ByteArrayOutputStream outputXml = PDFBoxWrapper.preflightToXml(contentFile); + ByteArrayOutputStream outputXml = new PDFBoxWrapper().preflightToXml(contentFile); return PolicyAware.policyValidationResult(new StreamSource(new ByteArrayInputStream(outputXml.toByteArray())), new StreamSource(PDFFormat.getPolicyStatically()), patternFilter); } diff --git a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/SpecificDrmChecks.java b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/SpecificDrmChecks.java index 5123dcc..e6d428a 100644 --- a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/SpecificDrmChecks.java +++ b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/SpecificDrmChecks.java @@ -57,15 +57,17 @@ public SpecificDrmChecks(long pTimeout, Set pPatternFilter) { public LinkedHashMap call() throws Exception { LinkedHashMap cmap = new LinkedHashMap(); if (patternFilter == null || patternFilter.contains(FixedCategories.NO_DRM.toString()) ) { + PDFBoxWrapper pdfBoxWrapper = new PDFBoxWrapper(); + logger.info("Adding specific DRM checks for {} to check-result", contentFile); CheckCategory cc = new 
CheckCategory(FixedCategories.NO_DRM.toString()); - cc.add(new CheckCheck("checkDRMPDFBoxAbsolute", !PDFBoxWrapper.hasDRM(contentFile), null)); + cc.add(new CheckCheck("checkDRMPDFBoxAbsolute", !pdfBoxWrapper.hasDRM(contentFile), null)); logger.debug(cc.get("checkDRMPDFBoxAbsolute").toString()); - cc.add(new CheckCheck("checkDRMPDFBoxGranular", !PDFBoxWrapper.hasDRMGranular(contentFile), null)); + cc.add(new CheckCheck("checkDRMPDFBoxGranular", !pdfBoxWrapper.hasDRMGranular(contentFile), null)); logger.debug(cc.get("checkDRMPDFBoxGranular").toString()); cc.add(new CheckCheck("checkDRMNaiive", !checkDRMNaiive(contentFile), null)); logger.debug(cc.get("checkDRMNaiive").toString()); - cc.add(new CheckCheck("checkDRM_iText", !iTextWrapper.hasDRM(contentFile), null)); + cc.add(new CheckCheck("checkDRM_iText", !new iTextWrapper().hasDRM(contentFile), null)); logger.debug(cc.get("checkDRM_iText").toString()); cmap.put(cc.getName(), cc); } diff --git a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/Wellformedness.java b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/Wellformedness.java index 33e8d78..ec81c37 100644 --- a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/Wellformedness.java +++ b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/checks/Wellformedness.java @@ -62,15 +62,16 @@ public LinkedHashMap call() throws Exception { logger.info("Adding additional well-formedness checks for {}", contentFile); CheckCategory cc = new CheckCategory(catName); - cc.add(new CheckCheck("isValidPDFBox", PDFBoxWrapper.isValid(contentFile), null)); + cc.add(new CheckCheck("isValidPDFBox", new PDFBoxWrapper().isValid(contentFile), null)); logger.debug(cc.get("isValidPDFBox").toString()); - cc.add(new CheckCheck("isValid_iText", iTextWrapper.isValid(contentFile), null)); + cc.add(new CheckCheck("isValid_iText", new iTextWrapper().isValid(contentFile), null)); logger.debug(cc.get("isValid_iText").toString()); - if (CalibreWrapper.calibreIsAvailable()) { + 
CalibreWrapper calibreWrapper = new CalibreWrapper(); + if (calibreWrapper.calibreIsAvailable()) { try { - cc.add(new CheckCheck("isValid_Calibre", CalibreWrapper.isValid(contentFile), null)); + cc.add(new CheckCheck("isValid_Calibre", calibreWrapper.isValid(contentFile), null)); } catch (CalibreWrapper.CalibreMissingException e) { // this shouldn't happen due to availability check e.printStackTrace(); @@ -80,7 +81,7 @@ public LinkedHashMap call() throws Exception { // Jhove is passing files that should not pass // therefore only add a result if it is negative - boolean jhove = Jhove1Wrapper.isValid(contentFile); + boolean jhove = new Jhove1Wrapper().isValid(contentFile); if (!jhove) { cc.add(new CheckCheck("isValidJhove1", false, null)); logger.debug(cc.get("isValidJhove1").toString()); diff --git a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/converter/PDFToText.java b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/converter/PDFToText.java index 50026c4..e8bf99e 100644 --- a/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/converter/PDFToText.java +++ b/flint-pdf/src/main/java/uk/bl/dpt/qa/flint/pdf/converter/PDFToText.java @@ -40,13 +40,13 @@ public static boolean process(File pOriginal, File pText) { boolean ret = false; if(!pOriginal.exists()) return false; //test to see if the file is actually a pdf - if(TikaWrapper.getMimetype(pOriginal).toLowerCase().contains("pdf")) { + if(new TikaWrapper().getMimetype(pOriginal).toLowerCase().contains("pdf")) { //extract text - ret = PDFBoxWrapper.extractTextFromPDF(pOriginal, pText, true); + ret = new PDFBoxWrapper().extractTextFromPDF(pOriginal, pText, true); if(!ret) { //try and extract text using iText as PDFBox encountered an error - ret = iTextWrapper.extractTextFromPDF(pOriginal, pText, true); + ret = new iTextWrapper().extractTextFromPDF(pOriginal, pText, true); } } diff --git a/flint-register/pom.xml b/flint-register/pom.xml index 3da069d..f646716 100644 --- a/flint-register/pom.xml +++ b/flint-register/pom.xml 
@@ -25,6 +25,11 @@ flint-epub ${project.version} + + ${project.groupId} + flint-mobi + ${project.version} + \ No newline at end of file diff --git a/flint-toolwrappers/pom.xml b/flint-toolwrappers/pom.xml index 7d01080..e186870 100644 --- a/flint-toolwrappers/pom.xml +++ b/flint-toolwrappers/pom.xml @@ -62,6 +62,10 @@ org.tukaani xz + + com.google.guava + guava + \ No newline at end of file diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/CalibreWrapper.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/CalibreWrapper.java index b322cd3..e625633 100644 --- a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/CalibreWrapper.java +++ b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/CalibreWrapper.java @@ -38,7 +38,7 @@ public class CalibreWrapper { static Logger LOGGER = LoggerFactory.getLogger(CalibreWrapper.class); private static String gVersion = null; - private static List CALIBRE_CONVERT = null; + private List CALIBRE_CONVERT = null; /** * Exception for when Calibre is missing @@ -58,14 +58,13 @@ public CalibreMissingException() { /** * Don't allow external instantiation, follow the Singleton pattern. */ - private CalibreWrapper() {} - - static { + public CalibreWrapper() { // TODO: make pathToCalibre a property or a command-line arg? 
@SuppressWarnings("serial") Map osMap = new HashMap() {{ put("windows", "c:/bin/calibre/calibre2/" + "ebook-convert.exe"); put("linux", "/usr/bin/" + "ebook-convert"); + put("mac os x", "/Applications/calibre.app/Contents/MacOS/" + "ebook-convert"); }}; String os = System.getProperty("os.name").toLowerCase(); @@ -93,15 +92,15 @@ private CalibreWrapper() {} * Checks whether Calibre is available * @return true if true, false if false :-) */ - public static boolean calibreIsAvailable() { - return CALIBRE_CONVERT != null; + public boolean calibreIsAvailable() { + return CALIBRE_CONVERT != null && "true".equalsIgnoreCase(System.getProperty("enable-calibre", "true")); } /** * Initialise the Calibre version number * @throws CalibreMissingException */ - private static void getVer() throws CalibreMissingException { + private void getVer() throws CalibreMissingException { if (CALIBRE_CONVERT == null && !calibreIsAvailable()) { throw new CalibreMissingException(); } @@ -125,7 +124,7 @@ private static void getVer() throws CalibreMissingException { * @return version string from Calibre * @throws CalibreMissingException in case Calibre is missing */ - public static String getVersion() throws CalibreMissingException { + public String getVersion() throws CalibreMissingException { if(null==gVersion) getVer(); return gVersion; } @@ -137,7 +136,7 @@ public static String getVersion() throws CalibreMissingException { * @return File for converted ebook (or null if error) * @throws CalibreMissingException in case Calibre is missing */ - public static File convertEbook(File pOriginal, String pType) throws CalibreMissingException { + public File convertEbook(File pOriginal, String pType) throws CalibreMissingException { if (CALIBRE_CONVERT == null && !calibreIsAvailable()) { throw new CalibreMissingException(); } @@ -169,7 +168,7 @@ public static File convertEbook(File pOriginal, String pType) throws CalibreMiss * @return true if valid (i.e. 
can be converted to text) * @throws CalibreMissingException in case Calibre is missing */ - public static boolean isValid(File pFile) throws CalibreMissingException { + public boolean isValid(File pFile) throws CalibreMissingException { if (CALIBRE_CONVERT == null && !calibreIsAvailable()) { throw new CalibreMissingException(); } @@ -178,8 +177,9 @@ public static boolean isValid(File pFile) throws CalibreMissingException { //we need to redirect stderr to stdout otherwise bad things happen if drm is detected and stderr is written to first ToolRunner runner = new ToolRunner(true); + File newEbook = null; try { - File newEbook = File.createTempFile(pFile.getName()+"-", ".txt"); + newEbook = File.createTempFile(pFile.getName()+"-", ".txt"); newEbook.deleteOnExit(); List commandLine = new ArrayList(); commandLine.addAll(CALIBRE_CONVERT); @@ -206,6 +206,8 @@ public static boolean isValid(File pFile) throws CalibreMissingException { return ret; } catch (Exception e) { LOGGER.error("Exception while trying to validate with Calibre: {}", e); + } finally { + if (newEbook != null) newEbook.delete(); } return ret; } @@ -218,7 +220,7 @@ public static boolean isValid(File pFile) throws CalibreMissingException { * @return true if converted ok, otherwise false * @throws CalibreMissingException in case Calibre is missing */ - public static boolean extractTextFromPDF(File pFile, File pOutput, boolean pOverwrite) throws CalibreMissingException { + public boolean extractTextFromPDF(File pFile, File pOutput, boolean pOverwrite) throws CalibreMissingException { if(pOutput.exists()&(!pOverwrite)) return false; //calibre uses the target file extension to decide how to convert the file //as we want text, only allow that extension diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/EpubCheckWrapper.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/EpubCheckWrapper.java index 48bc4cb..f9994ca 100644 --- 
a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/EpubCheckWrapper.java +++ b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/EpubCheckWrapper.java @@ -36,9 +36,9 @@ public class EpubCheckWrapper { static Logger LOGGER = LoggerFactory.getLogger(EpubCheckWrapper.class); - private static Map miniCache = new HashMap(); + private Map miniCache = new HashMap(); - private EpubCheckWrapper() {} + public EpubCheckWrapper() {} /** * Check an epub file against a XmlReportWithMessageIds policy @@ -46,7 +46,7 @@ private EpubCheckWrapper() {} * @return StreamSource of output report * @throws IOException */ - public static StreamSource check(File file) throws IOException { + public StreamSource check(File file) throws IOException { Report report = miniCache.get(file.getAbsolutePath()); File reportFile = null; if (report == null) { diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/ExiftoolWrapper.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/ExiftoolWrapper.java index 6655fe4..3b3d1a6 100644 --- a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/ExiftoolWrapper.java +++ b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/ExiftoolWrapper.java @@ -51,13 +51,13 @@ public class ExiftoolWrapper { */ final public static String EXIFTOOL_LINUX = "/usr/bin/exiftool";//path to installed location - private static String EXIFTOOL = null; + private String EXIFTOOL = null; - private ExiftoolWrapper() { + public ExiftoolWrapper() { // TODO Auto-generated constructor stub } - private static void setupExiftool() { + private void setupExiftool() { String os = System.getProperty("os.name").toLowerCase(); if(os.contains("windows")) { try { @@ -105,7 +105,7 @@ private static void setupExiftool() { * @param pFile * @return */ - private static File runExiftool(File pFile) { + private File runExiftool(File pFile) { List commandLine = Arrays.asList(EXIFTOOL, "-X", pFile.getAbsolutePath()); ToolRunner runner = 
new ToolRunner(); try { @@ -137,7 +137,7 @@ private static File runExiftool(File pFile) { * @param pFile file to check * @return whether or not the file has encryption */ - public static boolean hasDRM(File pFile) { + public boolean hasDRM(File pFile) { if(null==EXIFTOOL) setupExiftool(); if(null==EXIFTOOL) { //i.e. it's still null so we were unable to set up exiftool environment @@ -146,30 +146,35 @@ public static boolean hasDRM(File pFile) { boolean ret = false; - File output = runExiftool(pFile); - - /* - * NOTE: we can do more than just detect the presence of DRM with Exiftool (see outputs) - * Might want to add more granular approach? - */ - - Scanner scanner = null; + File output = null; try { - scanner = new Scanner(new FileInputStream(output)); - } catch (FileNotFoundException e) { - // TODO Auto-generated catch block - e.printStackTrace(); - } - - //just try and find the first occurrence of PDF:Encryption - //i.e. this is an ugly way around rdf namespaces and xpath - String found = scanner.findWithinHorizon("PDF:Encryption", 0); - scanner.close(); + output = runExiftool(pFile); - if(found!=null) { - //i.e. drm detected - ret = true; - //System.out.println("DRM detected with Exiftool"); + /* + * NOTE: we can do more than just detect the presence of DRM with Exiftool (see outputs) + * Might want to add more granular approach? + */ + + Scanner scanner = null; + try { + scanner = new Scanner(new FileInputStream(output)); + } catch (FileNotFoundException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } + + //just try and find the first occurrence of PDF:Encryption + //i.e. this is an ugly way around rdf namespaces and xpath + String found = scanner.findWithinHorizon("PDF:Encryption", 0); + scanner.close(); + + if(found!=null) { + //i.e. 
drm detected + ret = true; + //System.out.println("DRM detected with Exiftool"); + } + } finally { + if (output != null) output.delete(); } return ret; diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/Jhove1Wrapper.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/Jhove1Wrapper.java index 9ae77b7..109d7de 100644 --- a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/Jhove1Wrapper.java +++ b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/Jhove1Wrapper.java @@ -38,46 +38,48 @@ public class Jhove1Wrapper { private static Logger LOGGER = LoggerFactory.getLogger(Jhove1Wrapper.class); - private static JhoveBase jhove = null; - private static App app = null; - private static XmlHandler handler = null; - - static { + private JhoveBase jhove = null; + private App app = null; + private XmlHandler handler = null; + + public Jhove1Wrapper() { //CONFIG_FILE_PATH = Jhove1Wrapper.class.getResource("/jhove.conf").getPath(); //LOGGER.warn("JHOVE CONFIG EXISTS at {}?: {}", CONFIG_FILE_PATH, new File(CONFIG_FILE_PATH).exists()); //http://www.garymcgath.com/jhovenote.html - //and https://github.com/openplanets/planets-suite/blob/59d1517b5493815a0f59927d6c97ca5462d1ed8d/services/jhove/src/main/java/eu/planets_project/ifr/core/services/identification/jhove/impl/JhoveIdentification.java - try { - jhove = new JhoveBase(); - } catch (JhoveException e1) { - LOGGER.warn("Caught exception: {}", e1); - } - app = new App(JhoveBase._name, JhoveBase._release, JhoveBase.DATE, "", ""); - handler = new XmlHandler(); - //try { - // jhove.init(); - //} catch (JhoveException e) { - // LOGGER.warn("Caught exception: {}", e); - //} - jhove.setEncoding("utf-8"); - jhove.setTempDirectory(System.getProperty("java.io.tmpdir")); - jhove.setBufferSize(4096); - jhove.setChecksumFlag(false); - jhove.setShowRawFlag(false); - jhove.setSignatureFlag(false); - } - - private Jhove1Wrapper() {} + //and 
https://github.com/openplanets/planets-suite/blob/59d1517b5493815a0f59927d6c97ca5462d1ed8d/services/jhove/src/main/java/eu/planets_project/ifr/core/services/identification/jhove/impl/JhoveIdentification.java + try { + jhove = new JhoveBase(); + } catch (JhoveException e1) { + LOGGER.warn("Caught exception: {}", e1); + } + app = new App(JhoveBase._name, JhoveBase._release, JhoveBase.DATE, "", ""); + handler = new XmlHandler(); + //try { + // jhove.init(); + //} catch (JhoveException e) { + // LOGGER.warn("Caught exception: {}", e); + //} + jhove.setEncoding("utf-8"); + jhove.setTempDirectory(System.getProperty("java.io.tmpdir")); + jhove.setBufferSize(4096); + jhove.setChecksumFlag(false); + jhove.setShowRawFlag(false); + jhove.setSignatureFlag(false); + } /** * Queries Jhove to see whether a file is valid/well-formed or not * @param pFile file to check * @return true/false if Jhove thinks it's valid */ - public static boolean isValid(File pFile) { + public boolean isValid(File pFile) { boolean ret = false; + + File temp = null; try { - File temp = File.createTempFile("jhove-output-", ".xml"); + LOGGER.warn("jhove staring"); + temp = File.createTempFile("jhove-output-", ".xml"); + String[] inputs = new String[] { pFile.getAbsolutePath() }; jhove.dispatch(app, null, null, handler, temp.getAbsolutePath(), inputs); @@ -94,10 +96,10 @@ public static boolean isValid(File pFile) { //System.out.println(temp.getAbsolutePath()+": "+status+", "+status.length()); } LOGGER.warn("jhove thinks it is: {}", status.toLowerCase()); - temp.delete(); - } catch (Exception e) { LOGGER.warn("Caught exception: {}", e); + } finally { + if (temp != null) temp.delete(); } return ret; diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/PDFBoxWrapper.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/PDFBoxWrapper.java index 529bed8..4d0db65 100644 --- a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/PDFBoxWrapper.java +++ 
b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/PDFBoxWrapper.java @@ -56,9 +56,9 @@ public class PDFBoxWrapper { private static Map pseudoCache = new HashMap(); - private static final XmlResultParser parser = new CachingXmlResultParser(); + private final XmlResultParser parser = new CachingXmlResultParser(); - private PDFBoxWrapper() {} + public PDFBoxWrapper() {} /** * As preflight is used more than once for different puroposes the result @@ -66,12 +66,14 @@ private PDFBoxWrapper() {} */ private static class CachingXmlResultParser extends XmlResultParser { public Element validate (Document rdocument, DataSource source) throws IOException { - if (pseudoCache.containsKey(source.getName())) { - // can be null, which means it's not valid - Element preflight = pseudoCache.get(source.getName()); - // we only cache ONCE and clear after. - pseudoCache.clear(); - return preflight; + synchronized (this.getClass()) { + if (pseudoCache.containsKey(source.getName())) { + // can be null, which means it's not valid + Element preflight = pseudoCache.get(source.getName()); + // we only cache ONCE and clear after. 
+ pseudoCache.clear(); + return preflight; + } } // state that one has dealt with this file: pseudoCache.put(source.getName(), null); @@ -126,7 +128,7 @@ public Element validate (Document rdocument, DataSource source) throws IOExcepti * @throws IOException * @throws TransformerException */ - public static ByteArrayOutputStream preflightToXml(File pFile) throws IOException, TransformerException { + public ByteArrayOutputStream preflightToXml(File pFile) throws IOException, TransformerException { Element result = parser.validate(new FileDataSource(pFile)); LOGGER.debug("generating xml from preflight generated element for {}", pFile); Document doc = result.getOwnerDocument(); @@ -145,7 +147,7 @@ public static ByteArrayOutputStream preflightToXml(File pFile) throws IOExceptio * @param pFile file to check * @return true if valid, false if not */ - public static boolean isValid(File pFile) { + public boolean isValid(File pFile) { try { if (parser.validate(new FileDataSource(pFile)) == null) { return false; @@ -164,12 +166,14 @@ public static boolean isValid(File pFile) { // if preflight passes the file then try and extract the text from the file // this should be more robust at finding errors than load/save but it's // still not ideal - File pTemp; + File pTemp = null; try { pTemp = File.createTempFile("flint-temp-", ".pdfbox.txt"); pTemp.deleteOnExit(); } catch (IOException e) { return false; + } finally { + if (pTemp != null) pTemp.delete(); } return true; //return extractTextFromPDF(pFile, pTemp, true); @@ -180,9 +184,10 @@ public static boolean isValid(File pFile) { * @param pFile PDF file to load * @return whether the file loads and saves successfully or not */ - public static boolean loadSavePDF(File pFile) { + public boolean loadSavePDF(File pFile) { boolean ret = false; + File temp = null; try { // Note that this test passes files that fail to open in Acrobat @@ -191,7 +196,7 @@ public static boolean loadSavePDF(File pFile) { PDFParser parser = new PDFParser(new 
FileInputStream(pFile)); parser.parse(); - File temp = File.createTempFile("flint-temp-"+pFile.getName()+"-", ".pdf"); + temp = File.createTempFile("flint-temp-"+pFile.getName()+"-", ".pdf"); parser.getPDDocument().save(temp); parser.getDocument().close(); temp.deleteOnExit(); @@ -202,6 +207,8 @@ public static boolean loadSavePDF(File pFile) { // PDFBox state that these files have errors and their parser is correct // The only way to find out that the parser doesn't like it is to catch // a general Exception. + } finally { + if (temp != null) temp.delete(); } return ret; } @@ -211,13 +218,15 @@ public static boolean loadSavePDF(File pFile) { * @param pFile file to check * @return whether the file is had DRM or not */ - public static boolean hasDRM(File pFile) { + public boolean hasDRM(File pFile) { boolean ret = false; + + File tmp = null; try { System.setProperty("org.apache.pdfbox.baseParser.pushBackSize", "1024768"); // NOTE: we use loadNonSeq here as it is the latest parser // load() and parser.parse() have hung on test files - File tmp = File.createTempFile("flint-", ".tmp"); + tmp = File.createTempFile("flint-", ".tmp"); tmp.deleteOnExit(); RandomAccess scratchFile = new RandomAccessFile(tmp, "rw"); PDDocument doc = PDDocument.loadNonSeq(new FileInputStream(pFile), scratchFile); @@ -245,6 +254,8 @@ public static boolean hasDRM(File pFile) { // DRM or not. Return false and hope it is detected elsewhere. 
ret = false; + } finally { + if (tmp != null) tmp.delete(); } return ret; } @@ -255,15 +266,16 @@ public static boolean hasDRM(File pFile) { * @param pPDF pdf file to check * @return whether or not the file has DRM */ - public static boolean hasDRMGranular(File pPDF) { + public boolean hasDRMGranular(File pPDF) { boolean ret = false; + File tmp = null; try { System.setProperty("org.apache.pdfbox.baseParser.pushBackSize", "1024768"); // NOTE: we use loadNonSeq here as it is the latest parser // load() and parser.parse() have hung on test files - File tmp = File.createTempFile("flint-", ".tmp"); + tmp = File.createTempFile("flint-", ".tmp"); tmp.deleteOnExit(); RandomAccess scratchFile = new RandomAccessFile(tmp, "rw"); PDDocument doc = PDDocument.loadNonSeq(new FileInputStream(pPDF), scratchFile); @@ -309,6 +321,8 @@ public static boolean hasDRMGranular(File pPDF) { } catch (Exception e) { LOGGER.warn("Exception while doing granular DRM checks leads to invalidity: {}", e); + } finally { + if (tmp != null) tmp.delete(); } return ret; @@ -323,7 +337,7 @@ public static boolean hasDRMGranular(File pPDF) { * @param pOverwrite whether or not to overwrite an existing output file * @return true if converted ok, otherwise false */ - public static boolean extractTextFromPDF(File pFile, File pOutput, boolean pOverwrite) { + public boolean extractTextFromPDF(File pFile, File pOutput, boolean pOverwrite) { if(pOutput.exists()&(!pOverwrite)) return false; PDDocument doc = null; PrintWriter out = null; diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/TikaWrapper.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/TikaWrapper.java index 2b541eb..34ece96 100644 --- a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/TikaWrapper.java +++ b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/TikaWrapper.java @@ -33,9 +33,9 @@ */ public class TikaWrapper { - private static final DefaultDetector detector = new 
DefaultDetector(); + private final DefaultDetector detector = new DefaultDetector(); - private TikaWrapper() { + public TikaWrapper() { // TODO Auto-generated constructor stub } @@ -44,7 +44,7 @@ private TikaWrapper() { * @param pFile file to check * @return the mimetype of the file */ - public static String getMimetype(File pFile) { + public String getMimetype(File pFile) { try { return getMimetype(TikaInputStream.get(pFile)); } catch (FileNotFoundException e) { @@ -63,7 +63,7 @@ public static String getMimetype(File pFile) { * @return mimetype * @throws IOException on error */ - public static String getMimetype(InputStream pInput) throws IOException { + public String getMimetype(InputStream pInput) throws IOException { String type = null; try { diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/XmlReportWithMessageIds.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/XmlReportWithMessageIds.java index a2e9ca3..51cebed 100644 --- a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/XmlReportWithMessageIds.java +++ b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/XmlReportWithMessageIds.java @@ -29,9 +29,13 @@ * (based on Enums) as message content. 
*/ public class XmlReportWithMessageIds extends XmlReportImpl { + + private File reportFile; public XmlReportWithMessageIds(File out, String ePubName, String versionEpubCheck) { super(out, ePubName, versionEpubCheck); + + this.reportFile = out; } @Override @@ -46,5 +50,9 @@ public MyMessage(MessageId messageId, Severity severity, String message, String Message myMessage = new MyMessage(message.getID(), message.getSeverity(), message.getMessage(), message.getSuggestion()); super.message(myMessage, location, arg); } + + public File getReportFile() { + return reportFile; + } } diff --git a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/iTextWrapper.java b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/iTextWrapper.java index f1cb9b1..457f388 100644 --- a/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/iTextWrapper.java +++ b/flint-toolwrappers/src/main/java/uk/bl/dpt/qa/flint/wrappers/iTextWrapper.java @@ -42,7 +42,7 @@ public class iTextWrapper { private static Logger LOGGER = LoggerFactory.getLogger(iTextWrapper.class); - private iTextWrapper() {} + public iTextWrapper() {} /** * Extracts text from a PDF. 
@@ -51,7 +51,7 @@ private iTextWrapper() {} * @param pOverwrite whether or not to overwrite an existing output file * @return true if converted ok, otherwise false */ - public static boolean extractTextFromPDF(File pFile, File pOutput, boolean pOverwrite) { + public boolean extractTextFromPDF(File pFile, File pOutput, boolean pOverwrite) { if(pOutput.exists()&(!pOverwrite)) return false; boolean ret = true; @@ -93,7 +93,7 @@ public static boolean extractTextFromPDF(File pFile, File pOutput, boolean pOver * @param pFile file to check * @return whether the file is valid or not */ - public static boolean isValid(File pFile) { + public boolean isValid(File pFile) { boolean ret = false; @@ -126,7 +126,7 @@ public static boolean isValid(File pFile) { * @param pFile file to check * @return whether the file is had DRM or not */ - public static boolean hasDRM(File pFile) { + public boolean hasDRM(File pFile) { boolean drm = false; diff --git a/pom.xml b/pom.xml index 2efe9b5..74d37fd 100644 --- a/pom.xml +++ b/pom.xml @@ -13,6 +13,7 @@ flint-pdf flint-epub + flint-mobi flint-toolwrappers @@ -70,7 +71,7 @@ 2.17 2.4 1.0.0 - 1.8.6 + 1.8.8 0.9.9-RC1 1.7.6 1.4 @@ -350,4 +351,15 @@ + + + nla-public-releases + scp://maven.nla.gov.au/releases + + + nla-public-snapshots + scp://maven.nla.gov.au/snapshots + + +