From 0b46abeec88a5113fa382fddfda4a55a262ac706 Mon Sep 17 00:00:00 2001
From: LieutenantPeacock
Date: Tue, 2 Oct 2018 11:26:33 -0400
Subject: [PATCH] XMLSorter and associated classes.

---
 src/main/java/XMLSorter.java                  | 116 ++++++++++++++
 .../com/ltpeacock/sorter/package-info.java    |   5 +
 .../sorter/xml/ElementComparator.java         |  58 +++++++
 .../ltpeacock/sorter/xml/SortXmlEngine.java   | 172 ++++++++++++++++++
 .../java/com/ltpeacock/sorter/xml/Util.java   |  51 +++++++
 .../ltpeacock/sorter/xml/XmlPrettyPrint.java  |  70 +++++++++
 .../ltpeacock/sorter/xml/package-info.java    |   5 +
 7 files changed, 477 insertions(+)
 create mode 100644 src/main/java/XMLSorter.java
 create mode 100644 src/main/java/com/ltpeacock/sorter/package-info.java
 create mode 100644 src/main/java/com/ltpeacock/sorter/xml/ElementComparator.java
 create mode 100644 src/main/java/com/ltpeacock/sorter/xml/SortXmlEngine.java
 create mode 100644 src/main/java/com/ltpeacock/sorter/xml/Util.java
 create mode 100644 src/main/java/com/ltpeacock/sorter/xml/XmlPrettyPrint.java
 create mode 100644 src/main/java/com/ltpeacock/sorter/xml/package-info.java

diff --git a/src/main/java/XMLSorter.java b/src/main/java/XMLSorter.java
new file mode 100644
index 0000000..4ce4e24
--- /dev/null
+++ b/src/main/java/XMLSorter.java
@@ -0,0 +1,116 @@
+import static java.lang.String.format;
+import static com.ltpeacock.sorter.xml.Util.logException;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.util.logging.Level;
+import java.util.logging.Logger;
+
+import com.ltpeacock.sorter.xml.SortXmlEngine;
+
+/**
+ * Command-line entry point that sorts the elements of an XML document.
+ *
+ * @author LieutenantPeacock
+ */
+public class XMLSorter {
+    private static final Logger LOG = Logger.getLogger(XMLSorter.class.getName());
+
+    /**
+     * Dispatches on the argument count: two arguments sort a file into a file, one sorts a file
+     * to stdout (or prints the usage for {@code --help}), none sorts stdin to stdout.
+     */
+    public static void main(String[] args) {
+        switch (args.length) {
+        case 2:
+            fileInFileOut(args[0], args[1]);
+            break;
+        case 1:
+            if ("--help".equals(args[0])) {
+                help();
+            } else {
+                fileInStdOut(args[0]);
+            }
+            break;
+        case 0:
+            stdInStdOut();
+            break;
+        default:
+            System.err.println("Too many arguments.");
+            break;
+        }
+    }
+
+    /** Sorts the XML file {@code inFileName} and writes the result to {@code outFileName}. */
+    protected static void fileInFileOut(final String inFileName, final String outFileName) {
+        final File inFile = new File(inFileName);
+        final File outFile = new File(outFileName);
+        final long startMs = System.currentTimeMillis();
+        boolean success = false;
+        try (InputStream is = new FileInputStream(inFile);
+                OutputStream os = new FileOutputStream(outFile)) {
+            System.out.println(format("Reading from '%s' ...", inFile));
+            SortXmlEngine engine = new SortXmlEngine();
+            engine.sort(is, os);
+            success = true;
+        } catch (IOException e) { // FileNotFoundException is an IOException
+            logException(e);
+        }
+        if (success) {
+            final long endMs = System.currentTimeMillis();
+            final long tookMs = endMs - startMs;
+            System.out.println(format("Wrote to '%s' ..., took %s ms.", outFile, tookMs));
+        }
+    }
+
+    /** Sorts the XML file {@code inFileName} and writes the result to standard output. */
+    protected static void fileInStdOut(final String inFileName) {
+        final File inFile = new File(inFileName);
+        final long startMs = System.currentTimeMillis();
+        boolean success = false;
+        try (InputStream is = new FileInputStream(inFile);
+                OutputStream os = System.out) {
+            LOG.log(Level.FINE, format("Reading from '%s' ...", inFile));
+            SortXmlEngine engine = new SortXmlEngine();
+            engine.sort(is, os);
+            success = true;
+        } catch (IOException e) { // FileNotFoundException is an IOException
+            logException(e);
+        }
+        if (success) {
+            final long endMs = System.currentTimeMillis();
+            final long tookMs = endMs - startMs;
+            LOG.log(Level.FINE, format("Wrote to 'stdout' ..., took %s ms.", tookMs));
+        }
+    }
+
+    /** Sorts the XML read from standard input and writes the result to standard output. */
+    protected static void stdInStdOut() {
+        final long startMs = System.currentTimeMillis();
+        boolean success = false;
+        try (InputStream is = System.in;
+                OutputStream os = System.out) {
+            LOG.log(Level.FINE, "Reading from 'stdin' ...");
+            SortXmlEngine engine = new SortXmlEngine();
+            engine.sort(is, os);
+            success = true;
+        } catch (IOException e) {
+            logException(e);
+        }
+        if (success) {
+            final long endMs = System.currentTimeMillis();
+            final long tookMs = endMs - startMs;
+            LOG.log(Level.FINE, format("Wrote to 'stdout' ..., took %s ms.", tookMs));
+        }
+    }
+
+    /** Prints the command-line usage to standard output. */
+    protected static void help() {
+        System.out.println("Usage: [inputFile] [outputFile]");
+    }
+}
diff --git a/src/main/java/com/ltpeacock/sorter/package-info.java b/src/main/java/com/ltpeacock/sorter/package-info.java
new file mode 100644
index 0000000..877308a
--- /dev/null
+++ b/src/main/java/com/ltpeacock/sorter/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Package containing subpackages for sorting various file types.
+ * @author LieutenantPeacock
+ */
+package com.ltpeacock.sorter;
\ No newline at end of file
diff --git a/src/main/java/com/ltpeacock/sorter/xml/ElementComparator.java b/src/main/java/com/ltpeacock/sorter/xml/ElementComparator.java
new file mode 100644
index 0000000..a76dab0
--- /dev/null
+++ b/src/main/java/com/ltpeacock/sorter/xml/ElementComparator.java
@@ -0,0 +1,58 @@
+package com.ltpeacock.sorter.xml;
+
+import java.util.Comparator;
+import java.util.Locale;
+
+import org.w3c.dom.Element;
+
+/**
+ * Orders elements by node name (local part before prefix), then by their {@code name} attribute.
+ *
+ * @author LieutenantPeacock
+ */
+public class ElementComparator implements Comparator<Element> {
+
+    private static final String NAME = "name";
+
+    public ElementComparator() {
+    }
+
+    /** Compares ignoring case: first the reversed node names, then the {@code name} attributes. */
+    @Override
+    public int compare(final Element arg0, final Element arg1) {
+        int c = Util.compare(safeToUpper(reverseColumns(arg0.getNodeName())),
+                safeToUpper(reverseColumns(arg1.getNodeName())));
+        if (0 == c) {
+            final String nameAttr0 = arg0.getAttribute(NAME);
+            final String nameAttr1 = arg1.getAttribute(NAME);
+            c = Util.compare(safeToUpper(nameAttr0), safeToUpper(nameAttr1));
+        }
+        return c;
+    }
+
+    /**
+     * Swaps the parts around the first colon, e.g. {@code "xs:element"} becomes {@code "element:xs"}.
+     *
+     * @param input a possibly-null node name
+     * @return the swapped name, or {@code input} itself when it is {@code null}, has no colon or
+     *         starts with one
+     */
+    protected String reverseColumns(final String input) {
+        final int idx = (input == null) ? -1 : input.indexOf(':');
+        if (idx <= 0) {
+            return input;
+        }
+        return input.substring(idx + 1) + ':' + input.substring(0, idx);
+    }
+
+    /**
+     * Upper-cases {@code input} with {@link Locale#ROOT} so the ordering does not depend on the
+     * default locale.
+     *
+     * @param input a possibly-null string
+     * @return the upper-cased string, or {@code null} when {@code input} is {@code null}
+     */
+    protected String safeToUpper(final String input) {
+        return (input == null) ? null : input.toUpperCase(Locale.ROOT);
+    }
+}
diff --git a/src/main/java/com/ltpeacock/sorter/xml/SortXmlEngine.java b/src/main/java/com/ltpeacock/sorter/xml/SortXmlEngine.java
new file mode 100644
index 0000000..6fa9022
--- /dev/null
+++ b/src/main/java/com/ltpeacock/sorter/xml/SortXmlEngine.java
@@ -0,0 +1,172 @@
+package com.ltpeacock.sorter.xml;
+
+import static java.lang.String.format;
+import static com.ltpeacock.sorter.xml.Util.logException;
+import static com.ltpeacock.sorter.xml.Util.removeEmptyLines;
+
+import java.io.BufferedInputStream;
+import java.io.BufferedOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.io.OutputStreamWriter;
+import java.io.PrintWriter;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import javax.xml.parsers.DocumentBuilder;
+import javax.xml.parsers.DocumentBuilderFactory;
+import javax.xml.parsers.ParserConfigurationException;
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+
+import org.w3c.dom.Document;
+import org.w3c.dom.Element;
+import org.w3c.dom.Node;
+import org.w3c.dom.NodeList;
+import org.xml.sax.SAXException;
+
+/** Sorts the child elements of an XML document recursively and writes the re-indented result. */
+public class SortXmlEngine {
+    private static final Logger LOG = Logger.getLogger(SortXmlEngine.class.getName());
+
+    /**
+     * Parses the XML from {@code in}, sorts it and writes the formatted document to {@code os}
+     * encoded as UTF-8. Both streams are closed when this method returns.
+     */
+    public void sort(final InputStream in, final OutputStream os) {
+        try (BufferedInputStream bis = new BufferedInputStream(in);
+                BufferedOutputStream bos = new BufferedOutputStream(os);
+                PrintWriter pw = new PrintWriter(new OutputStreamWriter(bos, "UTF-8"))) {
+            final Document doc = readXml(bis);
+            final Element rootDocElement = doc.getDocumentElement();
+            sortElement(rootDocElement);
+            final String prettyXml = XmlPrettyPrint.prettyXml(doc);
+            final String prettyXml2 = XmlPrettyPrint.prettyFormat(prettyXml);
+            final String prettyXml4 = removeEmptyLines(prettyXml2);
+            // The space inserted before '>' is what removeExtraClose() matches on.
+            final String prettyXml5 = prettyXml4.replaceAll("\">", "\" >").replaceAll("\"/>", "\" />");
+            final String prettyXml6 = removeExtraWsdlpartClose(prettyXml5);
+            pw.print(prettyXml6);
+        } catch (IOException e) {
+            logException(e);
+        }
+    }
+
+    /**
+     * Collapses empty {@code wsdl:part}, {@code wsdl:input}, {@code wsdl:output} and
+     * {@code xs:element} elements into self-closing tags, moves the first tag after the XML
+     * declaration onto its own line and removes the space left before a closing {@code >}.
+     */
+    static String removeExtraWsdlpartClose(final String inputStr) {
+        final String s1 = removeExtraClose(inputStr, "wsdl:part");
+        final String s2 = removeExtraClose(s1, "wsdl:input");
+        final String s3 = removeExtraClose(s2, "wsdl:output");
+        final String s4 = removeExtraClose(s3, "xs:element");
+        // NOTE(review): these two replacements were reconstructed from a damaged copy of this
+        // patch; confirm them against the repository.
+        final String s5 = s4.replaceFirst("\\?><", "?>\r\n<");
+        final String s6 = s5.replaceAll("\" >", "\">");
+        return s6;
+    }
+
+    /**
+     * Rewrites an opening {@code tag} whose attributes end in a space, followed only by optional
+     * whitespace and its closing tag, as a single self-closing tag.
+     *
+     * @param inputStr XML text to rewrite
+     * @param tag qualified element name; it is inserted into the regular expression unescaped
+     * @return the rewritten text
+     */
+    static String removeExtraClose(final String inputStr, final String tag) {
+        // NOTE(review): the closing-tag part of this pattern was reconstructed; confirm it.
+        final Pattern pattern = Pattern.compile(
+                "(<" + tag + " .+ )>\\r?\\n?[ \t]*</" + tag + ">", Pattern.MULTILINE);
+        final Matcher m = pattern.matcher(inputStr);
+        return m.replaceAll("$1/>");
+    }
+
+    /**
+     * Recursively sorts the child elements of {@code rootDocElement} with {@link ElementComparator}.
+     * The sorted elements are re-appended, so they end up after any non-element child nodes.
+     *
+     * @param rootDocElement element whose children are reordered in place
+     */
+    void sortElement(final Element rootDocElement) {
+        final NodeList nodeList = rootDocElement.getChildNodes();
+        final int length = nodeList.getLength();
+        final List<Element> elemList0 = new ArrayList<>(length);
+        for (int i = 0; i < length; i++) {
+            Node currentNode = nodeList.item(i);
+            if (currentNode.getNodeType() == Node.ELEMENT_NODE) {
+                final String nodeName = currentNode.getNodeName();
+                LOG.fine(format("localName [%s]", nodeName));
+                elemList0.add((Element) currentNode);
+                if (currentNode.getChildNodes() != null && currentNode.getChildNodes().getLength() > 0) {
+                    sortElement((Element) currentNode);
+                }
+            } else {
+                LOG.fine(format("Node type [%s]", currentNode.getNodeType()));
+            }
+        }
+        Collections.sort(elemList0, new ElementComparator());
+        int count = 0;
+        for (Node elem : elemList0) {
+            count++;
+            LOG.fine(format("removing Count [%s], name[%s]", count, elem.getNodeName()));
+            rootDocElement.removeChild(elem);
+        }
+        int count2 = 0;
+        for (Node elem : elemList0) {
+            count2++;
+            LOG.fine(format("Count [%s], name[%s]", count2, elem.getNodeName()));
+            rootDocElement.appendChild(elem);
+        }
+    }
+
+    /**
+     * Writes {@code doc} to {@code out} as indented UTF-8 XML including the XML declaration.
+     *
+     * @throws IOException if the UTF-8 writer cannot be created
+     * @throws TransformerException if the transformer cannot be created or the transformation fails
+     */
+    public static void printDocument(final Document doc, final OutputStream out)
+            throws IOException, TransformerException {
+        TransformerFactory tf = TransformerFactory.newInstance();
+        Transformer transformer = tf.newTransformer();
+        transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "no");
+        transformer.setOutputProperty(OutputKeys.METHOD, "xml");
+        transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+        transformer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
+        transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "4");
+
+        LOG.info("===== printing XML =====");
+        transformer.transform(new DOMSource(doc),
+                new StreamResult(new OutputStreamWriter(out, "UTF-8")));
+    }
+
+    /**
+     * Parses {@code is} into a DOM document.
+     *
+     * @return the parsed document
+     * @throws RuntimeException (raised by {@code logException}) if the parser cannot be configured
+     *         or the input cannot be read or parsed
+     */
+    protected Document readXml(final InputStream is) {
+        try {
+            final DocumentBuilder docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+            return docBuilder.parse(is);
+        } catch (ParserConfigurationException | SAXException | IOException e) {
+            logException(e);
+            return null; // not reached: logException always throws
+        }
+    }
+}
diff --git a/src/main/java/com/ltpeacock/sorter/xml/Util.java b/src/main/java/com/ltpeacock/sorter/xml/Util.java
new file mode 100644
index 
0000000..a190b3b
--- /dev/null
+++ b/src/main/java/com/ltpeacock/sorter/xml/Util.java
@@ -0,0 +1,51 @@
+package com.ltpeacock.sorter.xml;
+
+import java.util.logging.Level;
+import java.util.logging.Logger;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/** Static helpers for the XML sorter: null-safe comparison, blank-line removal, error logging. */
+public class Util {
+    private static final Logger LOG = Logger.getLogger(Util.class.getName());
+    private static final Pattern BLANK_LINES = Pattern.compile("(^[ \t]*$\\r\\n)+", Pattern.MULTILINE);
+
+    private Util() {
+        throw new IllegalStateException("No instances of Util allowed");
+    }
+
+    /** Compares two possibly-null strings, ordering {@code null} before every non-null value. */
+    public static int compare(final String s1, final String s2) {
+        return compare(s1, s2, false);
+    }
+
+    /**
+     * Null-safe {@link String#compareTo(String)}.
+     * @param nullGreater {@code true} to order {@code null} after non-null values, {@code false} before
+     */
+    public static int compare(final String s1, final String s2, final boolean nullGreater) {
+        if (s1 == s2) { // same reference; also covers both being null
+            return 0;
+        } else if (s1 == null) {
+            return nullGreater ? 1 : -1;
+        } else if (s2 == null) {
+            return nullGreater ? -1 : 1;
+        }
+        return s1.compareTo(s2);
+    }
+
+    /**
+     * Collapses every run of blank lines and line breaks in CRLF-terminated text to a single CRLF.
+     * @param inputStr text to clean; must not be {@code null}
+     */
+    public static String removeEmptyLines(final String inputStr) {
+        final Matcher blankRuns = BLANK_LINES.matcher(inputStr);
+        return blankRuns.replaceAll("\r\n").replaceAll("(\\r\\n)+", "\r\n");
+    }
+
+    /** Logs {@code e} at SEVERE level, then rethrows it wrapped in a {@link RuntimeException}. */
+    public static void logException(final Exception e) {
+        LOG.log(Level.SEVERE, "Error", e);
+        throw new RuntimeException(e);
+    }
+}
diff --git a/src/main/java/com/ltpeacock/sorter/xml/XmlPrettyPrint.java b/src/main/java/com/ltpeacock/sorter/xml/XmlPrettyPrint.java
new file mode 100644
index 0000000..dad084e
--- /dev/null
+++ b/src/main/java/com/ltpeacock/sorter/xml/XmlPrettyPrint.java
@@ -0,0 +1,70 @@
+package com.ltpeacock.sorter.xml;
+
+import java.io.StringReader;
+import java.io.StringWriter;
+
+import javax.xml.transform.OutputKeys;
+import javax.xml.transform.Source;
+import javax.xml.transform.Transformer;
+import javax.xml.transform.TransformerConfigurationException;
+import javax.xml.transform.TransformerException;
+import javax.xml.transform.TransformerFactory;
+import javax.xml.transform.TransformerFactoryConfigurationError;
+import javax.xml.transform.dom.DOMSource;
+import javax.xml.transform.stream.StreamResult;
+import javax.xml.transform.stream.StreamSource;
+
+import org.w3c.dom.Document;
+
+/** String-based XML pretty-printing helpers built on the JAXP {@link Transformer} API. */
+public class XmlPrettyPrint {
+    public XmlPrettyPrint() {
+    }
+
+    /**
+     * Serializes {@code doc} to a string with indentation enabled.
+     * @param doc the DOM document to serialize
+     * @return the serialized XML, never {@code null}
+     * @throws IllegalStateException if the transformer cannot be created (for example a
+     *         {@link TransformerConfigurationException}) or the transformation fails
+     */
+    public static String prettyXml(final Document doc) {
+        try {
+            final Transformer transformer = TransformerFactory.newInstance().newTransformer();
+            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+            transformer.setOutputProperty("{http://xml.apache.org/xslt}indent-amount", "2");
+            final StringWriter buffer = new StringWriter();
+            transformer.transform(new DOMSource(doc), new StreamResult(buffer));
+            return buffer.toString();
+        } catch (TransformerException | TransformerFactoryConfigurationError | IllegalArgumentException e) {
+            throw new IllegalStateException("Unable to serialize XML document", e);
+        }
+    }
+
+    /**
+     * Re-indents an XML string.
+     * @param input the XML text to format
+     * @param indent value set as the transformer factory's {@code indent-number} attribute
+     * @return the formatted XML
+     * @throws RuntimeException wrapping any exception raised while configuring or running the transformer
+     */
+    public static String prettyFormat(String input, int indent) {
+        try {
+            Source xmlInput = new StreamSource(new StringReader(input));
+            StringWriter stringWriter = new StringWriter();
+            StreamResult xmlOutput = new StreamResult(stringWriter);
+            TransformerFactory transformerFactory = TransformerFactory.newInstance();
+            transformerFactory.setAttribute("indent-number", indent);
+            Transformer transformer = transformerFactory.newTransformer();
+            transformer.setOutputProperty(OutputKeys.INDENT, "yes");
+            transformer.transform(xmlInput, xmlOutput);
+            return xmlOutput.getWriter().toString();
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /** Re-indents an XML string with an {@code indent-number} of 2. */
+    public static String prettyFormat(String input) {
+        return prettyFormat(input, 2);
+    }
+}
diff --git a/src/main/java/com/ltpeacock/sorter/xml/package-info.java b/src/main/java/com/ltpeacock/sorter/xml/package-info.java
new file mode 100644
index 0000000..f2dbfd1
--- /dev/null
+++ b/src/main/java/com/ltpeacock/sorter/xml/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Package for XML sorting.
+ * @author LieutenantPeacock
+ */
+package com.ltpeacock.sorter.xml;
\ No newline at end of file