/*
 ******************************************************************************
 * Copyright (C) 2003-2014, International Business Machines Corporation and   *
 * others. All Rights Reserved.                                               *
 ******************************************************************************
 */
/**
 * @author Ram Viswanadha
 *
 * This tool validates xml against DTD or valid XML ... IE 6 does not do a good job
 */
package org.unicode.cldr.util;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.FilenameFilter;
import java.io.IOException;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;

@CLDRTool(alias = "validate", description = "Check XML files for validity")
public class XMLValidator {
    public static boolean quiet = false;
    public static boolean parseonly = false;

    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            System.out.println("No files specified. Validation failed. Use --help for help.");
            return;
        }
        for (int i = 0; i < args.length; i++) {
            if (args[i].equals("-q") || args[i].equals("--quiet")) {
                quiet = true;
            } else if (args[i].equals("--help")) {
                usage();
                return;
            } else if (args[i].equals("--parseonly")) {
                System.err.println("# DTD Validation is disabled. Will only check for well formed XML.");
                parseonly = true;
            } else {
                File f = new File(args[i]);
                if (f.isDirectory()) {
                    parseDirectory(f);
                } else {
                    if (!quiet) System.out.println("Processing file " + args[i]);
                    new fileParserThread(args[i]).run();
                }
            }
        }
        if (parseonly) {
            System.err.println("# DTD Validation is disabled. Only checked for well formed XML.");
        }
    }

    private static void parseDirectory(File f) throws IOException {
        // System.err.println("Parsing directory " + f.getAbsolutePath());
        for (File s : f.listFiles(new FilenameFilter() {
            @Override
            public boolean accept(File arg0, String arg1) {
                if (arg1.startsWith(".")) {
                    return false; // skip .git, .svn, ...
                }
                File n = new File(arg0, arg1);
                // System.err.println("Considering " + n.getAbsolutePath() );
                if (n.isDirectory()) {
                    try {
                        parseDirectory(n);
                    } catch (IOException e) {
                        // TODO Auto-generated catch block
                        e.printStackTrace();
                        System.err.println("Error " + e.toString() + " parsing " + arg0.getPath());
                    }
                    return false;
                } else if (arg1.endsWith(".xml")) {
                    return true;
                } else {
                    return false;
                }
            }
        })) {
            if (!quiet) System.out.println("Processing file " + s.getPath());
            new fileParserThread(s.getCanonicalPath()).run();
        }
    }

    private static void usage() {
        System.err.println("usage:  " + XMLValidator.class.getName() + " [ -q ] [ --help ] [ --parseonly ] file ...");
        System.err.println("usage:  " + XMLValidator.class.getName()
            + " [ -q ] [ --help ] [ --parseonly ] directory ...");
    }

    /**
     * Utility method to translate a String filename to URL.
     *
     * Note: This method is not necessarily proven to get the correct URL for
     * every possible kind of filename; it should be improved. It handles the
     * most common cases that we've encountered when running Conformance tests
     * on Xalan. Also note, this method does not handle other non-file: flavors
     * of URLs at all.
     *
     * If the name is null, return null. If the name starts with a common URI
     * scheme (namely the ones found in the examples of RFC2396), then simply
     * return the name as-is (the assumption is that it's already a URL)
     * Otherwise we attempt (cheaply) to convert to a file:/// URL.
     *
     * @param filename
     *            a local path/filename of a file
     * @return a file:/// URL, the same string if it appears to already be a
     *         URL, or null if error
     */
    public static String filenameToURL(String filename) {
        // null begets null - something like the commutative property
        if (null == filename)
            return null;

        // Don't translate a string that already looks like a URL
        if (filename.startsWith("file:") || filename.startsWith("http:")
            || filename.startsWith("ftp:")
            || filename.startsWith("gopher:")
            || filename.startsWith("mailto:")
            || filename.startsWith("news:")
            || filename.startsWith("telnet:"))
            return filename;

        File f = new File(filename);
        String tmp = null;
        try {
            // This normally gives a better path
            tmp = f.getCanonicalPath();
        } catch (IOException ioe) {
            // But this can be used as a backup, for cases
            // where the file does not exist, etc.
            tmp = f.getAbsolutePath();
        }

        // URLs must explicitly use only forward slashes
        if (File.separatorChar == '\\') {
            tmp = tmp.replace('\\', '/');
        }
        // Note the presumption that it's a file reference
        // Ensure we have the correct number of slashes at the
        // start: we always want 3 /// if it's absolute
        // (which we should have forced above)
        if (tmp.startsWith("/"))
            return "file://" + tmp;
        else
            return "file:///" + tmp;

    }

    public static class fileParserThread extends Thread {
        String filename;

        fileParserThread(String _filename) {
            filename = _filename;
        }

        public void run() {
            // Force filerefs to be URI's if needed: note this is independent of any
            // other files
            String docURI = filenameToURL(filename);
            parse(new InputSource(docURI), filename);
        }
    }

    static Document parse(InputSource docSrc, String filename) {

        // Check for BOM.
        try {
            FileInputStream fis = null;
            try {
                fis = new FileInputStream(filename);
                byte bytes[] = new byte[3];
                if (fis.read(bytes) == 3 &&
                    bytes[0] == (byte) 0xef &&
                    bytes[1] == (byte) 0xbb &&
                    bytes[2] == (byte) 0xbf) {
                    System.err.println(filename + ": ERROR: contains UTF-8 BOM (shouldn't happen in CLDR XML files)");
                }
            } finally {
                if (fis != null) {
                    fis.close();
                }
            }
        } catch (IOException ioe) { /* ignored- other branches will report an error. */
        }

        DocumentBuilderFactory dfactory = DocumentBuilderFactory.newInstance();
        // Always set namespaces on
        if (!parseonly) {
            dfactory.setNamespaceAware(true);
            dfactory.setValidating(true);
        }
        // Set other attributes here as needed
        // applyAttributes(dfactory, attributes);

        // Local class: cheap non-printing ErrorHandler
        // This is used to suppress validation warnings
        final String filename2 = filename;
        ErrorHandler nullHandler = new ErrorHandler() {
            public void warning(SAXParseException e) throws SAXException {
                System.err.println(filename2 + ": Warning: " + e.getMessage());

            }

            public void error(SAXParseException e) throws SAXException {
                int col = e.getColumnNumber();
                System.err.println(filename2 + ":" + e.getLineNumber() + (col >= 0 ? ":" + col : "")
                    + ": ERROR: Element " + e.getPublicId()
                    + " is not valid because " + e.getMessage());
            }

            public void fatalError(SAXParseException e) throws SAXException {
                System.err.println(filename2 + ": ERROR ");
                throw e;
            }
        };

        Document doc = null;
        try {
            // First, attempt to parse as XML (preferred)...
            DocumentBuilder docBuilder = dfactory.newDocumentBuilder();
            docBuilder.setErrorHandler(nullHandler);
            docBuilder.setEntityResolver(new CachingEntityResolver());
            // if(docBuilder.isValidating()){
            // System.out.println("The parser is a validating parser");
            // }
            doc = docBuilder.parse(docSrc);
        } catch (Throwable se) {
            // ... if we couldn't parse as XML, attempt parse as HTML...
            if (se instanceof SAXParseException) {
                SAXParseException pe = (SAXParseException) se;
                int col = pe.getColumnNumber();
                System.err.println(filename + ":" + pe.getLineNumber() + (col >= 0 ? ":" + col : "") + ": ERROR:"
                    + se.toString());
            } else {
                System.err.println(filename + ": ERROR:" + se.toString());
            }
            try {
                // @todo need to find an HTML to DOM parser we can use!!!
                // doc = someHTMLParser.parse(new InputSource(filename));
                throw new RuntimeException(filename + ": XMLComparator not HTML parser!");
            } catch (Exception e) {
                if (filename != null) {
                    // ... if we can't parse as HTML, then just parse the text
                    try {

                        // Parse as text, line by line
                        // Since we already know it should be text, this should
                        // work better than parsing by bytes.
                        FileReader fr = new FileReader(filename);
                        BufferedReader br = new BufferedReader(fr);
                        StringBuffer buffer = new StringBuffer();
                        for (;;) {
                            String tmp = br.readLine();

                            if (tmp == null) {
                                break;
                            }

                            buffer.append(tmp);
                            buffer.append("\n"); // Put in the newlines as well
                        }
                        br.close();
                        DocumentBuilder docBuilder = dfactory
                            .newDocumentBuilder();
                        doc = docBuilder.newDocument();
                        Element outElem = doc.createElement("out");
                        Text textNode = doc.createTextNode(buffer.toString());

                        // Note: will this always be a valid node? If we're
                        // parsing
                        // in as text, will there ever be cases where the diff that's
                        // done later on will fail becuase some really garbage-like
                        // text has been put into a node?
                        outElem.appendChild(textNode);
                        doc.appendChild(outElem);
                    } catch (Throwable throwable) {

                        // throwable.printStackTrace();
                    }
                }
            }
        }
        return doc;
    }
}