package dk.statsbiblioteket.doms.disseminator;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;

import fedora.client.FedoraClient;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import sun.misc.BASE64Encoder;

/**
 * Utility class for creating a digital object bundle: one XML document that
 * contains the FoxML of a number of Fedora digital objects, listed one after
 * the other, optionally with selected datastreams inlined as BASE64 content.
 */
public class DigitalObjectBundleCreator {
    private static final Log LOG
            = LogFactory.getLog(DigitalObjectBundleCreator.class);
    private static final int BUFFER_BLOCKSIZE = 8192;

    /** Namespace of the bundle wrapper elements. */
    private static final String BUNDLE_NAMESPACE
            = "http://fedora.statsbiblioteket.dk/datatypes/digitalObjectBundle/";
    /** Namespace of FoxML elements. */
    private static final String FOXML_NAMESPACE
            = "info:fedora/fedora-system:def/foxml#";
    /** Namespace of RDF elements and attributes. */
    private static final String RDF_NAMESPACE
            = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
    /** Prefix of relation end points that refer to a Fedora object. */
    private static final String FEDORA_URI_PREFIX = "info:fedora/";
    /**
     * Splits a Fedora URL into server (group 1), PID (group 2) and an
     * optional trailing part such as a datastream name (group 3).
     * Compiled once instead of once per processed URL.
     */
    private static final Pattern FEDORA_URL_PATTERN
            = Pattern.compile("(.*)/get/([^/]+)(/.*)?");

    //TODO: Real user/pass from properties
    private static final String FEDORA_USERNAME = "fedora";
    private static final String FEDORA_PASSWORD = "fedora";

    /**
     * Package up a number of Fedora digital objects in a digital object bundle.
     * Will simply list one FoxML object after the other. All relations with
     * given predicates will be followed. Datastreams in a given list will be
     * inlined as inline XML in a CDATA BASE64-encoded section.
     * <p>
     * Each object (identified by server and PID) is included at most once,
     * so cyclic relations between objects terminate.
     *
     * @param startingURLs List of object URLs. The URLs may contain a
     * datastream name as well, it will be stripped off. URLs not recognised as
     * URLs to a fedora object will be ignored.
     * @param datastreamsToInclude List of IDs of datastreams to be inlined
     * @param relsToFollow Qualified element names (as they appear in the
     * object's RDF, i.e. including prefix) of the relations to follow.
     * @return A document with the bundle
     *
     * @throws Error on exceptions that should never happen, like unable to
     * initialise default document builder.
     */
    public static Document createBundle(Set<String> startingURLs,
                                        Set<String> datastreamsToInclude,
                                        Set<String> relsToFollow)
            throws Error {
        //Initialise result document
        DocumentBuilder documentBuilder = getDocumentBuilder();
        Document result = documentBuilder.newDocument();
        Element topLevelElm = result.createElementNS(
                BUNDLE_NAMESPACE, "d:digitalObjectBundle");
        result.appendChild(topLevelElm);
        //TODO: For completeness - handle DOMExceptions as errors?

        //Work list; grows while relations are discovered
        List<String> urlList = new ArrayList<String>(startingURLs);
        //Objects already handled, keyed by server + "/get/" + pid. Without
        //this, two objects relating to each other would loop forever.
        Set<String> visitedObjects = new HashSet<String>();

        for (int index = 0; index < urlList.size(); index++) {
            String url = urlList.get(index);
            LOG.debug("URL: " + url);
            //Strip the server and PID from the URL
            Matcher matcher
                    = (url == null) ? null : FEDORA_URL_PATTERN.matcher(url);
            if (matcher == null || !matcher.find()) {
                LOG.warn("The url '" + url
                         + "' does not seem to be a Fedora URL. Ignoring");
                //just ignore this URL
                continue;
            }
            String server = matcher.group(1);
            String pid = matcher.group(2);

            if (!visitedObjects.add(server + "/get/" + pid)) {
                LOG.debug("Object '" + pid + "' from server '" + server
                          + "' is already handled. Skipping '" + url + "'");
                continue;
            }

            //Fetch and parse the contents from Fedora
            Document fedoraObject;
            try {
                fedoraObject = readFoxmlDocument(server, FEDORA_USERNAME,
                                                 FEDORA_PASSWORD,
                                                 pid, documentBuilder);
            } catch (Exception e) {
                LOG.error("Unable to get PID '" + pid + "' from server '"
                          + server + "'. Ignoring the URL '" + url + "'", e);
                //unable to connect to fedora in that URL. Just ignore it.
                continue;
            }

            //Snapshot the datastream nodes first: the node list is live and
            //the document is modified while the datastreams are processed.
            NodeList datastreamNodes = fedoraObject.getElementsByTagNameNS(
                    FOXML_NAMESPACE, "datastream");
            List<Node> datastreams = new ArrayList<Node>();
            if (datastreamNodes != null) {
                for (int i = 0; i < datastreamNodes.getLength(); i++) {
                    datastreams.add(datastreamNodes.item(i));
                }
            }
            for (Node datastreamNode : datastreams) {
                checkDatastream(datastreamNode, datastreamsToInclude,
                                server, pid, fedoraObject);
            }

            //Drop contents in result. The document element is used rather
            //than the first child, which may be a comment or a processing
            //instruction.
            Node importedD = result.importNode(
                    fedoraObject.getDocumentElement(), true);
            topLevelElm.appendChild(importedD);

            addRelatedUrls(fedoraObject, server, relsToFollow, urlList);
        }

        return result;
    }

    /**
     * Find the end points of all relations to follow in an object and append
     * them to the list of URLs to process.
     *
     * @param fedoraObject The parsed FoxML of the object.
     * @param server The server the object was read from. Used to build URLs
     * for end points given as "info:fedora/" URIs.
     * @param relsToFollow Qualified element names of relations to follow.
     * @param urlList The list that newly found URLs are appended to.
     */
    private static void addRelatedUrls(Document fedoraObject, String server,
                                       Set<String> relsToFollow,
                                       List<String> urlList) {
        //TODO: Currently follows relations from all datastreams
        NodeList rdfNodes = fedoraObject.getElementsByTagNameNS(
                RDF_NAMESPACE, "Description");

        //Run through all relations
        for (int i = 0; i < rdfNodes.getLength(); i++) {
            Node descriptionNode = rdfNodes.item(i);
            //TODO: Check about-attribute
            NodeList children = descriptionNode.getChildNodes();
            for (int c = 0; c < children.getLength(); c++) {
                Node child = children.item(c);
                LOG.debug("Child:" + child.getNodeName());
                //only children that we should follow
                if (child.getNodeType() != Node.ELEMENT_NODE
                    || !relsToFollow.contains(child.getNodeName())) {
                    continue;
                }
                //find the endpoint
                NamedNodeMap attributes = child.getAttributes();
                if (attributes == null) {
                    continue;
                }
                LOG.debug("Attrbute length: " + attributes.getLength());
                Node resourceAttribute
                        = attributes.getNamedItem("rdf:resource");
                if (resourceAttribute == null) {
                    //same attribute, but declared with another prefix
                    resourceAttribute = attributes.getNamedItemNS(
                            RDF_NAMESPACE, "resource");
                }
                if (resourceAttribute == null) {
                    //e.g. a relation with a literal value. Nothing to follow.
                    LOG.debug("Relation '" + child.getNodeName()
                              + "' has no rdf:resource attribute. Ignoring");
                    continue;
                }
                String relEndPoint = resourceAttribute.getNodeValue();
                //add url to list of starting points
                String newUrl;
                if (relEndPoint.startsWith(FEDORA_URI_PREFIX)) {
                    newUrl = server + "/get/"
                             + relEndPoint.substring(
                            FEDORA_URI_PREFIX.length())
                             + "/DC";
                } else {
                    newUrl = relEndPoint;
                }
                LOG.info("Adding '" + newUrl + "' to Bundle");
                urlList.add(newUrl);
            }
        }
    }

    /**
     * Reduce a datastream to its newest version and, if the datastream is in
     * the set of datastreams to include and has control group R, E or M,
     * replace its content with the inlined data.
     *
     * @param datastreamNode The datastream node. Is modified.
     * @param datastreamsToInclude IDs of the datastreams to inline.
     * @param server The server the object was read from.
     * @param pid The PID of the object.
     * @param fedoraObject The document the datastream node belongs to.
     */
    private static void checkDatastream(Node datastreamNode,
                                        Set<String> datastreamsToInclude,
                                        String server, String pid,
                                        Document fedoraObject) {

        //Retain only newest version
        Node datastreamVersionNode = retainNewestVersion(
                datastreamNode);
        //If name in datastreamsToInclude,
        String datastreamID = getDatastreamID(datastreamNode);
        if (datastreamID == null) {
            LOG.error("Malformed datastream ID. Ignoring");
            return;
        }
        if (!datastreamsToInclude.contains(datastreamID)) {
            return;
        }
        if (datastreamVersionNode == null) {
            LOG.warn("Datastream '" + datastreamID + "' in object '" + pid
                     + "' has no datastream version. Ignoring.");
            return;
        }
        Node controlGroupAttribute
                = datastreamNode.getAttributes().getNamedItem(
                "CONTROL_GROUP");
        if (controlGroupAttribute == null) {
            LOG.warn("Datastream '" + datastreamID + "' in object '" + pid
                     + "' has no CONTROL_GROUP attribute. Ignoring.");
            return;
        }
        String controlGroup = controlGroupAttribute.getNodeValue();
        if ("R".equals(controlGroup)
            || "E".equals(controlGroup)
            || "M".equals(controlGroup)) {

            inlineDatastream(controlGroupAttribute, server,
                             pid,
                             datastreamID, fedoraObject,
                             datastreamVersionNode);
        }
    }

    /**
     * Replace the content of a datastream version with a foxml:xmlContent
     * element holding the BASE64-encoded data in a CDATA section, and set
     * the control group to "X". If the data cannot be read, the error is
     * logged and the datastream is left untouched.
     *
     * @param controlGroupAttribute The CONTROL_GROUP attribute node of the
     * datastream.
     * @param server The server the object was read from.
     * @param pid The PID of the object.
     * @param datastreamID The ID of the datastream.
     * @param fedoraObject The document the datastream belongs to.
     * @param datastreamVersionNode The datastream version node to change.
     */
    private static void inlineDatastream(
            Node controlGroupAttribute,
            String server, String pid,
            String datastreamID,
            Document fedoraObject,
            Node datastreamVersionNode) {
        byte[] bytes;
        try {
            bytes = readContentData(controlGroupAttribute, server, pid,
                                    datastreamID,
                                    datastreamVersionNode);
        } catch (IOException e) {
            LOG.error("Unable to inline datastream '" + datastreamID + "' from"
                      + " object '" + pid + "'. Ignoring.", e);
            return;
        }

        //Replace datastream with xmlContents datastream
        while (datastreamVersionNode.getFirstChild()
               != null) {
            datastreamVersionNode.removeChild(
                    datastreamVersionNode.getFirstChild());
        }
        controlGroupAttribute.setNodeValue("X");
        Node xmlContent = fedoraObject.createElementNS(
                FOXML_NAMESPACE,
                "foxml:xmlContent");
        datastreamVersionNode.appendChild(xmlContent);

        //Insert CDATA element with contents
        Node content = fedoraObject.createElementNS(
                BUNDLE_NAMESPACE,
                "d:content");
        xmlContent.appendChild(content);
        Node cdata = fedoraObject.createCDATASection(
                new BASE64Encoder().encode(bytes));
        //TODO: BASE64Encoder is not a standard class!!
        content.appendChild(cdata);
    }

    /**
     * Read the full content of a datastream into memory.
     *
     * @param controlGroupAttribute The CONTROL_GROUP attribute node of the
     * datastream.
     * @param server The server the object was read from.
     * @param pid The PID of the object.
     * @param datastreamID The ID of the datastream.
     * @param datastreamVersionNode The datastream version node.
     * @return The bytes read from the content URL.
     *
     * @throws IOException If no content URL can be determined, or on trouble
     * reading from it.
     */
    private static byte[] readContentData(Node controlGroupAttribute,
                                          String server, String pid,
                                          String datastreamID,
                                          Node datastreamVersionNode)
            throws IOException {
        //url of the contents to inline
        URL contentURL = generateContentURL(controlGroupAttribute, server, pid,
                                            datastreamID,
                                            datastreamVersionNode);
        if (contentURL == null) {
            throw new IOException("No content location found for datastream '"
                                  + datastreamID + "' in object '" + pid
                                  + "'");
        }

        //Read data from URL
        //TODO: Large content read into memory!!
        URLConnection urlConnection = contentURL.openConnection();
        byte[] buffer = new byte[BUFFER_BLOCKSIZE];
        InputStream inputStream = urlConnection.getInputStream();
        try {
            ByteArrayOutputStream baos = new ByteArrayOutputStream();
            int read;
            while ((read = inputStream.read(buffer)) != -1) {
                baos.write(buffer, 0, read);
            }
            return baos.toByteArray();
        } finally {
            //Always release the connection's stream. A failure to close
            //must not hide the outcome of the read.
            try {
                inputStream.close();
            } catch (IOException e) {
                LOG.debug("Unable to close stream from '" + contentURL + "'",
                          e);
            }
        }
    }

    /**
     * Find the URL to read the content of a datastream from. For control
     * group "M" the URL is built from server, PID and datastream ID. For
     * other control groups it is the REF attribute of the first
     * foxml:contentLocation child that has one.
     *
     * @param controlGroupAttribute The CONTROL_GROUP attribute node of the
     * datastream.
     * @param server The server the object was read from.
     * @param pid The PID of the object.
     * @param datastreamID The ID of the datastream.
     * @param datastreamVersionNode The datastream version node.
     * @return The content URL, or null if no content location is found.
     *
     * @throws MalformedURLException If the location is not a valid URL.
     */
    private static URL generateContentURL(Node controlGroupAttribute,
                                          String server, String pid,
                                          String datastreamID,
                                          Node datastreamVersionNode)
            throws MalformedURLException {
        if ("M".equals(controlGroupAttribute.getNodeValue())) {
            //generate URL from PID and REF
            return new URL(server + "/get/" + pid + "/" + datastreamID);
        }
        NodeList elms = datastreamVersionNode.getChildNodes();
        for (int c = 0; c < elms.getLength(); c++) {
            Node child = elms.item(c);
            if (isFoxmlElement(child, "contentLocation")) {
                String ref = getAttributeValue(child, "REF");
                if (ref != null) {
                    return new URL(ref);
                }
            }
        }
        return null;
    }

    /**
     * Remove all datastream versions older than the newest one from a
     * datastream. Versions are compared on the string value of their CREATED
     * attribute; a version without that attribute is considered older than
     * any version that has it. Versions sharing the newest date are all kept.
     *
     * @param datastreamNode The datastream node. Is modified.
     * @return The last of the retained datastream version nodes, or null if
     * the datastream has no versions.
     */
    private static Node retainNewestVersion(
            Node datastreamNode) {
        NodeList children = datastreamNode.getChildNodes();
        String newestDate = findNewestDate(children);

        //The child list is live: removing nodes while iterating it by index
        //would skip the node following each removed one. Work on a snapshot.
        List<Node> versions = new ArrayList<Node>();
        for (int c = 0; c < children.getLength(); c++) {
            Node child = children.item(c);
            if (isFoxmlElement(child, "datastreamVersion")) {
                versions.add(child);
            }
        }

        Node datastreamVersionNode = null;
        for (Node version : versions) {
            String created = getAttributeValue(version, "CREATED");
            boolean older = newestDate != null
                            && (created == null
                                || created.compareTo(newestDate) < 0);
            if (older) {
                datastreamNode.removeChild(version);
            } else {
                datastreamVersionNode = version;
            }
        }
        return datastreamVersionNode;
    }

    /**
     * Find the greatest CREATED value among the datastream versions in a
     * list of nodes, using string comparison.
     *
     * @param children The nodes to search.
     * @return The greatest CREATED value, or null if no datastream version
     * with a CREATED attribute is found.
     */
    private static String findNewestDate(NodeList children) {
        String newestDate = null;
        for (int c = 0; c < children.getLength(); c++) {
            Node child = children.item(c);
            if (isFoxmlElement(child, "datastreamVersion")) {
                String created = getAttributeValue(child, "CREATED");
                if (created != null
                    && (newestDate == null
                        || created.compareTo(newestDate) > 0)) {
                    newestDate = created;
                }
            }
        }
        return newestDate;
    }

    /**
     * Tell whether a node is the FoxML element with the given local name.
     * Matches on the conventional "foxml:" prefixed node name, or on the
     * FoxML namespace and local name if another prefix is used.
     *
     * @param node The node to check.
     * @param localName The element name without prefix.
     * @return true if the node is that FoxML element.
     */
    private static boolean isFoxmlElement(Node node, String localName) {
        if (node.getNodeType() != Node.ELEMENT_NODE) {
            return false;
        }
        if (("foxml:" + localName).equals(node.getNodeName())) {
            return true;
        }
        return FOXML_NAMESPACE.equals(node.getNamespaceURI())
               && localName.equals(node.getLocalName());
    }

    /**
     * Get the value of a named attribute of a node.
     *
     * @param node The node.
     * @param name The attribute name.
     * @return The attribute value, or null if the node has no attributes or
     * no attribute with that name.
     */
    private static String getAttributeValue(Node node, String name) {
        NamedNodeMap attributes = node.getAttributes();
        if (attributes == null) {
            return null;
        }
        Node attribute = attributes.getNamedItem(name);
        if (attribute == null) {
            return null;
        }
        return attribute.getNodeValue();
    }

    /**
     * Get value of ID node of attributes, or null if it cannot be found.
     *
     * @param datastreamNode The datastream node
     * @return value of ID, or null for none.
     */
    private static String getDatastreamID(Node datastreamNode) {
        return getAttributeValue(datastreamNode, "ID");
    }

    /**
     * Read and parse a foxml document from Fedora.
     *
     * @param server Fedora server URL.
     * @param fedoraUsername Username.
     * @param fedoraPassword Password.
     * @param pid PID for object.
     * @param documentBuilder The document builder.
     * @return The parsed document
     *
     * @throws Exception On any trouble reading the document.
     */
    private static Document readFoxmlDocument(String server,
                                              String fedoraUsername,
                                              String fedoraPassword, String pid,
                                              DocumentBuilder documentBuilder)
            throws Exception {
        FedoraClient client = new FedoraClient(server, fedoraUsername,
                                               fedoraPassword);
        byte[] object = client.getAPIM().export(pid, "foxml1.0", "public");
        return documentBuilder.parse(new ByteArrayInputStream(
                object));
    }

    /**
     * Create a namespace aware document builder.
     *
     * @return The document builder.
     *
     * @throws Error If the default document builder cannot be initialised.
     */
    private static DocumentBuilder getDocumentBuilder() {
        DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();
        dbf.setNamespaceAware(true);
        DocumentBuilder documentBuilder;
        try {
            documentBuilder = dbf.newDocumentBuilder();
        } catch (ParserConfigurationException e) {
            throw new Error("Unable to initialise default document builder", e);
        }
        return documentBuilder;
    }
}
