/*******************************************************************************
 * Copyright (c) 2007, 2010 Association for Decentralized Information Management
 * in Industry THTH ry.
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 *
 * Contributors:
 *     VTT Technical Research Centre of Finland - initial API and implementation
 *******************************************************************************/
/* The following copyright is attached because marked parts of the following code are
 * copied and modified from Jena 2.4.
 */
/*
 *  (c) Copyright 2001, 2002, 2003, 2004, 2005, 2006 Hewlett-Packard Development Company, LP
 *  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.

 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

 * * Id: URIref.java,v 1.5 2006/03/22 13:52:49 andy_seaborne Exp

   AUTHOR:  Jeremy J. Carroll
 */

package org.simantics.databoard.util;

import java.util.Arrays;
import java.util.List;


/**
 * Contains utility methods for handling URI Strings in the context of ProCore
 * and the Simantics platform. This includes URI escaping and unescaping and
 * namespace/local name separation and joining.
 * 
 * <p>
 * URI's in this context are assumed to be formed as follows:
 * 
 * <pre>
 * &lt;namespace part&gt;#&lt;local name part&gt;
 * </pre>
 * 
 * <p>
 * The implementation of {@link #escape(String)} and {@link #unescape(String)}
 * is copied and modified from Jena's com.hp.hpl.jena.util.URIref.
 * </p>
 * 
 * <p>
 * All methods are static and operate only on their arguments and on
 * constant lookup data.
 * </p>
 * 
 * @see <a href="http://en.wikipedia.org/wiki/Percent-encoding">Percent-encoding</a>
 * 
 * @author Tuukka Lehtonen
 */
public final class URIStringUtils {

    /**
     * The character '/' is used as a path separator in URI namespace parts in ProCore.
     */
    public static final char NAMESPACE_PATH_SEPARATOR  = '/';

    /**
     * The '#' character is used to separate the local name and namespace parts
     * of an URI, for example <code>http://www.example.org#localName</code>.
     */
    public static final char NAMESPACE_LOCAL_SEPARATOR = '#';

    /**
     * Checks that exactly one separator character
     * ({@link #NAMESPACE_LOCAL_SEPARATOR}) between namespace and localname
     * exists in the specified URI and returns its index.
     * 
     * @param uri the URI to search from
     * @return the character index of the separator ranging from 0 to uri.length()-1
     * @throws IllegalArgumentException if the specified URI contains no
     *         {@link #NAMESPACE_LOCAL_SEPARATOR} or more than one of them
     */
    private static int assertSingleSeparatorPosition(String uri) {
        int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
        if (sharpIndex == -1) {
            throw new IllegalArgumentException("URI '" + uri + "' does not contain any '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
        }
        int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);
        if (nextSharpIndex != -1) {
            throw new IllegalArgumentException("URI '" + uri + "' contains multiple '" + NAMESPACE_LOCAL_SEPARATOR + "' separator characters");
        }
        return sharpIndex;
    }

    /**
     * Checks that exactly one separator character (
     * {@link #NAMESPACE_LOCAL_SEPARATOR}) between namespace and localname
     * exists in the specified URI and returns its index. This version does not
     * throw an exception when the separator is missing or ambiguous.
     * 
     * @param uri the URI to search from
     * @return the character index of the separator ranging from 0 to
     *         uri.length()-1 or -1 if no separator or more than one separator
     *         was found.
     */
    private static int singleSeparatorPosition(String uri) {
        int sharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR);
        if (sharpIndex == -1) {
            return -1;
        }
        int nextSharpIndex = uri.indexOf(NAMESPACE_LOCAL_SEPARATOR, sharpIndex + 1);
        if (nextSharpIndex != -1) {
            return -1;
        }
        return sharpIndex;
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * the namespace.
     * 
     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     * </p>
     * 
     * @param uri the URI to split, must be non-null
     * @return the namespace part of the specified URI
     * @throws IllegalArgumentException for URIs without a
     *         {@link #NAMESPACE_LOCAL_SEPARATOR} or with more than one
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String getNamespace(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = assertSingleSeparatorPosition(uri);
        return uri.substring(0, separatorIndex);
    }

    /**
     * Returns the part of the specified URI that precedes its last
     * {@link #NAMESPACE_PATH_SEPARATOR} or {@link #NAMESPACE_LOCAL_SEPARATOR}
     * character, whichever occurs later in the string.
     * 
     * @param uri the URI to cut, must be non-null
     * @return the URI up to, but not including, the last separator character
     * @throws StringIndexOutOfBoundsException if the URI contains neither
     *         separator character
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String getRVIParent(String uri) {
        int childSeparator = uri.lastIndexOf(NAMESPACE_PATH_SEPARATOR);
        int propertySeparator = uri.lastIndexOf(NAMESPACE_LOCAL_SEPARATOR);
        int separator = Math.max(childSeparator, propertySeparator);
        return uri.substring(0, separator);
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * the local name.
     * 
     * <p>
     * Assumes that namespaces are always separated by
     * {@link #NAMESPACE_LOCAL_SEPARATOR} characters.
     * </p>
     * 
     * @param uri the URI to split, must be non-null
     * @return the local name part of the specified URI
     * @throws IllegalArgumentException for URIs without a
     *         {@link #NAMESPACE_LOCAL_SEPARATOR} or with more than one
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String getLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = assertSingleSeparatorPosition(uri);
        return uri.substring(separatorIndex + 1);
    }

    /**
     * Replaces every character of the specified name that is not accepted by
     * {@link Character#isJavaIdentifierPart(char)} with an underscore.
     * 
     * @param name the name to process, must be non-null
     * @return the processed name, or the very same string instance if nothing
     *         had to be replaced
     * @throws NullPointerException for <code>null</code> names
     */
    public static String escapeName(String name) {
        char[] chars = name.toCharArray();
        boolean modified = false;
        for (int i = 0; i < chars.length; ++i) {
            if (!Character.isJavaIdentifierPart(chars[i])) {
                chars[i] = '_';
                modified = true;
            }
        }
        // Avoid allocating a new string when the input was already clean.
        return modified ? new String(chars) : name;
    }

    private static final String HTTP_PREFIX = "http://";
    private static final int HTTP_POSITION = HTTP_PREFIX.length();

    /**
     * Splits the specified URI at its last {@link #NAMESPACE_PATH_SEPARATOR}
     * into a parent part and a name part.
     * 
     * <p>
     * If the last separator is the one that terminates a leading
     * <code>http://</code>, the prefix is kept intact, i.e.
     * <code>http://foo</code> is split into <code>http://</code> and
     * <code>foo</code>.
     * </p>
     * 
     * @param uri the URI to split, must be non-null
     * @return [0] = the part before the last path separator, [1] = the part
     *         after it, or <code>null</code> if the URI contains no path
     *         separator at all
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String[] splitURI(String uri) {
        int lastPathSeparator = uri.lastIndexOf(NAMESPACE_PATH_SEPARATOR);
        if (lastPathSeparator == -1)
            return null;
        // Only treat the separator as part of the scheme when the URI really
        // starts with "http://". Any other URI whose last '/' merely happens
        // to sit at the same index is split like every other URI.
        if (lastPathSeparator == HTTP_POSITION - 1 && uri.startsWith(HTTP_PREFIX))
            return new String[] { HTTP_PREFIX, uri.substring(HTTP_POSITION) };
        return new String[] {
                uri.substring(0, lastPathSeparator),
                uri.substring(lastPathSeparator + 1)
        };
    }

    /**
     * Same as {@link #splitURI(String)} but returns the two parts as a
     * fixed-size list.
     * 
     * @param uri the URI to split, must be non-null
     * @return a two-element list: the part before the last path separator and
     *         the part after it
     * @throws NullPointerException for <code>null</code> URIs and for URIs
     *         that {@link #splitURI(String)} cannot split
     */
    public static List<String> splitURISCL(String uri) {
        String[] result = splitURI(uri);
        // Arrays.asList rejects a null array with a message-less
        // NullPointerException; keep the exception type but say what went wrong.
        if (result == null)
            throw new NullPointerException("URI '" + uri + "' cannot be split, it contains no '" + NAMESPACE_PATH_SEPARATOR + "' separator characters");
        return Arrays.asList(result);
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * them both separately as an array.
     * 
     * @param uri the URI to split, must be non-null
     * @return [0] = namespace, [1] = local name or <code>null</code> if the URI
     *         cannot be split, i.e. it does not contain exactly one
     *         {@link #NAMESPACE_LOCAL_SEPARATOR}.
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String[] trySplitNamespaceAndLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = singleSeparatorPosition(uri);
        if (separatorIndex == -1)
            return null;
        return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
    }

    /**
     * Splits the specified URI into a namespace and a local name and returns
     * them both separately as an array.
     * 
     * @param uri the URI to split, must be non-null
     * @return [0] = namespace, [1] = local name
     * @throws IllegalArgumentException for URIs without a
     *         {@link #NAMESPACE_LOCAL_SEPARATOR} or with more than one
     * @throws NullPointerException for <code>null</code> URIs
     */
    public static String[] splitNamespaceAndLocalName(String uri) {
        if (uri == null)
            throw new NullPointerException("null uri");
        int separatorIndex = assertSingleSeparatorPosition(uri);
        return new String[] { uri.substring(0, separatorIndex), uri.substring(separatorIndex + 1) };
    }

    /**
     * Escapes the specified local name with %NN escapes so that it can be
     * used as a part of an URI. The escaped characters are the space, '%',
     * '&amp;', the {@link #NAMESPACE_LOCAL_SEPARATOR} and the
     * {@link #NAMESPACE_PATH_SEPARATOR} character. All other characters are
     * left as they are.
     * 
     * @param localName the string to escape
     * @return the escaped string
     * @throws NullPointerException for <code>null</code> local names
     */
    public static String escapeURI(String localName) {
        if (localName == null)
            throw new NullPointerException("null local name");
        return encode(localName);
    }

    /**
     * Add a suffix path to a namespace string, i.e. join the strings to
     * together with the {@link #NAMESPACE_PATH_SEPARATOR} character in between.
     * Neither argument is escaped.
     * 
     * @param namespace the namespace to append to
     * @param suffix the suffix to append
     * @return the joined namespace
     * @throws NullPointerException if either argument is <code>null</code>
     */
    public static String appendURINamespace(String namespace, String suffix) {
        return new StringBuilder(namespace.length() + 1 + suffix.length())
        .append(namespace)
        .append(NAMESPACE_PATH_SEPARATOR)
        .append(suffix)
        .toString();
    }

    /**
     * Join a namespace and a localname to form an URI with
     * {@link #NAMESPACE_LOCAL_SEPARATOR}. The local name is escaped with
     * {@link #escapeURI(String)} before joining, the namespace is used as is.
     * 
     * @param namespace the namespace part to join
     * @param localName the localname part to join, in unescaped form
     * @return the joined URI
     * @throws NullPointerException if either argument is <code>null</code>
     */
    public static String makeURI(String namespace, String localName) {
        String escapedLocalName = escapeURI(localName);
        return new StringBuilder(namespace.length() + 1 + escapedLocalName.length())
        .append(namespace)
        .append(NAMESPACE_LOCAL_SEPARATOR)
        .append(escapedLocalName)
        .toString();
    }

    /**
     * Escapes selected US-ASCII characters of the specified string using the
     * %NN escape mechanism. The escaped characters are the space, '%', '#',
     * '/' and '&amp;'. Every other character, including all characters outside
     * of US-ASCII, is copied to the result unchanged.
     * 
     * <p>
     * Note that '%' is always escaped, i.e. the input is never assumed to
     * contain escape sequences already: <code>escape("%01")</code> returns
     * <code>%2501</code>. This makes {@link #unescape(String)} the exact
     * inverse of this method.
     * </p>
     * 
     * <p>
     * Meant to be used for encoding URI local name parts if it is desired to
     * have '/' characters in the local name without creating a new namespace.
     * For example these two URI's:<br/>
     * 
     * <code>
     * http://foo.bar.com/foo/bar/org%2Fcom<br/>
     * http://foo.bar.com/foo/bar/net%2Fcom<br/>
     * </code>
     * 
     * have the same namespace <code>http://foo.bar.com/foo/bar/</code> and
     * different local names <code>org%2Fcom</code> and <code>net%2Fcom</code>
     * or <code>org/com</code> and <code>net/com</code> in unescaped form.
     * </p>
     * 
     * @param unicode the unescaped string, must be non-null
     * @return the escaped string, or the very same string instance if nothing
     *         had to be escaped
     * @throws NullPointerException for <code>null</code> strings
     */
    public static String escape(String unicode) {
        return encode(unicode);
    }


    /*
     * Lookup table, indexed by US-ASCII character code, of the characters that
     * are escaped by encode.
     *
     * For reference: RFC 3986 section 2.2 Reserved Characters (January 2005)
     * !*'();:@&=+$,/?#[]
     */
    private static final boolean[] ESCAPED_US_ASCII_CHARS = new boolean[128];

    static {
        ESCAPED_US_ASCII_CHARS[' '] = true;
        // IMPORTANT NOTE: every time escape is invoked, all input needs to be escaped,
        // i.e. escape("%01") should result in "%2501", not "%01".
        // escape and unescape form a bijection, where neither
        // of them is an idempotent operation. 
        ESCAPED_US_ASCII_CHARS['%'] = true;
        // '#' and '/' are URL segment/fragment delimiters, need to be escaped in names.
        ESCAPED_US_ASCII_CHARS['#'] = true;
        ESCAPED_US_ASCII_CHARS['/'] = true;
        // Escape '&' characters to avoid them being interpreted as SGML entities.
        ESCAPED_US_ASCII_CHARS['&'] = true;
    }

    /**
     * Counts the characters of the specified string that have to be escaped.
     * 
     * @param unicode the string to examine
     * @return the number of characters that need escaping, 0 if none
     */
    private static int needsEscaping(String unicode) {
        int len = unicode.length();
        int escapeCount = 0;
        for (int i = 0; i < len; ++i) {
            char ch = unicode.charAt(i);
            if (ch < 128 && ESCAPED_US_ASCII_CHARS[ch])
                ++escapeCount;
        }
        return escapeCount;
    }

    /**
     * Replaces every character marked in {@link #ESCAPED_US_ASCII_CHARS} with
     * its three character %NN escape sequence (upper case hexadecimal digits).
     * 
     * @param unicode the string to escape
     * @return the escaped string or the input instance itself if it contains
     *         nothing to escape
     */
    private static String encode(String unicode) {
        int needsEscapes = needsEscaping(unicode);
        if (needsEscapes == 0)
            return unicode;

        int len = unicode.length();
        // Each escaped character grows from one character into three.
        char result[] = new char[(len - needsEscapes) + needsEscapes * 3];
        int in = 0;
        int out = 0;
        while (in < len) {
            char inCh = unicode.charAt(in++);
            if (inCh >= 128 || !ESCAPED_US_ASCII_CHARS[inCh]) {
                result[out++] = inCh;
            } else {
                // Only selected 7-bit US-ASCII characters are escaped
                int c = inCh & 255;
                result[out++] = '%';
                result[out++] = (char) hexEncode(c / 16);
                result[out++] = (char) hexEncode(c % 16);
            }
        }
        return new String(result, 0, out);
    }

    private static boolean needsUnescaping(String unicode) {
        return unicode.indexOf('%') > -1;
    }

    /**
     * Converts a string containing %NN escape sequences to the corresponding
     * unescaped string. Every '%' character must be followed by two
     * hexadecimal digits (upper or lower case); each such sequence is replaced
     * by the single character whose code is the escaped value. All other
     * characters are copied unchanged.
     * 
     * @param uri the escaped string, must be non-null
     * @return the corresponding unescaped string, or the very same string
     *         instance if it contains no '%' characters
     * @exception IllegalArgumentException if a % hex sequence is ill-formed
     *            or incomplete
     * @throws NullPointerException for <code>null</code> strings
     */
    public static String unescape(String uri) {
        try {
            if (!needsUnescaping(uri))
                return uri;

            int len = uri.length();
            // The result can never be longer than the input.
            char result[] = new char[len];
            int in = 0;
            int out = 0;
            while (in < len) {
                char inCh = uri.charAt(in++);
                if (inCh == '%') {
                    // Running past the end of the string here is reported
                    // as an incomplete escape sequence by the catch below.
                    char d1 = uri.charAt(in);
                    char d2 = uri.charAt(in + 1);
                    // Guard the byte casts below: only US-ASCII can be a hex digit.
                    if (d1 > 127 || d2 > 127)
                        throw new IllegalArgumentException("Invalid hex digit escape sequence in " + uri + " at " + in);
                    result[out++] = (char) (hexDecode((byte) d1) * 16 | hexDecode((byte) d2));
                    in += 2;
                } else {
                    result[out++] = inCh;
                }
            }
            return new String(result, 0, out);
        } catch (IllegalArgumentException e) {
            throw new IllegalArgumentException("Problem while unescaping string: " + uri, e);
        } catch (IndexOutOfBoundsException ee) {
            throw new IllegalArgumentException("Incomplete hex digit escape sequence in " + uri, ee);
        }
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    /* Returns the upper case hexadecimal digit for a value in the range 0..15. */
    private static byte hexEncode(int i) {
        if (i < 10)
            return (byte) ('0' + i);
        else
            return (byte)('A' + i - 10);
    }

    /* Copied from Jena 2.4 com.hp.hpl.jena.util.URIref */
    /* Returns the value 0..15 of a hexadecimal digit, throws IllegalArgumentException for anything else. */
    private static int hexDecode(byte b) {
        switch (b) {
            case (byte)'a': case (byte)'b': case (byte)'c': case (byte)'d': case (byte)'e': case (byte)'f':
                return ((b) & 255) - 'a' + 10;
            case (byte)'A': case (byte)'B': case (byte)'C': case (byte)'D': case (byte)'E': case (byte)'F':
                return b - (byte) 'A' + 10;
            case (byte)'0': case (byte)'1': case (byte)'2': case (byte)'3': case (byte)'4': case (byte)'5': case (byte)'6': case (byte)'7': case (byte)'8': case (byte)'9':
                return b - (byte) '0';
            default:
                throw new IllegalArgumentException("Bad Hex escape character: " + ((b)&255) );
        }
    }

    /**
     * Some simple tests.
     * @param args not used
     */
    public static void main(String[] args) {
        String s = makeURI("http://foo.bar.com/foo/bar", "baz/guuk/org%2Fnet");
        System.out.println("escapeURI: " + s);
        System.out.println("getNamespace: " + getNamespace(s));
        System.out.println("getLocalName: " + getLocalName(s));

        System.out.println("escapeURI: " + escapeURI("foo/bar/org%2Fnet"));
        System.out.println("escapeURI('...#...'): " + escapeURI("foo/bar#org%2Fnet"));

        testEscape("/", "%2F");
        testEscape("#", "%23");
        testEscape("%", "%25");
        testEscape("%01", "%2501");
        testEscape("%GG", "%25GG");
        testEscape("st venttiili", "st%20venttiili");
        testEscape("st", "st");
        testEscape("Something / Else", "Something%20%2F%20Else");
        testEscape("http://www.vtt.fi%2FSome- %25 Namespace/Something", "http:%2F%2Fwww.vtt.fi%252FSome-%20%2525%20Namespace%2FSomething");
        testEscape("http://www.vtt.fi/PSK", "http:%2F%2Fwww.vtt.fi%2FPSK");
        testEscape("http://www.vtt.fi%2FSome-Namespace/Something / Else", "http:%2F%2Fwww.vtt.fi%252FSome-Namespace%2FSomething%20%2F%20Else");
    }

    /**
     * Escapes the given string, unescapes the result again and verifies both
     * the escaped form and the round trip.
     * 
     * @param unescaped the string to escape
     * @param expectedEscaped the expected result of escaping
     * @throws AssertionError if either check fails
     */
    private static void testEscape(String unescaped, String expectedEscaped) {
        String esc = escape(unescaped);
        String unesc = unescape(esc);
        System.out.format("escape('%s') -> '%s', unescape('%s') -> '%s'", unescaped, esc, esc, unesc);
        if (!esc.equals(expectedEscaped))
            throw new AssertionError("escape('" + unescaped + "') was expected to return '" + expectedEscaped + "' but returned '" + esc + "'");
        if (!unesc.equals(unescaped))
            throw new AssertionError("unescape(escape('" + unescaped + "'))=unescape(" + esc + ") was expected to return '" + unescaped + "' but returned '" + unesc + "'");
        System.out.println(" OK");
    }

}
