package de.xam.texthtml.html;

import java.util.HashSet;
import java.util.Set;

import org.xydra.annotations.CanBeNull;
import org.xydra.annotations.NeverNull;

/**
 * Static helpers for escaping untrusted text before it is embedded in
 * HTML/XHTML markup.
 */
public class SharedHtmlUtils {

	/**
	 * Sanitises a single string by HTML-escaping it via
	 * {@link #htmlEncode(String)}.
	 *
	 * @param s the untrusted input @CanBeNull
	 * @return the escaped form of s, or {@code null} if s was {@code null}
	 *         @CanBeNull
	 */
	public static String sanitize(@CanBeNull final String s) {
		if (s == null) {
			return null;
		}
		return htmlEncode(s);
	}

	/**
	 * Sanitises every element of a set via {@link #sanitize(String)}.
	 *
	 * The input set is not modified. The result may contain fewer elements than
	 * the input when distinct inputs escape to the same string.
	 *
	 * @param unsafe the untrusted strings @CanBeNull
	 * @return a new set holding the escaped strings, or {@code null} if unsafe
	 *         was {@code null} @CanBeNull
	 */
	public static Set<String> sanitize(@CanBeNull final Set<String> unsafe) {
		if (unsafe == null) {
			return null;
		}
		final Set<String> safe = new HashSet<String>();
		for (final String u : unsafe) {
			safe.add(sanitize(u));
		}
		return safe;
	}

	/** Sample input used by {@link #main(String[])} to demonstrate escaping. */
	private static final String MALICIOUS_INPUT_SAMPLE = "Dirk<script>alert('test');</script>";

	/**
	 * Prints the sanitised form of a sample malicious input to standard out.
	 *
	 * @param args ignored
	 */
	public static void main(final String[] args) {
		System.out.println(sanitize(MALICIOUS_INPUT_SAMPLE));
	}

	/**
	 * Escapes the characters that are significant in HTML/XHTML markup.
	 *
	 * Replaced characters: {@code &}, {@code <}, {@code >}, {@code '} and
	 * {@code "}. The guillemets U+00AB and U+00BB are treated as look-alikes of
	 * {@code <} and {@code >} and are replaced by the same entities.
	 *
	 * @param raw unencoded string @NeverNull
	 * @return the input string with HTML escaping
	 */
	public static final String htmlEncode(@NeverNull final String raw) {
		String safe = raw;

		// Must run first: the replacements below introduce '&' characters that
		// must not be escaped a second time.
		safe = safe.replace("&", "&amp;");

		safe = safe.replace("<", "&lt;");
		// unicode look-alike (left guillemet); the entity needs its terminating ';'
		safe = safe.replace("\u00AB", "&lt;");

		safe = safe.replace(">", "&gt;");
		// unicode look-alike (right guillemet); maps to &gt;, not &lt;
		safe = safe.replace("\u00BB", "&gt;");

		// http://stackoverflow.com/questions/2083754/why-shouldnt-apos-be-used-to-escape-single-quotes
		// The named entity &apos; is defined for XML/XHTML but not for HTML 4,
		// whereas the numeric reference &#39; is understood by both.
		safe = safe.replace("'", "&#39;");

		safe = safe.replace("\"", "&quot;");

		return safe;
	}

}
