src/main/java/com/sun/activation/registries/MailcapTokenizer.java - java/activation - Git at Google

 /*
  * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
  *
  * Copyright (c) 1997-2017 Oracle and/or its affiliates. All rights reserved.
  *
  * The contents of this file are subject to the terms of either the GNU
  * General Public License Version 2 only ("GPL") or the Common Development
  * and Distribution License("CDDL") (collectively, the "License").  You
  * may not use this file except in compliance with the License.  You can
  * obtain a copy of the License at
  * https://oss.oracle.com/licenses/CDDL+GPL-1.1
  * or LICENSE.txt.  See the License for the specific
  * language governing permissions and limitations under the License.
  *
  * When distributing the software, include this License Header Notice in each
  * file and include the License file at LICENSE.txt.
  *
  * GPL Classpath Exception:
  * Oracle designates this particular file as subject to the "Classpath"
  * exception as provided by Oracle in the GPL Version 2 section of the License
  * file that accompanied this code.
  *
  * Modifications:
  * If applicable, add the following below the License Header, with the fields
  * enclosed by brackets [] replaced by your own identifying information:
  * "Portions Copyright [year] [name of copyright owner]"
  *
  * Contributor(s):
  * If you wish your version of this file to be governed by only the CDDL or
  * only the GPL Version 2, indicate your decision by adding "[Contributor]
  * elects to include this software in this distribution under the [CDDL or GPL
  * Version 2] license."  If you don't indicate a single choice of license, a
  * recipient has the option to distribute your version of this file under
  * either the CDDL, the GPL Version 2 or to extend the choice of license to
  * its licensees as provided above.  However, if you add GPL Version 2 code
  * and therefore, elected the GPL Version 2 license, then the option applies
  * only if the new code is made subject to such option by the copyright
  * holder.
  */

 package	com.sun.activation.registries;

 /**
  * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
  * Useful for parsing MIME content types and mailcap entries.
  *
  * <p>Each instance keeps a mutable read position into a single input string;
  * {@link #nextToken()} advances that position and records the token type and
  * text, which can then be read back with {@link #getCurrentToken()} and
  * {@link #getCurrentTokenValue()}.  No synchronization is performed.
  */
 public class MailcapTokenizer {

     /** Token type for a single character that fits no other category. */
     public static final int UNKNOWN_TOKEN = 0;
     /** Token type reported before {@link #nextToken()} has been called. */
     public static final int START_TOKEN = 1;
     /** Token type for a run of ordinary characters (or an auto-quoted string). */
     public static final int STRING_TOKEN = 2;
     /** Token type reported once the end of the input has been reached. */
     public static final int EOI_TOKEN = 5;
     /** Token type for a '/' character; the value is the character itself. */
     public static final int SLASH_TOKEN = '/';
     /** Token type for a ';' character; the value is the character itself. */
     public static final int SEMICOLON_TOKEN = ';';
     /** Token type for a '=' character; the value is the character itself. */
     public static final int EQUALS_TOKEN = '=';

     /** Characters that can never be part of a plain string token. */
     private static final String SPECIAL_CHARS = "()<>@,;:\\\"/[]?=";

     /** Character that ends an auto-quoted token unless escaped by a backslash. */
     private static final char AUTOQUOTE_TERMINATOR = ';';

     private final String data;
     private final int dataLength;
     private int dataIndex;
     private int currentToken;
     private String currentTokenValue;
     private boolean isAutoquoting;

     /**
      * Creates a tokenizer positioned at the start of the given string.
      * The current token is {@link #START_TOKEN} with an empty value until
      * {@link #nextToken()} is called; auto-quoting starts switched off.
      *
      * @param inputString the string to tokenize
      * @throws NullPointerException if {@code inputString} is {@code null}
      */
     public MailcapTokenizer(String inputString) {
         data = inputString;
         dataIndex = 0;
         dataLength = inputString.length();

         currentToken = START_TOKEN;
         currentTokenValue = "";

         isAutoquoting = false;
     }

     /**
      * Set whether auto-quoting is on or off.
      *
      * <p>While auto-quoting is on, a token that does not start with ';' or
      * '=' runs from the first non-whitespace character up to (but not
      * including) the next unescaped ';' or the end of input, and is reported
      * as a {@link #STRING_TOKEN}.  Whitespace in front of the terminator is
      * kept as part of the value, and backslash escapes are removed from it.
      * A leading ';' or '=' is still returned as its own token.
      *
      * <p>This is required for handling command strings in a mailcap entry.
      *
      * @param value {@code true} to enable auto-quoting, {@code false} to disable it
      */
     public void setIsAutoquoting(boolean value) {
         isAutoquoting = value;
     }

     /**
      * Retrieve current token.
      *
      * @return the type of the current token (one of the {@code *_TOKEN} constants)
      */
     public int getCurrentToken() {
         return currentToken;
     }

     /**
      * Get a String that describes the given token.
      *
      * @param token a token type
      * @return a short name for the token type, or "really unknown" when the
      *         value matches none of the {@code *_TOKEN} constants
      */
     public static String nameForToken(int token) {
         switch (token) {
             case UNKNOWN_TOKEN:
                 return "unknown";
             case START_TOKEN:
                 return "start";
             case STRING_TOKEN:
                 return "string";
             case EOI_TOKEN:
                 return "EOI";
             case SLASH_TOKEN:
                 return "'/'";
             case SEMICOLON_TOKEN:
                 return "';'";
             case EQUALS_TOKEN:
                 return "'='";
             default:
                 return "really unknown";
         }
     }

     /**
      * Retrieve current token value.
      *
      * @return a String containing the text of the current token; the empty
      *         string before the first call to {@link #nextToken()} and
      *         {@code null} once {@link #EOI_TOKEN} has been reached
      */
     public String getCurrentTokenValue() {
         return currentTokenValue;
     }

     /**
      * Process the next token.
      *
      * <p>Leading whitespace is skipped first.  Calling this again after the
      * end of input keeps returning {@link #EOI_TOKEN}.
      *
      * @return the type of the token that was just read
      */
     public int nextToken() {
         //  skip white space
         while (dataIndex < dataLength
                 && isWhiteSpaceChar(data.charAt(dataIndex))) {
             ++dataIndex;
         }

         if (dataIndex >= dataLength) {
             currentToken = EOI_TOKEN;
             currentTokenValue = null;
             return currentToken;
         }

         //  examine the current character and see what kind of token we have
         char c = data.charAt(dataIndex);
         if (isAutoquoting) {
             if (c == AUTOQUOTE_TERMINATOR || c == '=') {
                 processSingleCharToken(c, c);
             } else {
                 processAutoquoteToken();
             }
         } else if (isStringTokenChar(c)) {
             processStringToken();
         } else if (c == '/' || c == ';' || c == '=') {
             processSingleCharToken(c, c);
         } else {
             processSingleCharToken(UNKNOWN_TOKEN, c);
         }

         return currentToken;
     }

     /**
      * Records a one-character token of the given type and steps past it.
      */
     private void processSingleCharToken(int tokenType, char c) {
         currentToken = tokenType;
         currentTokenValue = String.valueOf(c);
         ++dataIndex;
     }

     /**
      * Consumes the longest run of string-token characters starting at the
      * current position and records it as a {@link #STRING_TOKEN}.
      */
     private void processStringToken() {
         //  capture the initial index
         int initialIndex = dataIndex;

         //  skip to 1st non string token character
         while (dataIndex < dataLength
                 && isStringTokenChar(data.charAt(dataIndex))) {
             ++dataIndex;
         }

         currentToken = STRING_TOKEN;
         currentTokenValue = data.substring(initialIndex, dataIndex);
     }

     /**
      * Consumes everything up to the first unescaped auto-quote terminator (or
      * the end of input) and records it, with escape sequences resolved, as a
      * {@link #STRING_TOKEN}.  The terminator itself is left unread.
      */
     private void processAutoquoteToken() {
         //  capture the initial index
         int initialIndex = dataIndex;

         //  now skip to the 1st non-escaped autoquote termination character
         while (dataIndex < dataLength) {
             char c = data.charAt(dataIndex);
             if (c == '\\' && dataIndex + 1 < dataLength) {
                 //  a backslash protects the following character, so an
                 //  escaped terminator must not end the token
                 dataIndex += 2;
             } else if (c == AUTOQUOTE_TERMINATOR) {
                 break;
             } else {
                 ++dataIndex;
             }
         }

         currentToken = STRING_TOKEN;
         currentTokenValue =
             fixEscapeSequences(data.substring(initialIndex, dataIndex));
     }

     /** Returns true for the characters listed in {@link #SPECIAL_CHARS}. */
     private static boolean isSpecialChar(char c) {
         return SPECIAL_CHARS.indexOf(c) >= 0;
     }

     private static boolean isControlChar(char c) {
         return Character.isISOControl(c);
     }

     private static boolean isWhiteSpaceChar(char c) {
         return Character.isWhitespace(c);
     }

     /**
      * A character may appear in a plain string token when it is neither a
      * special character, a control character, nor whitespace.
      */
     private static boolean isStringTokenChar(char c) {
         return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
     }

     /**
      * Removes backslash escapes: each backslash is dropped and the character
      * after it is copied literally.  A backslash that is the very last
      * character has nothing to escape and is kept as is.
      */
     private static String fixEscapeSequences(String inputString) {
         int inputLength = inputString.length();
         StringBuilder buffer = new StringBuilder(inputLength);

         for (int i = 0; i < inputLength; ++i) {
             char currentChar = inputString.charAt(i);
             if (currentChar == '\\' && i < inputLength - 1) {
                 //  emit the escaped character and skip over it
                 ++i;
                 buffer.append(inputString.charAt(i));
             } else {
                 buffer.append(currentChar);
             }
         }

         return buffer.toString();
     }

     /*
     public static void main(String[] args) {
         for (int i = 0; i < args.length; ++i) {
             MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]);

             System.out.println("Original: |" + args[i] + "|");

             int currentToken = tokenizer.nextToken();
             while (currentToken != EOI_TOKEN) {
                 switch(currentToken) {
                     case UNKNOWN_TOKEN:
                         System.out.println("  Unknown Token:           |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                     case START_TOKEN:
                         System.out.println("  Start Token:             |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                     case STRING_TOKEN:
                         System.out.println("  String Token:            |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                     case EOI_TOKEN:
                         System.out.println("  EOI Token:               |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                     case SLASH_TOKEN:
                         System.out.println("  Slash Token:             |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                     case SEMICOLON_TOKEN:
                         System.out.println("  Semicolon Token:         |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                     case EQUALS_TOKEN:
                         System.out.println("  Equals Token:            |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                     default:
                         System.out.println("  Really Unknown Token:    |" + tokenizer.getCurrentTokenValue() + "|");
                         break;
                 }

                 currentToken = tokenizer.nextToken();
             }

             System.out.println("");
         }
     }
     */
 }
	/*
	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
	*
	* Copyright (c) 1997-2017 Oracle and/or its affiliates. All rights reserved.
	*
	* The contents of this file are subject to the terms of either the GNU
	* General Public License Version 2 only ("GPL") or the Common Development
	* and Distribution License("CDDL") (collectively, the "License"). You
	* may not use this file except in compliance with the License. You can
	* obtain a copy of the License at
	* https://oss.oracle.com/licenses/CDDL+GPL-1.1
	* or LICENSE.txt. See the License for the specific
	* language governing permissions and limitations under the License.
	*
	* When distributing the software, include this License Header Notice in each
	* file and include the License file at LICENSE.txt.
	*
	* GPL Classpath Exception:
	* Oracle designates this particular file as subject to the "Classpath"
	* exception as provided by Oracle in the GPL Version 2 section of the License
	* file that accompanied this code.
	*
	* Modifications:
	* If applicable, add the following below the License Header, with the fields
	* enclosed by brackets [] replaced by your own identifying information:
	* "Portions Copyright [year] [name of copyright owner]"
	*
	* Contributor(s):
	* If you wish your version of this file to be governed by only the CDDL or
	* only the GPL Version 2, indicate your decision by adding "[Contributor]
	* elects to include this software in this distribution under the [CDDL or GPL
	* Version 2] license." If you don't indicate a single choice of license, a
	* recipient has the option to distribute your version of this file under
	* either the CDDL, the GPL Version 2 or to extend the choice of license to
	* its licensees as provided above. However, if you add GPL Version 2 code
	* and therefore, elected the GPL Version 2 license, then the option applies
	* only if the new code is made subject to such option by the copyright
	* holder.
	*/

	package com.sun.activation.registries;

	/**
	 * A tokenizer for strings in the form of "foo/bar; prop1=val1; ... ".
	 * Useful for parsing MIME content types and mailcap entries.
	 *
	 * <p>Each instance keeps a mutable read position into a single input string;
	 * {@link #nextToken()} advances that position and records the token type and
	 * text, which can then be read back with {@link #getCurrentToken()} and
	 * {@link #getCurrentTokenValue()}.  No synchronization is performed.
	 */
	public class MailcapTokenizer {

	    /** Token type for a single character that fits no other category. */
	    public static final int UNKNOWN_TOKEN = 0;
	    /** Token type reported before {@link #nextToken()} has been called. */
	    public static final int START_TOKEN = 1;
	    /** Token type for a run of ordinary characters (or an auto-quoted string). */
	    public static final int STRING_TOKEN = 2;
	    /** Token type reported once the end of the input has been reached. */
	    public static final int EOI_TOKEN = 5;
	    /** Token type for a '/' character; the value is the character itself. */
	    public static final int SLASH_TOKEN = '/';
	    /** Token type for a ';' character; the value is the character itself. */
	    public static final int SEMICOLON_TOKEN = ';';
	    /** Token type for a '=' character; the value is the character itself. */
	    public static final int EQUALS_TOKEN = '=';

	    /** Characters that can never be part of a plain string token. */
	    private static final String SPECIAL_CHARS = "()<>@,;:\\\"/[]?=";

	    /** Character that ends an auto-quoted token unless escaped by a backslash. */
	    private static final char AUTOQUOTE_TERMINATOR = ';';

	    private final String data;
	    private final int dataLength;
	    private int dataIndex;
	    private int currentToken;
	    private String currentTokenValue;
	    private boolean isAutoquoting;

	    /**
	     * Creates a tokenizer positioned at the start of the given string.
	     * The current token is {@link #START_TOKEN} with an empty value until
	     * {@link #nextToken()} is called; auto-quoting starts switched off.
	     *
	     * @param inputString the string to tokenize
	     * @throws NullPointerException if {@code inputString} is {@code null}
	     */
	    public MailcapTokenizer(String inputString) {
	        data = inputString;
	        dataIndex = 0;
	        dataLength = inputString.length();

	        currentToken = START_TOKEN;
	        currentTokenValue = "";

	        isAutoquoting = false;
	    }

	    /**
	     * Set whether auto-quoting is on or off.
	     *
	     * <p>While auto-quoting is on, a token that does not start with ';' or
	     * '=' runs from the first non-whitespace character up to (but not
	     * including) the next unescaped ';' or the end of input, and is reported
	     * as a {@link #STRING_TOKEN}.  Whitespace in front of the terminator is
	     * kept as part of the value, and backslash escapes are removed from it.
	     * A leading ';' or '=' is still returned as its own token.
	     *
	     * <p>This is required for handling command strings in a mailcap entry.
	     *
	     * @param value {@code true} to enable auto-quoting, {@code false} to disable it
	     */
	    public void setIsAutoquoting(boolean value) {
	        isAutoquoting = value;
	    }

	    /**
	     * Retrieve current token.
	     *
	     * @return the type of the current token (one of the {@code *_TOKEN} constants)
	     */
	    public int getCurrentToken() {
	        return currentToken;
	    }

	    /**
	     * Get a String that describes the given token.
	     *
	     * @param token a token type
	     * @return a short name for the token type, or "really unknown" when the
	     *         value matches none of the {@code *_TOKEN} constants
	     */
	    public static String nameForToken(int token) {
	        switch (token) {
	            case UNKNOWN_TOKEN:
	                return "unknown";
	            case START_TOKEN:
	                return "start";
	            case STRING_TOKEN:
	                return "string";
	            case EOI_TOKEN:
	                return "EOI";
	            case SLASH_TOKEN:
	                return "'/'";
	            case SEMICOLON_TOKEN:
	                return "';'";
	            case EQUALS_TOKEN:
	                return "'='";
	            default:
	                return "really unknown";
	        }
	    }

	    /**
	     * Retrieve current token value.
	     *
	     * @return a String containing the text of the current token; the empty
	     *         string before the first call to {@link #nextToken()} and
	     *         {@code null} once {@link #EOI_TOKEN} has been reached
	     */
	    public String getCurrentTokenValue() {
	        return currentTokenValue;
	    }

	    /**
	     * Process the next token.
	     *
	     * <p>Leading whitespace is skipped first.  Calling this again after the
	     * end of input keeps returning {@link #EOI_TOKEN}.
	     *
	     * @return the type of the token that was just read
	     */
	    public int nextToken() {
	        //  skip white space
	        while (dataIndex < dataLength
	                && isWhiteSpaceChar(data.charAt(dataIndex))) {
	            ++dataIndex;
	        }

	        if (dataIndex >= dataLength) {
	            currentToken = EOI_TOKEN;
	            currentTokenValue = null;
	            return currentToken;
	        }

	        //  examine the current character and see what kind of token we have
	        char c = data.charAt(dataIndex);
	        if (isAutoquoting) {
	            if (c == AUTOQUOTE_TERMINATOR || c == '=') {
	                processSingleCharToken(c, c);
	            } else {
	                processAutoquoteToken();
	            }
	        } else if (isStringTokenChar(c)) {
	            processStringToken();
	        } else if (c == '/' || c == ';' || c == '=') {
	            processSingleCharToken(c, c);
	        } else {
	            processSingleCharToken(UNKNOWN_TOKEN, c);
	        }

	        return currentToken;
	    }

	    /**
	     * Records a one-character token of the given type and steps past it.
	     */
	    private void processSingleCharToken(int tokenType, char c) {
	        currentToken = tokenType;
	        currentTokenValue = String.valueOf(c);
	        ++dataIndex;
	    }

	    /**
	     * Consumes the longest run of string-token characters starting at the
	     * current position and records it as a {@link #STRING_TOKEN}.
	     */
	    private void processStringToken() {
	        //  capture the initial index
	        int initialIndex = dataIndex;

	        //  skip to 1st non string token character
	        while (dataIndex < dataLength
	                && isStringTokenChar(data.charAt(dataIndex))) {
	            ++dataIndex;
	        }

	        currentToken = STRING_TOKEN;
	        currentTokenValue = data.substring(initialIndex, dataIndex);
	    }

	    /**
	     * Consumes everything up to the first unescaped auto-quote terminator (or
	     * the end of input) and records it, with escape sequences resolved, as a
	     * {@link #STRING_TOKEN}.  The terminator itself is left unread.
	     */
	    private void processAutoquoteToken() {
	        //  capture the initial index
	        int initialIndex = dataIndex;

	        //  now skip to the 1st non-escaped autoquote termination character
	        while (dataIndex < dataLength) {
	            char c = data.charAt(dataIndex);
	            if (c == '\\' && dataIndex + 1 < dataLength) {
	                //  a backslash protects the following character, so an
	                //  escaped terminator must not end the token
	                dataIndex += 2;
	            } else if (c == AUTOQUOTE_TERMINATOR) {
	                break;
	            } else {
	                ++dataIndex;
	            }
	        }

	        currentToken = STRING_TOKEN;
	        currentTokenValue =
	            fixEscapeSequences(data.substring(initialIndex, dataIndex));
	    }

	    /** Returns true for the characters listed in {@link #SPECIAL_CHARS}. */
	    private static boolean isSpecialChar(char c) {
	        return SPECIAL_CHARS.indexOf(c) >= 0;
	    }

	    private static boolean isControlChar(char c) {
	        return Character.isISOControl(c);
	    }

	    private static boolean isWhiteSpaceChar(char c) {
	        return Character.isWhitespace(c);
	    }

	    /**
	     * A character may appear in a plain string token when it is neither a
	     * special character, a control character, nor whitespace.
	     */
	    private static boolean isStringTokenChar(char c) {
	        return !isSpecialChar(c) && !isControlChar(c) && !isWhiteSpaceChar(c);
	    }

	    /**
	     * Removes backslash escapes: each backslash is dropped and the character
	     * after it is copied literally.  A backslash that is the very last
	     * character has nothing to escape and is kept as is.
	     */
	    private static String fixEscapeSequences(String inputString) {
	        int inputLength = inputString.length();
	        StringBuilder buffer = new StringBuilder(inputLength);

	        for (int i = 0; i < inputLength; ++i) {
	            char currentChar = inputString.charAt(i);
	            if (currentChar == '\\' && i < inputLength - 1) {
	                //  emit the escaped character and skip over it
	                ++i;
	                buffer.append(inputString.charAt(i));
	            } else {
	                buffer.append(currentChar);
	            }
	        }

	        return buffer.toString();
	    }

	    /*
	    public static void main(String[] args) {
	        for (int i = 0; i < args.length; ++i) {
	            MailcapTokenizer tokenizer = new MailcapTokenizer(args[i]);

	            System.out.println("Original: |" + args[i] + "|");

	            int currentToken = tokenizer.nextToken();
	            while (currentToken != EOI_TOKEN) {
	                switch(currentToken) {
	                    case UNKNOWN_TOKEN:
	                        System.out.println("  Unknown Token:           |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                    case START_TOKEN:
	                        System.out.println("  Start Token:             |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                    case STRING_TOKEN:
	                        System.out.println("  String Token:            |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                    case EOI_TOKEN:
	                        System.out.println("  EOI Token:               |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                    case SLASH_TOKEN:
	                        System.out.println("  Slash Token:             |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                    case SEMICOLON_TOKEN:
	                        System.out.println("  Semicolon Token:         |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                    case EQUALS_TOKEN:
	                        System.out.println("  Equals Token:            |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                    default:
	                        System.out.println("  Really Unknown Token:    |" + tokenizer.getCurrentTokenValue() + "|");
	                        break;
	                }

	                currentToken = tokenizer.nextToken();
	            }

	            System.out.println("");
	        }
	    }
	    */
	}