//  UCS2Conv.cpp version 1.5
//  yudit package - Unicode Editor for the X Window System (and Linux) 
//
//  Author: gsinai@iname.com (Gaspar Sinai)
//  GNU Copyright (C) 1997,1998,1999  Gaspar Sinai
// 
//  yudit version 1.5  Copyright(C) 30 November, 1999, Tokyo Japan  Gaspar Sinai
//  yudit version 1.4  Copyright(C) 25 November, 1999, Tokyo Japan  Gaspar Sinai
//  yudit version 1.3  Copyright(C)  5 April,    1999, Tokyo Japan  Gaspar Sinai
//  yudit version 1.2  Copyright(C) 10 December, 1998, Tokyo Japan  Gaspar Sinai
//  yudit version 1.1  Copyright(C) 23 August,   1998, Tokyo Japan  Gaspar Sinai
//  yudit version 1.0  Copyright(C) 17 May,      1998, Tokyo Japan  Gaspar Sinai
//  yudit version 0.99 Copyright(C)  4 April,    1998, Tokyo Japan  Gaspar Sinai
//  yudit version 0.97 Copyright(C)  4 February, 1998, Tokyo Japan  Gaspar Sinai
//  yudit version 0.95 Copyright(C) 10 January,  1998, Tokyo Japan  Gaspar Sinai
//  yudit version 0.94 Copyright(C) 17 December, 1997, Tokyo Japan  Gaspar Sinai
//  yudit version 0.9 Copyright (C)  8 December, 1997, Tokyo Japan  Gaspar Sinai
//  yutex version 0.8 Copyright (C)  5 November, 1997, Tokyo Japan  Gaspar Sinai
//
//  This program is free software; you can redistribute it and/or modify
//  it under the terms of the GNU General Public License as published by
//  the Free Software Foundation; either version 2 of the License, or
//  (at your option) any later version.
//
//  This program is distributed in the hope that it will be useful,
//  but WITHOUT ANY WARRANTY; without even the implied warranty of
//  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
//  GNU General Public License for more details.
//
//  You should have received a copy of the GNU General Public License
//  along with this program; if not, write to the Free Software
//  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
//

// This file defines several UCS2Converter classes
//

#include "UString.h"
#include "UCS2Conv.h"
#include "UCommon.h"
#include <memory.h>
#include <sys/types.h>
#include <stdlib.h>
#include <string.h>

// Lookup tables that the UUTF7Conv constructor allocates and fills on first
// use (each has 0x80 entries, indexed by a 7-bit character code):
//   allowedChars[c] is 1 when c is listed in allowedLoose, 0 otherwise;
//   base64Decode[c] is the position of c in base64Code, 0xff otherwise.
static unsigned char *allowedChars = 0;
static unsigned char *base64Decode = 0;

//static unsigned char allowedStrict[] = 
//	{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?"};

// Characters that the UTF7 encoder copies to the output unchanged.
static unsigned char allowedLoose[] = 
	{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789'(),-./:?!\"#$%&*;<=>@[]^_`{|}"};
// The base64 alphabet; the index of a character is its 6-bit value.
static unsigned char base64Code[] =
	{"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"};

// NOTE(review): presumably the Unicode byte order mark; it is not referenced
// in the part of the file reviewed here - confirm usage further down.
#define UNI_MAGIC UCS2(0xfeff)



//------------------------------------------------------------------------------
// DEFAULT converter (UTF8)
//------------------------------------------------------------------------------

//
// Default constructor: the converter is named DEFAULT_CONVERTER.
// The name is kept in a private heap copy.
//
UCS2Converter::UCS2Converter ()
{
	const char*	source = DEFAULT_CONVERTER;

	name = new char[strlen (source) +1];
	CHECKNULL (name);
	strcpy (name, source);
}

//
// Make a converter with the given name. A null nameIn selects
// DEFAULT_CONVERTER. The name is kept in a private heap copy.
//
UCS2Converter::UCS2Converter (const char *nameIn)
{
	const char*	source = (nameIn != 0) ? nameIn : DEFAULT_CONVERTER;

	name = new char[strlen (source) +1];
	CHECKNULL (name);
	strcpy (name, source);
}

//
// Release the name copy made by the constructors.
//
UCS2Converter::~UCS2Converter ()
{
	// name comes from new char[], so it needs the array form of delete
	delete[] name;
}

//
// Prepare for encoding: work out the input length and allocate the
// unsigned char output buffer.
//   input - the UCS2 text that is going to be encoded
//   _size - number of UCS2 units; a negative value means the input is
//           zero terminated and is measured here
// The buffer starts with room for inputSize + 2 bytes.
//
void
UCS2Converter::init (const UCS2* input, int _size)
{
	int	count = _size;

	if (count < 0)
	{
		for (count = 0; input[count] != 0; count++)
		{
			// just counting
		}
	}
	inputSize = count;
	bufferSize = inputSize +2;
	size = 0;
	uchar = new unsigned char [bufferSize];
	CHECKNULL (uchar);
}

//
// Prepare for decoding: work out the input length and allocate the
// UCS2 output buffer.
//   input - the bytes that are going to be decoded
//   _size - number of bytes; zero or a negative value means the input is
//           zero terminated and is measured here
// The buffer starts with room for inputSize + 2 UCS2 units.
//
void
UCS2Converter::init (const unsigned char* input, int _size)
{
	int	count = _size;

	if (count <= 0)
	{
		for (count = 0; input[count] != 0; count++)
		{
			// just counting
		}
	}
	inputSize = count;
	bufferSize = inputSize +2;
	size = 0;
	ucs2 = new UCS2 [bufferSize];
	CHECKNULL (ucs2);
}

void
UCS2Converter::append (const unsigned char in)
{
	unsigned char*	newBuffer;
	if (size>=bufferSize)
	{
		bufferSize += bufferSize/2 ;
		newBuffer = new unsigned char [bufferSize];
		memcpy (newBuffer, uchar, size * sizeof (unsigned char));
		delete uchar;
		uchar = newBuffer;
	}
	uchar[size++] = (unsigned char) in;
}

void
UCS2Converter::append (const UCS2 in)
{
	UCS2*		newBuffer;

/*
	if (in ==0)
	{
		appendQuoted ((UCS2) in);
		return;
	}
*/
	if (size>=bufferSize)
	{
		bufferSize += bufferSize/2;
		newBuffer = new UCS2 [bufferSize];
		memcpy (newBuffer, ucs2, size * sizeof (UCS2));
		delete ucs2;
		ucs2 = newBuffer;
	}
	ucs2[size++] = (UCS2) in;
}

//
// Put a terminating zero byte after the encoder output.
// The zero is counted in size like any other appended byte.
//
void
UCS2Converter::terminateStr ()
{
	const unsigned char	terminator = 0;

	append (terminator);
}

void
UCS2Converter::terminateUCS2 ()
{
	UCS2*		newBuffer;
	if (size>=bufferSize)
	{
		bufferSize += bufferSize/2;
		newBuffer = new UCS2 [bufferSize];
		memcpy (newBuffer, ucs2, size * sizeof (UCS2));
		delete ucs2;
		ucs2 = newBuffer;
	}
	ucs2[size++] = (UCS2) 0;
}

//
// append into UCS2
//
void
UCS2Converter::append32 (const QUOTE32 ucs4)
{
	char 		hexmap[] = {'0', '1', '2', '3', 
			'4', '5', '6', '7', '8', '9', 
			'a', 'b', 'c', 'd', 'e', 'f'};
	append ((UCS2) '\\');
	append ((UCS2) 'U');
	append ((UCS2) hexmap[(ucs4>>28)&0xf]);
	append ((UCS2) hexmap[(ucs4>>24)&0xf]);
	append ((UCS2) hexmap[(ucs4>>20)&0xf]);
	append ((UCS2) hexmap[(ucs4>>16)&0xf]);
	append ((UCS2) hexmap[(ucs4>>12)&0xf]);
	append ((UCS2) hexmap[(ucs4>>8)&0xf]);
	append ((UCS2) hexmap[(ucs4>>4)&0xf]);
	append ((UCS2) hexmap[ucs4&0xf]);
}

//
// append into UCS2
//
void
UCS2Converter::appendQuoted (const UCS2 u2)
{
	char 		hexmap[] = {'0', '1', '2', '3', 
			'4', '5', '6', '7', '8', '9', 
			'a', 'b', 'c', 'd', 'e', 'f'};
	append ((UCS2) '\\');
	append ((UCS2) 'u');
	append ((UCS2) hexmap[(u2>>12)&0xf]);
	append ((UCS2) hexmap[(u2>>8)&0xf]);
	append ((UCS2) hexmap[(u2>>4)&0xf]);
	append ((UCS2) hexmap[u2&0xf]);
}

//
// Append a 16 bit value to the byte output in quoted form:
// backslash, 'u' and four lower case hexadecimal digits,
// most significant digit first.
//
void
UCS2Converter::append16 (const QUOTE16 q16)
{
	static const char	hexDigits[] = "0123456789abcdef";
	int			shift;

	append ((unsigned char) '\\');
	append ((unsigned char) 'u');
	for (shift = 12; shift >= 0; shift -= 4)
	{
		append ((unsigned char) hexDigits[(q16 >> shift) & 0xf]);
	}
}

//
// append into UCS2
//
void
UCS2Converter::append8 (const QUOTE8 quote)
{
	char            hexmap[] = {'0', '1', '2', '3',
			'4', '5', '6', '7', '8', '9',
			'A', 'B', 'C', 'D', 'E', 'F'};
	append ((UCS2) '=');
	append ((UCS2) (UCS2) hexmap[((unsigned) quote >> 4) & 0xf]);
	append ((UCS2) (UCS2) hexmap[(unsigned) quote & 0xf]);
}

//
// Decode UTF8 bytes into UCS2.
//   in      - input bytes
//   _size   - number of input bytes (see init for the zero terminated case)
//   retSize - receives the number of UCS2 units, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// The extended ucs2 that uses the 0xd800 - 0xdfff range is not produced:
// sequences of four or more bytes are written as \Uxxxxxxxx.
// Overlong two and three byte forms are written as \uxxxx, and bytes
// that do not fit into any sequence are written as =XX.
//
UCS2*
UCS2Converter::decode (const unsigned char *in, int _size, int *retSize)
{
	int			pos;
	int			left;
	const unsigned char*	p;
	UCS2			wide;
	QUOTE32			big;

	init (in, _size);

	for (pos=0; pos<inputSize; pos++)
	{
		p = in + pos;
		left = inputSize - pos;

		// A continuation byte that has no lead byte in front of it
		if (p[0] >= 0x80 && p[0] <= 0xbf)
		{
			append8 (p[0]);
			continue;
		}

		// lead byte 110xxxxx and one continuation byte
		if ((p[0] & 0xe0)==0xc0 && left > 1 && (p[1] & 0xc0)==0x80)
		{
			wide = (((unsigned short)(p[0] & 0x1f)) << 6)
				| ((unsigned short) (p[1] & 0x3f));
			if (wide >= 0x80)
			{
				append (wide);
			}
			else
			{
				appendQuoted ((UCS2) wide);
			}
			pos += 1;
			continue;
		}

		// lead byte 1110xxxx and two continuation bytes
		if ((p[0] & 0xf0)==0xe0 && left > 2
			&& (p[1] & 0xc0)==0x80 && (p[2] & 0xc0)==0x80)
		{
			wide = (((unsigned short) (p[0] & 0x0f)) << 12)
				| (((unsigned short) (p[1] & 0x3f))<<6)
				| ((unsigned short) (p[2] & 0x3f));
			if (wide >= 0x800)
			{
				append (wide);
			}
			else
			{
				appendQuoted ((UCS2) wide);
			}
			pos += 2;
			continue;
		}

		// lead byte 11110xxx and three continuation bytes
		if ((p[0] & 0xf8)==0xf0 && left > 3
			&& (p[1] & 0xc0)==0x80 && (p[2] & 0xc0)==0x80
			&& (p[3] & 0xc0)==0x80)
		{
			big = (((unsigned int) (p[0] & 0x07)) << 18)
				| (((unsigned int) (p[1] & 0x3f))<<12)
				| (((unsigned short)(p[2] & 0x3f))<<6)
				| ((unsigned short) (p[3] &  0x3f));
			append32 (big);
			pos += 3;
			continue;
		}

		// lead byte 111110xx and four continuation bytes
		if ((p[0] & 0xfc)==0xf8 && left > 4
			&& (p[1] & 0xc0)==0x80 && (p[2] & 0xc0)==0x80
			&& (p[3] & 0xc0)==0x80 && (p[4] & 0xc0)==0x80)
		{
			big = (((unsigned int) (p[0] & 0x03)) << 24)
				| (((unsigned int) (p[1] & 0x3f)) << 18)
				| (((unsigned int) (p[2] & 0x3f))<<12)
				| (((unsigned short) (p[3] & 0x3f))<<6)
				| ((unsigned short) (p[4] & 0x3f));
			append32 (big);
			pos += 4;
			continue;
		}

		// lead byte 1111110x and five continuation bytes
		if ((p[0] & 0xfe)==0xfc && left > 5
			&& (p[1] & 0xc0)==0x80 && (p[2] & 0xc0)==0x80
			&& (p[3] & 0xc0)==0x80 && (p[4] & 0xc0)==0x80
			&& (p[5] & 0xc0)==0x80)
		{
			big =  (((unsigned int) (p[0] & 0x01)) << 30)
				| (((unsigned int) (p[1] & 0x3f)) << 24)
				| (((unsigned int) (p[2] & 0x3f)) << 18)
				| (((unsigned int) (p[3] & 0x3f))<<12)
				| (((unsigned short)(p[4] & 0x3f))<<6)
				| ((unsigned short) (p[5] &  0x3f));
			append32 (big);
			pos += 5;
			continue;
		}

		// a lead byte whose sequence is broken or cut short
		if (p[0] >= 0x80)
		{
			append8 (p[0]);
			continue;
		}

		// what is left is a plain 7 bit character
		append ((UCS2) p[0]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
//
unsigned char*
UCS2Converter::encode (const UCS2 *in, int _size, int* retSize)
{
	int	i;

	init (in, _size);
	for (i=0; i<inputSize; i++)
	{

		// The order is important.
		if (in[i] >= 0x0800)
		{
			append ((unsigned char) (0xe0 | (in[i] >> 12)));
			append ((unsigned char) (0x80 | ((in[i] >> 6) & 0x3f))); 
			append ((unsigned char) (0x80 | (in[i]  & 0x3f))); 
			continue;
		}
		if (in[i] >= 0x80 && in[i] <= 0x07ff)
		{
			append ((unsigned char) (0xc0 | (in[i] >> 6))); 
			append ((unsigned char) (0x80 | (in[i]  & 0x3f))); 
			continue;
		}
		append ((unsigned char) in[i]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// UTF7 converter
//------------------------------------------------------------------------------
//
// Make a converter named "UTF7". The first converter that is made also
// builds the two file level lookup tables, allowedChars and base64Decode.
//
UUTF7Conv::UUTF7Conv () : UCS2Converter ((const char*) "UTF7")
{
	unsigned int		idx;
	unsigned char*		table;

	if (allowedChars==0)
	{
		table = new unsigned char[0x80];
		CHECKNULL (table);
		memset (table, 0, 0x80);
		// sizeof counts the terminating zero as well, so entry 0 is set too
		for (idx=0; idx<sizeof (allowedLoose); idx++)
		{
			table[allowedLoose[idx]] = 1;
		}
		allowedChars = table;
	}
	if (base64Decode==0)
	{
		table = new unsigned char[0x80];
		CHECKNULL (table);
		// 0xff marks a character that is not a base64 digit
		memset (table, 0xff, 0x80);

		// the terminating zero is left out here
		for (idx=0; idx+1<sizeof (base64Code); idx++)
		{
			table[base64Code[idx]] = (unsigned char) idx;
		}
		base64Decode = table;
	}
}
// Nothing to release here; the shared lookup tables built by the
// constructor are left allocated.
UUTF7Conv::~UUTF7Conv ()
{
}

//
// Decode UTF7 bytes into UCS2.
//   in      - input bytes
//   _size   - number of input bytes (see init for the zero terminated case)
//   retSize - receives the number of UCS2 units, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// "+-" stands for a plain '+'. Any other '+' starts a run of base64
// digits; the run is copied into the output buffer and then converted
// in place by mdecode. A '-' that closes the run is dropped.
//
UCS2*
UUTF7Conv::decode (const unsigned char* in, int _size, int* retSize)
{
	int		i;
	int		start;

	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (inputSize-i > 1 && in[i] == '+' && in[i+1] == '-')
		{
			append ((UCS2) '+');
			i++;
			continue;
		}
		if (in[i] != '+')
		{
			append ((UCS2) in[i]);
			continue;
		}
		i++;
		start = size;
		// The run is bounded by inputSize so that input without a
		// terminating zero is never read past its end.
		while (i < inputSize && in[i] < 0x80 && base64Decode[in[i]] != 0xff)
		{
			append ((UCS2)in[i]);
			i++;
		}
		// '-' should be absorbed; anything else is looked at again
		if (i >= inputSize || in[i] != '-') i--;
		mdecode (start);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 into UTF7 bytes.
//   in      - input UCS2 units
//   _size   - number of units, negative when the input is zero terminated
//   retSize - receives the number of bytes, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// Characters listed in allowedChars and characters up to 0x20 are copied,
// '+' becomes "+-", and every other run of characters is written as
// '+', the base64 form of its big endian 16 bit units, and '-'.
//
unsigned char*
UUTF7Conv::encode (const UCS2 *in, int _size, int* retSize)
{
	int		i;
	int		start;

	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80 && allowedChars[in[i]]==1)
		{
			append ((unsigned char)in[i]);
			continue;
		}
		if (in[i]=='+')
		{
			append ((unsigned char)'+');
			append ((unsigned char)'-');
			continue;
		}
		if (in[i] <= 0x20)
		{
			append ((unsigned char)in[i]);
			continue;
		}
		// we need to mime encode this.
		append ((unsigned char)'+');
		start=size;
		// The run is bounded by inputSize so that input without a
		// terminating zero is never read past its end.
		while (i < inputSize && (in[i] >= 0x80 || allowedChars[in[i]]==0))
		{
			if (in[i] <= 0x20) break;
			append ((unsigned char)(in[i] >> 8));
			append ((unsigned char)(in[i] & 0xff));
			i++;
		}
		// mime encode from start
		mencode (start);
#if LAZYUTF7
		if (i >= inputSize || in[i] == 0 || base64Decode[in[i]] != 0xff) 
		{
			append ((unsigned char)'-');
		}
#else
		append ((unsigned char)'-');
#endif
		// NOTE(review): a zero unit right after a run also ends the
		// conversion, as it always did - confirm that this is wanted
		// for input that carries an explicit size.
		if (i >= inputSize || in[i] == 0) break;
		i--;
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//
// Decode a run of base64 digits using the same buffer.
//   start - index in ucs2 where the run begins; the run extends to size
// The digits are copied aside, size is wound back to start, and the
// decoded 16 bit units are appended in their place. Eight digits make
// three units. A unit that decodes to zero is dropped.
// The caller makes sure that every unit of the run is a base64 digit.
//
void
UUTF7Conv::mdecode (int start)
{
	int 		i;
	UCS2*		in;
	int		length;
	UCS2		uch;
	unsigned char	cch1, cch2;
		
	length=size-start;
	// The loop reads up to in[length+3] (when length % 8 == 2), so four
	// padding digits with the value zero are needed.
	in = new UCS2[length+4]; 
	CHECKNULL (in);
	memcpy (in, &ucs2[start], length * sizeof (UCS2));
	in [length] = base64Code[0];
	in [length+1] = base64Code[0];
	in [length+2] = base64Code[0];
	in [length+3] = base64Code[0];
	size = start;
	i=0;
	// The buffer is already aligned
	while (i<length)
	{
		// first unit: digits 0, 1 and the top of digit 2
		cch1 = (UCS2) (base64Decode[in[i]] <<2)
			| (base64Decode[in[i+1]]>>4);

		cch2= (UCS2) (base64Decode[in[i+1]] <<4)
			| (base64Decode[in[i+2]]>>2);

		uch = (cch1 << 8) |  cch2;
		if (uch!=0) append (uch);
		if (i+1>=length) break;

		// second unit: the rest of digit 2, digits 3, 4 and the top of 5
		cch1 = (UCS2) (base64Decode[in[i+2]] <<6)
			| base64Decode[in[i+3]];
		i+=4;
		cch2 = (UCS2) (base64Decode[in[i]] <<2)
			| (base64Decode[in[i+1]]>>4);

		uch = (cch1 << 8) | cch2;
		if (uch!=0) append (uch);
		if (i>=length) break;

		// third unit: the rest of digit 5, digits 6 and 7
		cch1= (UCS2) (base64Decode[in[i+1]] <<4)
			| (base64Decode[in[i+2]]>>2);
		cch2 = (UCS2) (base64Decode[in[i+2]] <<6)
			| base64Decode[in[i+3]];

		uch = (cch1 << 8) | cch2;
		if (uch!=0) append (uch);
		i+=4;
	}
	// in is an array allocation: array delete is required
	delete[] in;
}

//
// encode using the same buffer.
//
void
UUTF7Conv::mencode (int start)
{
	int 		i;
	unsigned char*	in;
	unsigned char	uchar4[4];
	int		length;
		
	length=size-start;
	in = new unsigned char[length+1]; 
	CHECKNULL (in);
	memcpy (in, &uchar[start], length);
	in[length]=0;
	size = start;
	i=0;
	// The buffer is already aligned
	while (i<length)
	{
		
		uchar4[0] = base64Code[in[i]>>2];
		append (uchar4[0]);

		uchar4[1] = base64Code[((in[i] & 0x03)<<4) | (in[i+1] >> 4)];
		append (uchar4[1]);
		if (i+1>=length) break;

		uchar4[2] = base64Code[((in[i+1] & 0x0f)<<2) | (in[i+2] >> 6)];
		append (uchar4[2]);
		if (i+2>=length) break;

		uchar4[3] = base64Code[in[i+2] & 0x3f];
		append (uchar4[3]);
		i += 3;
	}
	delete in;
}


//------------------------------------------------------------------------------
// Java converter
//------------------------------------------------------------------------------
// Make a converter; the name "JAVA" is handed to the base class constructor.
UJavaConv::UJavaConv () : UCS2Converter ((const char*) "JAVA")
{
}

// Nothing of its own to release.
UJavaConv::~UJavaConv ()
{
}

//
// Encode UCS2 into Java style text: characters below 0x80 are copied,
// everything else is written as \uxxxx.
//   in      - input UCS2 units
//   _size   - number of units, negative when the input is zero terminated
//   retSize - receives the number of bytes, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
unsigned char*
UJavaConv::encode (const UCS2 *in, int _size, int* retSize)
{
	int		pos;

	init (in, _size);
	for (pos=0; pos<inputSize; pos++)
	{
		if (in[pos] >= 0x80)
		{
			append16 ((const QUOTE16) in[pos]);
		}
		else
		{
			append ((unsigned char)in[pos]);
		}
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//
// Value of the hexadecimal digit c, or -1 when c is not a hex digit.
//
static int
javaHexValue (unsigned char c)
{
	if (c >= '0' && c <= '9') return c - '0';
	if (c >= 'a' && c <= 'f') return c - 'a' + 10;
	if (c >= 'A' && c <= 'F') return c - 'A' + 10;
	return -1;
}

//
// Decode Java style text into UCS2.
//   in      - input bytes
//   _size   - number of input bytes (see init for the zero terminated case)
//   retSize - receives the number of UCS2 units, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// A backslash, 'u' and exactly four hexadecimal digits give one UCS2
// unit, unless the value is zero. Only those four digits are looked at,
// so an escape that is followed by another hex digit is still decoded,
// and nothing beyond the escape is read. Everything else is treated as
// UTF8: sequences of four or more bytes are written as \Uxxxxxxxx,
// overlong forms as \uxxxx and stray bytes as =XX.
//
UCS2*
UJavaConv::decode (const unsigned char *in, int _size, int* retSize)
{
	int		i;
	int		d0, d1, d2, d3;
	UCS2		decoded;
	QUOTE32		quoted;

	init (in, _size);

	for (i=0; i<inputSize; i++)
	{
		if (inputSize-i > 5 && in[i] == '\\' && in[i+1] == 'u')
		{
			d0 = javaHexValue (in[i+2]);
			d1 = javaHexValue (in[i+3]);
			d2 = javaHexValue (in[i+4]);
			d3 = javaHexValue (in[i+5]);
			if (d0 >= 0 && d1 >= 0 && d2 >= 0 && d3 >= 0)
			{
				decoded = (UCS2) ((d0 << 12) | (d1 << 8) | (d2 << 4) | d3);
				// success
				if (decoded!=0)
				{
					append (decoded);
					i += 5;
					continue;
				}
			}
			// life goes on..
		}
		//
		// It tries as UTF8
		//
		// Unexpected continuation bytes
		if (in[i] <= 0xbf && in[i] >= 0x80)
		{
			append8 (in[i]);
			continue;
		}

		if ((in[i] & 0xe0) ==0xc0 && inputSize-i > 1 && (in[i+1] & 0xc0)==0x80 )
		{
			decoded = (((unsigned short)(in[i] & 0x1f)) << 6)
					| ((unsigned short) (in[i+1] & 0x3f));
			// an overlong form is quoted
			if (decoded < 0x80)
			{
				appendQuoted ((UCS2) decoded);
			}
			else
			{
				append (decoded);
			}
			i += 1;
			continue;
		}
		if ((in[i] & 0xf0)==0xe0 && inputSize-i > 2
			&& (in[i+1] & 0xc0)==0x80 && (in[i+2] & 0xc0)==0x80)
		{
			decoded = (((unsigned short) (in[i] & 0x0f)) << 12)
					| (((unsigned short) (in[i+1] & 0x3f))<<6)
					| ((unsigned short) (in[i+2] & 0x3f));
			// an overlong form is quoted
			if (decoded < 0x800)
			{
				appendQuoted ((UCS2) decoded);
			}
			else
			{
				append (decoded);
			}
			i += 2;
			continue;
		}
		if ((in[i] & 0xf8)==0xf0 && inputSize-i > 3
			&& (in[i+1] & 0xc0)==0x80 && (in[i+2] & 0xc0)==0x80 
			&& (in[i+3] & 0xc0)==0x80)
		{
			quoted = (((unsigned int) (in[i] & 0x07)) << 18)
				| (((unsigned int) (in[i+1] & 0x3f))<<12)
				| (((unsigned short)(in[i+2] & 0x3f))<<6)
				| ((unsigned short) (in[i+3] &  0x3f));
			append32 (quoted);
			i += 3;
			continue;
		}
		if ((in[i] & 0xfc)==0xf8 && inputSize-i > 4
			&& (in[i+1] & 0xc0)==0x80 && (in[i+2] & 0xc0)==0x80 
			&& (in[i+3] & 0xc0)==0x80 && (in[i+4] & 0xc0)==0x80)
		{
			// every continuation byte carries six bits (mask 0x3f)
			quoted = (((unsigned int) (in[i] & 0x03)) << 24)
				| (((unsigned int) (in[i+1] & 0x3f)) << 18)
				| (((unsigned int) (in[i+2] & 0x3f))<<12)
				| (((unsigned short) (in[i+3] & 0x3f))<<6)
				| ((unsigned short) (in[i+4] & 0x3f));
			append32 (quoted);
			i += 4;
			continue;
		}
		if ((in[i] & 0xfe)==0xfc && inputSize-i > 5
			&& (in[i+1] & 0xc0)==0x80 && (in[i+2] & 0xc0)==0x80 
			&& (in[i+3] & 0xc0)==0x80 && (in[i+4] & 0xc0)==0x80
			&& (in[i+5] & 0xc0)==0x80)
		{
			quoted =  (((unsigned int) (in[i] & 0x01)) << 30)
				| (((unsigned int) (in[i+1] & 0x3f)) << 24)
				| (((unsigned int) (in[i+2] & 0x3f)) << 18)
				| (((unsigned int) (in[i+3] & 0x3f))<<12)
				| (((unsigned short)(in[i+4] & 0x3f))<<6)
				| ((unsigned short) (in[i+5] &  0x3f));
			append32 (quoted);
			i += 5;
			continue;
		}

		// a lead byte whose sequence is broken or cut short
		if (in[i] >= 0x80)
		{
			append8 (in[i]);
			continue;
		}
		// plain 7 bit character
		append ((UCS2) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//------------------------------------------------------------------------------
// JIS converter
//------------------------------------------------------------------------------

// Make a converter named "JIS". The maps are not loaded here;
// initialize() does that later.
UJISConv::UJISConv () : UCS2Converter ((const char*) "JIS")
{
	mJIS0201 = mJIS0208 = mJIS0212 = 0;
	initialized = 0;
}

// Make a converter with the given name. The maps are not loaded here;
// initialize() does that later.
UJISConv::UJISConv (const char *name_) : UCS2Converter (name_)
{
	mJIS0201 = mJIS0208 = mJIS0212 = 0;
	initialized = 0;
}

// Release the maps. Deleting a null pointer does nothing, so maps that
// were never loaded need no test.
UJISConv::~UJISConv ()
{
	delete mJIS0201;
	delete mJIS0208;
	delete mJIS0212;
}

void
UJISConv::initialize ()
{
	if (initialized) return;

	mJIS0201 = new UMap ();
	CHECKNULL (mJIS0201);
	if (mJIS0201->rename("JIS0201") != UMap::OK)
	{
		cerr << "error: can not get JIS0201 umap.\n";
		delete mJIS0201;
		mJIS0201=0;
	}
	mJIS0208 = new UMap ();
	CHECKNULL (mJIS0208);
	if (mJIS0208->rename("JIS0208") != UMap::OK)
	{
		cerr << "error: can not get JIS0208 umap.\n";
		delete mJIS0208;
		mJIS0208=0;
	}
	mJIS0212 = new UMap ();
	CHECKNULL (mJIS0212);
	if (mJIS0212->rename("JIS0212") != UMap::OK)
	{
		cerr << "error: can not get JIS0212 umap.\n";
		delete mJIS0212;
		mJIS0212=0;
	}
	initialized=1;
}

//
// Decode JIS (escape sequence switched) bytes into UCS2.
//   in      - input bytes
//   _size   - number of input bytes (see init for the zero terminated case)
//   retSize - receives the number of UCS2 units, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// Escape sequences select the current map; a null current map means
// that bytes are copied as they are. A code that the current map does
// not know is written as =XX for each byte of that code.
//
UCS2*
UJISConv::decode (const unsigned char* in, int _size, int* retSize)
{
	UMap		*current=0;
	UCS2		got;
	int		i;
	int		katakana=0;
	int		twoByte;

	initialize (); 
	init (in, _size);
	current = 0;
	for (i=0; i<inputSize; i++)
	{
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '$' 
			&& in[i+2] == '@')
		{
			current=mJIS0208; // JIS C 6226-1978
			i += 2;
			continue;
		} 
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '$' 
			&& in[i+2] == 'B')
		{
			current=mJIS0208; // JIS X 0X208-1983
			i += 2;
			continue;
		} 
		if (inputSize -i > 5 && in[i] == ESC && in[i+1] == '&' 
			&& in[i+2] == '@' && in[i+3] == ESC && in[i+4] == '$' 
				&& in[i+5] == 'B')
		{
			current=mJIS0208; // JIS X 0X208-1990
			i += 5;
			continue;
		} 
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'J')
		{
			current=mJIS0201;  // JIS Roman
			katakana=0;
			i += 2;
			continue;
		} 
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'H')
		{
			current=mJIS0201; // JIS Roman - old bad escape
			katakana=0;
			i += 2;
			continue;
		}
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'I')
		{
			current=mJIS0201; // Half width katakana
			katakana=1;
			i += 2;
			continue;
		}
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'B')
		{
			current=0; // ASCII
			i += 2;
			continue;
		}
		if (inputSize -i > 3 && in[i] == ESC && in[i+1] == '$' 
			&& in[i+2] == '(' && in[i+3] == 'D')
		{
			current=mJIS0212; // JIS X 0212-1990
			i += 3;
			continue;
		}

		// It should not happen but it does.
		if (in[i] < ' ') current=0;

		if (current) 
		{
			// JIS0201 codes are one byte long, the others two
			twoByte = (current != mJIS0201);
			if (!twoByte)
			{
				// katakana is looked up in the upper half of the map
				got=(katakana)
				    ? current->decode ((UCS2)(in[i]|0x80))
				    :  current->decode ((UCS2)(in[i]));
			}
			else if (inputSize-i > 1)
			{
				got = current->decode ((UCS2)in[i] << 8 | in[i+1]);
			}
			else
			{
				// the second byte is missing
				got = 0;
			}
			if (got != 0)
			{
				append (got);
			}
			else
			{
				// Quote only the bytes that belong to this code: the
				// second one must exist and must not be re-read as
				// the next character.
				append8 ((QUOTE8) in[i]);
				if (twoByte && inputSize-i > 1)
				{
					append8 ((QUOTE8) in[i+1]);
				}
			}
			if (twoByte) i++;
			continue;
		}
		append ((UCS2) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 into JIS (escape sequence switched) bytes.
//   in      - input UCS2 units
//   _size   - number of units, negative when the input is zero terminated
//   retSize - receives the number of bytes, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// Characters below 0x80 are written in ASCII mode. The others are tried
// in JIS0201, JIS0208 and JIS0212 in that order, and an escape sequence
// is written whenever the character set changes. Characters that are in
// none of the maps are written in ASCII mode as \uxxxx. The output
// always ends in ASCII mode.
//
// Hmm.... ESC [$] ) .. dollar is missing here...
//
unsigned char*
UJISConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	UMap		*current=0;
	int		i;
	int		katakana=0;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			if (current != 0)
			{
				append ((unsigned char) ESC);
				append ((unsigned char) '(');
				append ((unsigned char) 'B');
			}
			current=0;
			append ((unsigned char) in[i]);
			continue;
		}
		if (mJIS0201!=0 && (got=mJIS0201->encode ((UCS2)in[i])) != 0)
		{
			if (got > 0xa0 && got < 0xff) 
			{
				// Half width katakana
				if (current != mJIS0201 || katakana!= 1)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '(');
					append ((unsigned char) 'I');
				}
				current = mJIS0201;
				katakana=1;
				append ((unsigned char) (got&0x7f));
				continue;
			}
			if (got < 0x7f)
			{
				// Roman. The escape is needed unless Roman is already
				// selected, in particular when coming from katakana.
				if (current != mJIS0201 || katakana!= 0)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '(');
					append ((unsigned char) 'J');
				}
				current = mJIS0201;
				katakana=0;
				append ((unsigned char) got);
				continue;
			}
		}
		if (mJIS0208!=0 && (got=mJIS0208->encode ((UCS2)in[i])) != 0)
		{
			if ((got&0xff00) > 0x2000 && (got&0xff00) < 0x8f00
				&& (got&0xff) > 0x20 && (got&0xff) < 0x8f) 
			{
				if (current != mJIS0208)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '$');
					append ((unsigned char) 'B');
					current = mJIS0208;
				}
				append ((unsigned char) ((got&0xff00)>>8));
				append ((unsigned char) (got&0xff));
				continue;
			}
		}
		if (mJIS0212!=0 && (got=mJIS0212->encode ((UCS2)in[i])) != 0)
		{
			if ((got&0xff00) > 0x2000 && (got&0xff00) < 0x8f00
				&& (got&0xff) > 0x20 && (got&0xff) < 0x8f) 
			{
				if (current != mJIS0212)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '$');
					append ((unsigned char) '(');
					append ((unsigned char) 'D');
					current = mJIS0212;
				}
				append ((unsigned char) ((got&0xff00)>>8));
				append ((unsigned char) (got&0xff));
				continue;
			}
		}
		// not in any map: go back to ASCII and quote it
		if (current != 0)
		{
			append ((unsigned char) ESC);
			append ((unsigned char) '(');
			append ((unsigned char) 'B');
			current = 0;
		}
		append16 ((const QUOTE16) in[i]);
	}
	// leave the output in ASCII mode
	if (current != 0)
	{
		append ((unsigned char) ESC);
		append ((unsigned char) '(');
		append ((unsigned char) 'B');
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// SHIFT-JIS converter.
//------------------------------------------------------------------------------

// Make a converter named "SJIS". The maps are not loaded here;
// initialize() does that later.
USJISConv::USJISConv () : UCS2Converter ((const char*) "SJIS")
{
	mJIS0201 = mJIS0208 = 0;
	initialized = 0;
}

// Release the maps. Deleting a null pointer does nothing, so maps that
// were never loaded need no test.
USJISConv::~USJISConv ()
{
	delete mJIS0201;
	delete mJIS0208;
}

void
USJISConv::initialize ()
{
	if (initialized) return;

	mJIS0201 = new UMap ();
	CHECKNULL (mJIS0201);
	if (mJIS0201->rename("JIS0201") != UMap::OK)
	{
		cerr << "error: can not get JIS0201 umap.\n";
		delete mJIS0201;
		mJIS0201=0;
	}
	mJIS0208 = new UMap ();
	CHECKNULL (mJIS0208);
	if (mJIS0208->rename("JIS0208") != UMap::OK)
	{
		cerr << "error: can not get JIS0208 umap.\n";
		delete mJIS0208;
		mJIS0208=0;
	}
	initialized=1;
}

//
// Decode Shift-JIS bytes into UCS2.
//   in      - input bytes
//   _size   - number of input bytes (see init for the zero terminated case)
//   retSize - receives the number of UCS2 units, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// Codes that can not be mapped, including every code whose map failed
// to load, are written as =XX for each byte.
//
UCS2*
USJISConv::decode (const unsigned char* in, int _size, int* retSize)
{
	UCS2		got;
	UCS2		code;
	int		i;
	unsigned short	adjust;
	unsigned short	rowOffset;
	unsigned short	cellOffset;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		// Half-width katakana
		if (in[i] >= 0xa1 && in[i] <= 0xdf)
		{
			code = in[i];
			// initialize() leaves the map null when it can not be loaded
			got = 0;
			if (mJIS0201 != 0) got = mJIS0201->decode (code);
			if (got ==0)
			{
				append8 ((QUOTE8) in[i]);
			}
			else
			{
				append (got);
			}
			continue;
		}

		// JIS
		if (inputSize -i > 1 && 
			((in[i] >= 0x81 && in[i] <= 0x9f) 
			  || (in[i] >= 0xe0 && in[i] <= 0xef))
		 && 
			((in[i+1] >= 0x40 && in[i+1] <= 0x7e) 
			  || (in[i+1] >= 0x80 && in[i+1] <= 0xfc)) )
			
		{
			// turn the Shift-JIS pair into the JIS0208 row and cell
			adjust = in[i+1] < 159;
			rowOffset = in[i] < 160 ? 112 : 176;
			cellOffset = adjust ? (in[i+1]>127 ? 32 : 31) : 126;

			code = ((((in[i] - rowOffset) << 1) - adjust) << 8)
				| (in[i+1] - cellOffset);

			// initialize() leaves the map null when it can not be loaded
			got = 0;
			if (mJIS0208 != 0) got = mJIS0208->decode (code);
			if (got ==0)
			{
				append8 ((QUOTE8) in[i]);
				append8 ((QUOTE8) in[i+1]);
			}
			else
			{
				append (got);
			}
			i++;
			continue;
		} 

		// User defined area: lead byte 0xf0-0xfc with a valid second
		// byte. The second byte ranges are tested on in[i+1], so a
		// plain letter followed by a high byte is not taken for a pair.
		if (inputSize -i > 1 && 
			(in[i] >= 0xf0 && in[i] <= 0xfc) 
		 && 
			((in[i+1] >= 0x40 && in[i+1] <= 0x7e) 
			  || (in[i+1] >= 0x80 && in[i+1] <= 0xfc)) )
			
		{
			append8 ((QUOTE8) in[i]);
			append8 ((QUOTE8) in[i+1]);
			i++;
			continue;
		}

		// MAC - backslash
		if (in[i] == 0x80)
		{
			append ((UCS2) '\\');
			continue;
		}
		// MAC - copyright
		if (in[i] == 0xfd)
		{
			append ((UCS2) 0xa9);
			continue;
		}
		// MAC - tm
		if (in[i] == 0xfe)
		{
			append ((UCS2) 0x2122);
			continue;
		}
		// MAC - ... horizontal ellipsis
		if (in[i] == 0xff)
		{
			append ((UCS2) 0x2026);
			continue;
		}

		// any other high byte is quoted
		if (in[i] > 0x80)
		{
			append8 ((QUOTE8) in[i]);
			continue;
		}
		append ((UCS2) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 into Shift-JIS bytes.
//   in      - input UCS2 units
//   _size   - number of units, negative when the input is zero terminated
//   retSize - receives the number of bytes, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// Characters below 0x80 are copied. The others are tried in JIS0201
// (one byte) and then in JIS0208 (two bytes); what is left over is
// written as \uxxxx.
//
unsigned char*
USJISConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	unsigned short	rowOffset;
	unsigned short	cellOffset;
	unsigned char	lead;
	unsigned char	trail;
	int		pos;

	initialize (); 
	init (in, _size);
	for (pos=0; pos<inputSize; pos++)
	{
		if (in[pos] < 0x80)
		{
			append ((unsigned char) in[pos]);
			continue;
		}
		if (mJIS0201!=0)
		{
			got = mJIS0201->encode ((UCS2)in[pos]);
			if (got != 0)
			{
				// katakana and Roman are both a single byte
				append ((unsigned char) got);
				continue;
			}
		}
		if (mJIS0208!=0)
		{
			got = mJIS0208->encode ((UCS2)in[pos]);
			if (got != 0)
			{
				lead = got >> 8;
				trail = got & 0xff;

				// turn the JIS0208 row and cell into a Shift-JIS pair
				rowOffset =  (lead < 95) ? 112 : 176;
				cellOffset = lead % 2 ? (trail > 95 ? 32 : 31 ) : 126;
				lead = ((lead+1) >> 1) + rowOffset;
				trail = trail + cellOffset;
				if ( ((lead >= 0x81 && lead <= 0x9f)
					|| (lead >= 0xe0 &&  lead <= 0xef))
				    &&
					((trail >= 0x40 && trail <= 0x9f)
					|| (trail >= 0x80 && trail <= 0xfc)) )
				{
					append (lead);
					append (trail);
					continue;
				}
			}
		}
		append16 ((const QUOTE16) in[pos]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// EUC - Japan
//------------------------------------------------------------------------------
// Make a converter; the name "EUC-JP" is handed to the UJISConv constructor.
UEUC_JPConv::UEUC_JPConv () : UJISConv ((const char*) "EUC-JP")
{
}
// Nothing of its own to release.
UEUC_JPConv::~UEUC_JPConv ()
{
}

//
// Decode EUC-JP bytes into UCS2.
//   in      - input bytes
//   _size   - number of input bytes (see init for the zero terminated case)
//   retSize - receives the number of UCS2 units, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// A code that can not be mapped (or whose map is not loaded) is written
// as =XX for each of its bytes. Bytes that start none of the three
// forms below are copied as they are.
//
UCS2*
UEUC_JPConv::decode (const unsigned char* in, int _size, int* retSize)
{
	UCS2		mapped;
	int		pos;

	initialize (); 
	init (in, _size);
	for (pos=0; pos<inputSize; pos++)
	{
		mapped=0;

		// 0x8e + one byte: JISC 6226-1978 half width katakana
		if (inputSize -pos > 1 && in[pos] == 0x8e && in[pos+1] > 0xa0
			&& in[pos+1] < 0xff)
		{
			if (mJIS0201!=0)
			{
				mapped = mJIS0201->decode ((UCS2)in[pos+1]);
			}
			if (mapped == 0)
			{
				append8 ((QUOTE8) in[pos]);
				append8 ((QUOTE8) in[pos+1]);
			}
			else
			{
				append (mapped);
			}
			pos += 1;
			continue;
		} 

		// two high bytes: JIS X 0X208-1990
		if (inputSize -pos > 1 && in[pos] > 0xa0 && in[pos] < 0xff
			&& in[pos+1] > 0xa0 && in[pos+1] < 0xff)
		{
			if (mJIS0208!=0)
			{
				mapped = mJIS0208->decode 
					(((UCS2)(in[pos]&0x7f) << 8)
					| (in[pos+1]&0x7f) );
			}
			if (mapped == 0)
			{
				append8 ((QUOTE8) in[pos]);
				append8 ((QUOTE8) in[pos+1]);
			}
			else
			{
				append (mapped);
			}
			pos += 1;
			continue;
		} 

		// 0x8f + two high bytes: JIS X 0212-1990
		if (inputSize -pos > 2 && in[pos] == 0x8f 
			&& in[pos+1] > 0xa0 && in[pos+1] < 0xff
			&& in[pos+2] > 0xa0 && in[pos+2] < 0xff)
		{
			if (mJIS0212!=0)
			{
				mapped = mJIS0212->decode 
					(((UCS2)(in[pos+1]&0x7f) << 8)
					| (in[pos+2]&0x7f) );
			}
			if (mapped == 0)
			{
				append8 ((QUOTE8) in[pos]);
				append8 ((QUOTE8) in[pos+1]);
				append8 ((QUOTE8) in[pos+2]);
			}
			else
			{
				append (mapped);
			}
			pos += 2;
			continue;
		} 
		append ((UCS2) in[pos]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 into EUC-JP bytes.
//   in      - input UCS2 units
//   _size   - number of units, negative when the input is zero terminated
//   retSize - receives the number of bytes, terminator not counted
// Returns the zero terminated buffer that init allocated.
//
// Characters below 0x80 are copied. Half width katakana from JIS0201
// is written as 0x8e and one byte, JIS0208 as two high bytes and JIS0212
// as 0x8f and two high bytes; what is left over is written as \uxxxx.
//
unsigned char*
UEUC_JPConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	int		pos;

	initialize (); 
	init (in, _size);
	for (pos=0; pos<inputSize; pos++)
	{
		if (in[pos] < 0x80)
		{
			append ((unsigned char) in[pos]);
			continue;
		}
		if (mJIS0201!=0)
		{
			got = mJIS0201->encode ((UCS2)in[pos]);
			if (got > 0xa0 && got < 0xff) 
			{
				append ((unsigned char) 0x8e);
				append ((unsigned char) got);
				continue;
			}
		}
		if (mJIS0208!=0)
		{
			got = mJIS0208->encode ((UCS2)in[pos]);
			if (got != 0)
			{
				// set the high bit of both bytes
				got |= 0x8080;
				if ((got&0xff00) > 0xa000 && (got&0xff00) < 0xff00
					&& (got&0xff) > 0xa0 && (got&0xff) < 0xff) 
				{
					append ((unsigned char) ((got&0xff00)>>8));
					append ((unsigned char) (got&0xff));
					continue;
				}
			}
		}
		if (mJIS0212!=0)
		{
			got = mJIS0212->encode ((UCS2)in[pos]);
			if (got != 0)
			{
				// set the high bit of both bytes
				got |= 0x8080;
				if ((got&0xff00) > 0xa000 && (got&0xff00) < 0xff00
					&& (got&0xff) > 0xa0 && (got&0xff) < 0xff) 
				{
					append ((unsigned char) 0x8f);
					append ((unsigned char) ((got&0xff00)>>8));
					append ((unsigned char) (got&0xff));
					continue;
				}
			}
		}
		append16 ((const QUOTE16) in[pos]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// COMPUND_TEXT Japanese  conversion  - this is a rather stripped down version
//------------------------------------------------------------------------------
CTEXT_JAConv::CTEXT_JAConv ()
	: UJISConv ((const char*) "CTEXT_JA")
{
	// Only hands the converter name "CTEXT_JA" to the UJISConv base.
}
CTEXT_JAConv::~CTEXT_JAConv ()
{
	// Nothing to release in this class.
}

//
// Decode a (stripped down) Japanese COMPOUND_TEXT byte stream into UCS2.
//
// in      - input bytes
// _size   - number of input bytes, handed to init()
// retSize - receives the number of UCS2 characters produced
// returns the internal, terminated UCS2 buffer
//
// Recognized designations select the map used for the following bytes:
//   ESC ) I    JIS X 0201 right half (katakana)
//   ESC ( J    JIS X 0201 left half (roman)
//   ESC $ ) B  JIS X 0208
//   ESC $ ( D  JIS X 0212  (ESC $ ) D is accepted too)
//   ESC ( B    ASCII
// A few other sequences are skipped, and ESC ] stops the conversion.
// Bytes that can not be decoded are quoted with append8.
//
UCS2*
CTEXT_JAConv::decode (const unsigned char* in, int _size, int* retSize)
{
	UMap		*current=0;	// designated map, 0 means ASCII
	UCS2		got;
	int		i;
	int		right=0;	// non-zero: bytes < 0x80 are plain ASCII
	int		left;		// bytes remaining, in[i] included

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		left = inputSize - i;

		// Four byte designations: ESC $ x y
		if (in[i] == ESC && left > 3 && in[i+1] == '$')
		{
			if (in[i+2] == ')' && in[i+3] == 'B')
			{
				current=mJIS0208; // JIS X 0208-1990
				right = 1;
				i += 3;
				continue;
			}
			//
			// Kterm has the habit of setting GR instead of GL,
			// so both intermediate bytes are accepted here.
			//
			if ((in[i+2] == '(' || in[i+2] == ')') 
				&& in[i+3] == 'D')
			{
				current=mJIS0212; // JIS X 0212-1990
				right = 0;
				i += 3;
				continue;
			}
		}

		// Three byte sequences: ESC x y
		if (in[i] == ESC && left > 2)
		{
			const unsigned char	inter = in[i+1];
			const unsigned char	fin = in[i+2];

			if (inter == ')' && fin == 'I')
			{
				current=mJIS0201; // JIS X 0201-1976 right - katakana
				right = 1;
				i += 2;
				continue;
			}
			if (inter == '(' && fin == 'J')
			{
				current=mJIS0201; // JIS X 0201-1976 left half - roman
				right = 0;
				i += 2;
				continue;
			}
			if (inter == '(' && fin == 'B')
			{
				current=0; // ASCII
				right = 1;
				i += 2;
				continue;
			}
			// Skipped without any effect:
			//   ESC SP C, ESC SP G, ESC SP I  (8 bit environment)
			//   ESC - A                       (right ISO latin is G1)
			//   ESC 1 ], ESC 2 ]              (text direction)
			if ((inter == ' ' 
				&& (fin == 'C' || fin == 'G' || fin == 'I'))
				|| (inter == '-' && fin == 'A')
				|| ((inter == '1' || inter == '2') && fin == ']'))
			{
				i += 2;
				continue;
			}
		}

		// end of text
		if (in[i] == ESC && left > 1 && in[i+1] == ']')
		{
			break;
		}

		// It should not happen but it does.
		// Control characters do not reset the designation because
		// kterm assumes that the conversion did not change. 
		// This is wrong, but let's respect kterm.
		if (current == 0)
		{
			append ((UCS2) in[i]);
			continue;
		}

		// A set in the right half should have the high bit set,
		// a byte without it is escaped ASCII.
		if (right!=0 && in[i] < 0x80)
		{
			append ((UCS2) in[i]);
			continue;
		}

		if (current==mJIS0201)
		{
			// One byte set. Only this byte is quoted on failure;
			// the next byte is processed by the next iteration.
			got = current->decode ((UCS2)(in[i]));
			if (got != 0)
			{
				append (got);
			}
			else
			{
				append8 ((QUOTE8) in[i]);
			}
			continue;
		}

		// Two byte set.
		if (left > 1)
		{
			got = current->decode (0x7f7f &
				((UCS2)in[i] << 8 | in[i+1]));
			if (got != 0)
			{
				append (got);
			}
			else
			{
				append8 ((QUOTE8) in[i]);
				append8 ((QUOTE8) in[i+1]);
			}
			i++;
			continue;
		}

		// Truncated two byte character: quote the single byte that
		// is there, never read behind the end of the input.
		append8 ((QUOTE8) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 text as Japanese COMPOUND_TEXT.
//
// in      - input characters
// _size   - number of input characters, handed to init()
// retSize - receives the number of bytes produced
// returns the internal, terminated byte buffer
//
// A designation escape sequence is written whenever the character set
// changes.  The output always ends in ASCII.  Characters that can not
// be mapped are quoted with append16 after switching to ASCII.
//
unsigned char*
CTEXT_JAConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	UMap		*current=0;	// set designated in the output, 0: ASCII
	int		i;
	int		katakana=0;	// for mJIS0201: 1 katakana, 0 roman half

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			if (current != 0)
			{
				append ((unsigned char) ESC);
				append ((unsigned char) '(');
				append ((unsigned char) 'B');
			}
			current=0;
			append ((unsigned char) in[i]);
			continue;
		}
		if (mJIS0201!=0 && (got=mJIS0201->encode ((UCS2)in[i])) != 0)
		{
			if (got > 0xa0 && got < 0xff) 
			{
				// Katakana
				if (current != mJIS0201 || katakana != 1)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) ')');
					append ((unsigned char) 'I');
				}
				current = mJIS0201;
				katakana=1;
				append ((unsigned char) got);
				continue;
			}
			if (got < 0x7f)
			{
				// Roman. The escape is needed when coming from
				// any other set, the katakana half included.
				// Testing katakana != 1 here left out ESC ( J
				// after katakana, so the decoder read the byte
				// as escaped ASCII.
				if (current != mJIS0201 || katakana != 0)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '(');
					append ((unsigned char) 'J');
				}
				current = mJIS0201;
				katakana=0;
				append ((unsigned char) got);
				continue;
			}
		}
		if (mJIS0208!=0 && (got=mJIS0208->encode ((UCS2)in[i])) != 0)
		{
			if ((got&0xff00) > 0x2000 && (got&0xff00) < 0x8f00
				&& (got&0xff) > 0x20 && (got&0xff) < 0x8f) 
			{
				if (current != mJIS0208)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '$');
					append ((unsigned char) ')');
					append ((unsigned char) 'B');
					current = mJIS0208;
				}
				// GR should have high bit set
				append ((unsigned char) ((got|0x8000)>>8));
				append ((unsigned char) (got|0x80));
				continue;
			}
		}
		if (mJIS0212!=0 && (got=mJIS0212->encode ((UCS2)in[i])) != 0)
		{
			if ((got&0xff00) > 0x2000 && (got&0xff00) < 0x8f00
				&& (got&0xff) > 0x20 && (got&0xff) < 0x8f) 
			{
				if (current != mJIS0212)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '$');
					append ((unsigned char) '(');
					append ((unsigned char) 'D');
					current = mJIS0212;
				}
				// This set is sent with the high bit cleared.
				append ((unsigned char) ((got&0x7f00)>>8));
				append ((unsigned char) (got&0x7f));
				continue;
			}
		}

		// Not mappable: quote it, but in ASCII.
		if (current != 0)
		{
			append ((unsigned char) ESC);
			append ((unsigned char) '(');
			append ((unsigned char) 'B');
			current = 0;
		}
		append16 ((const QUOTE16) in[i]);
	}

	// Leave the stream in ASCII.
	if (current != 0)
	{
		append ((unsigned char) ESC);
		append ((unsigned char) '(');
		append ((unsigned char) 'B');
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// GB2312_7 converter
//------------------------------------------------------------------------------

UGB2312_7Conv::UGB2312_7Conv ()
	: UCS2Converter ((const char*) "GB2312_7")
{
	// The map is created later, on the first call of initialize().
	initialized = 0;
	mGB2312 = 0;
}

UGB2312_7Conv::UGB2312_7Conv (const char *name_)
	: UCS2Converter (name_)
{
	// Same as the default constructor, with the converter name
	// supplied by the caller.
	initialized = 0;
	mGB2312 = 0;
}

UGB2312_7Conv::~UGB2312_7Conv ()
{
	// The map exists only if initialize() managed to load it.
	if (mGB2312 != 0)
	{
		delete mGB2312;
	}
}

//
// Load the "GB2312L" map on the first call; later calls do nothing.
// When the map can not be loaded an error is printed and mGB2312 
// stays 0.
//
void
UGB2312_7Conv::initialize ()
{
	if (!initialized)
	{
		mGB2312 = new UMap ();
		CHECKNULL (mGB2312);
		if (mGB2312->rename("GB2312L") != UMap::OK)
		{
			cerr << "error: can not get GB2312L umap.\n";
			delete mGB2312;
			mGB2312 = 0;
		}
		initialized = 1;
	}
}

//
// 7 Bit GB2312_7
//
//
// Decode 7 bit GB2312 text into UCS2.
//
// in      - input bytes
// _size   - number of input bytes, handed to init()
// retSize - receives the number of UCS2 characters produced
// returns the internal, terminated UCS2 buffer
//
// ESC $ ( A and ESC ( T select the GB2312 map, ESC ( B selects ASCII.
// While the map is selected the input is read as byte pairs; pairs
// that can not be decoded are quoted with append8.
//
UCS2*
UGB2312_7Conv::decode (const unsigned char* in, int _size, int* retSize)
{
	UMap		*current=0;	// selected map, 0 means ASCII
	UCS2		got;
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (inputSize -i > 3 && in[i] == ESC 
			&& in[i+1] == '$' 
			&& in[i+2] == '(' 
			&& in[i+3] == 'A')
		{
			current=mGB2312;  // Chinese
			i++; i++; i++;
			continue;
		} 
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'B')
		{
			current=0; // ASCII
			i++; i++;
			continue;
		}
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'T')
		{
			// GB Roman - it is decoded exactly like Chinese.
			current=mGB2312;
			i++; i++;
			continue;
		}

		// It should not happen but it does.
		if (in[i] < ' ') current=0;
		if (current!=0) 
		{
			if (inputSize-i > 1)
			{
				got = current->decode (
					((UCS2)in[i]<< 8) | in[i+1]);
				if (got != 0)
				{
					append (got);
				}
				else
				{
					append8 ((QUOTE8) in[i]);
					append8 ((QUOTE8) in[i+1]);
				}
				i++;
			}
			else
			{
				// Truncated pair: quote the byte that is
				// there, do not read behind the input.
				append8 ((QUOTE8) in[i]);
			}
			continue;
		}
		append ((UCS2) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 text as 7 bit GB2312.
//
// in      - input characters
// _size   - number of input characters, handed to init()
// retSize - receives the number of bytes produced
// returns the internal, terminated byte buffer
//
// ESC $ ( A is written before a run of GB2312 characters and ESC ( B
// before anything else.  Characters that can not be mapped are quoted 
// with append16.
//
unsigned char*
UGB2312_7Conv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	UMap		*current=0;	// set selected in the output, 0: ASCII
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			if (current != 0)
			{
				append ((unsigned char) ESC);
				append ((unsigned char) '(');
				append ((unsigned char) 'B');
			}
			current=0;
			append ((unsigned char) in[i]);
			continue;
		}

		if (mGB2312 != 0 && (got=mGB2312->encode ((UCS2)in[i])) != 0)
		{
			if ((got&0xff00) > 0x2000 && (got&0xff00) < 0x7f00
				&& (got&0xff) > 0x20 && (got&0xff) < 0x7f) 
			{
				if (current==0)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '$');
					append ((unsigned char) '(');
					append ((unsigned char) 'A');
				}
				current=mGB2312;
				append ((unsigned char) ((got&0x7f00)>>8));
				append ((unsigned char) (got&0x7f));
				continue;
			}
		}

		// The quoted form must not be written while GB2312 is
		// selected, the decoder would read it as byte pairs.
		if (current != 0)
		{
			append ((unsigned char) ESC);
			append ((unsigned char) '(');
			append ((unsigned char) 'B');
			current=0;
		}
		append16 ((const QUOTE16) in[i]);
	}
	// Change it to roman
	if (current != 0)
	{
		append ((unsigned char) ESC);
		append ((unsigned char) '(');
		append ((unsigned char) 'B');
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// GB2312_8 - 8 Bit Chinese
//------------------------------------------------------------------------------
UGB2312_8Conv::UGB2312_8Conv ()
	: UGB2312_7Conv ((const char*) "GB2312_8")
{
	// Only hands the converter name "GB2312_8" to UGB2312_7Conv.
}

UGB2312_8Conv::~UGB2312_8Conv ()
{
	// Nothing to release in this class.
}

//
// Decode 8 bit GB2312 text into UCS2.  Two consecutive bytes in the
// range 0xa1-0xfe are looked up in the GB2312 map with their high bits
// cleared; a pair that can not be decoded is quoted with append8.
// Every other byte is copied as a character.
//
UCS2*
UGB2312_8Conv::decode (const unsigned char* in, int _size, int* retSize)
{
	initialize ();
	init (in, _size);

	for (int pos = 0; pos < inputSize; pos++)
	{
		const unsigned char	lead = in[pos];

		// Not the start of a complete two byte character.
		if (inputSize - pos <= 1 || lead <= 0xa0 || lead >= 0xff
			|| in[pos+1] <= 0xa0 || in[pos+1] >= 0xff)
		{
			append ((UCS2) lead);
			continue;
		}

		const unsigned char	trail = in[pos+1];
		UCS2			got = 0;

		if (mGB2312 != 0)
		{
			got = mGB2312->decode (
				(UCS2)((lead & 0x7f) << 8) | (trail & 0x7f));
		}
		if (got == 0)
		{
			append8 ((QUOTE8) lead);
			append8 ((QUOTE8) trail);
		}
		else
		{
			append (got);
		}
		pos++;
	}

	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

unsigned char*
UGB2312_8Conv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			append ((unsigned char) in[i]);
			continue;
		}
		if (mGB2312!=0 && (got=mGB2312->encode ((UCS2)in[i])) != 0)
		{
			got |= 0x8080;
			if ((got&0xff00) > 0xa000 && (got&0xff00) < 0xff00
				&& (got&0xff) > 0xa0 && (got&0xff) < 0xff) 
			{
				append ((unsigned char) ((got>>8) | 0x80));
				append ((unsigned char) ((got&0xff) | 0x80));
				continue;
			}
		}
		append16 ((const QUOTE16) in[i]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}


//------------------------------------------------------------------------------
// HZ - 7 Bit Chinese portable
//------------------------------------------------------------------------------
UHZConv::UHZConv ()
	: UGB2312_7Conv ((const char*) "HZ")
{
	// Only hands the converter name "HZ" to UGB2312_7Conv.
}

UHZConv::~UHZConv ()
{
	// Nothing to release in this class.
}

//
// Decode HZ text into UCS2.  In ASCII mode "~~" stands for one '~' and
// "~{" switches to GB mode; in GB mode "~}" switches back.  "~\n" is
// dropped in both modes.  In GB mode byte pairs are looked up in the
// GB2312 map; a byte that does not start a decodable pair is copied as
// a character.
//
UCS2*
UHZConv::decode (const unsigned char* in, int _size, int* retSize)
{
	UMap		*mode = 0;	// 0: ASCII mode, otherwise the GB map

	initialize ();
	init (in, _size);

	for (int pos = 0; pos < inputSize; pos++)
	{
		const bool	hasNext = (inputSize - pos > 1);

		if (hasNext && in[pos] == '~')
		{
			const unsigned char	next = in[pos+1];

			if (next == '\n')
			{
				// dropped
				pos++;
				continue;
			}
			if (mode == 0 && next == '~')
			{
				// escaped tilde
				append ((UCS2) in[pos]);
				pos++;
				continue;
			}
			if (mode == 0 && next == '{')
			{
				mode = mGB2312;
				pos++;
				continue;
			}
			if (mode != 0 && next == '}')
			{
				mode = 0;
				pos++;
				continue;
			}
		}

		if (hasNext && mode != 0)
		{
			const UCS2 got = mode->decode (
				(UCS2)(in[pos] << 8) | in[pos+1]);
			if (got != 0)
			{
				append (got);
				pos++;
				continue;
			}
		}

		append ((UCS2) in[pos]);
	}

	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 text as HZ.
//
// in      - input characters
// _size   - number of input characters, handed to init()
// retSize - receives the number of bytes produced
// returns the internal, terminated byte buffer
//
// GB2312 characters are written as 7 bit byte pairs between "~{" and 
// "~}".  A literal '~' is written as "~~", which is what the decoder
// of this class expects.  Characters that can not be mapped are quoted
// with append16 in ASCII mode.
//
unsigned char*
UHZConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	UMap		*current=0;	// 0: ASCII mode, otherwise GB mode
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			if (current != 0)
			{
				append ((unsigned char) '~');
				append ((unsigned char) '}');
				current=0;
			}
			// A bare tilde would be taken for the start of an
			// escape sequence if '{', '~' or a newline follows.
			if (in[i] == '~')
			{
				append ((unsigned char) '~');
			}
			append ((unsigned char) in[i]);
			continue;
		}
		if (mGB2312!=0 && (got=mGB2312->encode ((UCS2)in[i])) != 0
			&& (got&0x7f00) > 0x2000 && (got&0x7f00) < 0x7f00
			&& (got&0x7f) > 0x20 && (got&0x7f) < 0x7f) 
		{
			// The range is checked before GB mode is entered, so
			// no empty "~{~}" pair is written.
			if (current == 0)
			{
				append ((unsigned char) '~');
				append ((unsigned char) '{');
				current=mGB2312;
			}
			append ((unsigned char) ((got>>8) & 0x7f));
			append ((unsigned char) ((got&0xff) & 0x7f));
			continue;
		}

		// The quoted form must not be written in GB mode, the 
		// decoder would read it as byte pairs.
		if (current != 0)
		{
			append ((unsigned char) '~');
			append ((unsigned char) '}');
			current=0;
		}
		append16 ((const QUOTE16) in[i]);
	}
	if (current != 0)
	{
		append ((unsigned char) '~');
		append ((unsigned char) '}');
		current=0;
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// KSC5601 converter
//------------------------------------------------------------------------------

UKSC5601Conv::UKSC5601Conv ()
	: UCS2Converter ((const char*) "CTEXT_KR")
{
	// The map is created later, on the first call of initialize().
	initialized = 0;
	mKSC5601 = 0;
}

UKSC5601Conv::UKSC5601Conv (const char *name_)
	: UCS2Converter (name_)
{
	// Same as the default constructor, with the converter name
	// supplied by the caller.
	initialized = 0;
	mKSC5601 = 0;
}

UKSC5601Conv::~UKSC5601Conv ()
{
	// The map exists only if initialize() managed to load it.
	if (mKSC5601 != 0)
	{
		delete mKSC5601;
	}
}

//
// Load the "KSC5601R" map on the first call; later calls do nothing.
// When the map can not be loaded an error is printed and mKSC5601 
// stays 0.
//
void
UKSC5601Conv::initialize ()
{
	if (!initialized)
	{
		mKSC5601 = new UMap ();
		CHECKNULL (mKSC5601);
		if (mKSC5601->rename("KSC5601R") != UMap::OK)
		{
			cerr << "error: can not get KSC5601R umap.\n";
			delete mKSC5601;
			mKSC5601 = 0;
		}
		initialized = 1;
	}
}

//
// 7 Bit KSC5601
//
//
// Decode 7 bit KSC5601 text into UCS2.
//
// in      - input bytes
// _size   - number of input bytes, handed to init()
// retSize - receives the number of UCS2 characters produced
// returns the internal, terminated UCS2 buffer
//
// ESC $ ( C selects the KSC5601 map and ESC ( B selects ASCII.  
// ESC ( A followed by a printable byte selects ASCII and emits that
// byte.  While the map is selected the input is read as byte pairs
// that are looked up with the high bits set; pairs that can not be 
// decoded are quoted with append8.
//
UCS2*
UKSC5601Conv::decode (const unsigned char* in, int _size, int* retSize)
{
	UMap		*current=0;	// selected map, 0 means ASCII
	UCS2		got;
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (inputSize -i > 3 && in[i] == ESC 
			&& in[i+1] == '$' 
			&& in[i+2] == '(' 
			&& in[i+3] == 'C')
		{
			current=mKSC5601;  // Korean
			i++; i++; i++;
			continue;
		} 
		if (inputSize -i > 2 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'B')
		{
			current=0; // ASCII
			i++; i++;
			continue;
		}
		if (inputSize -i > 3 && in[i] == ESC && in[i+1] == '(' 
			&& in[i+2] == 'A' && in[i+3] > 0x20 && in[i+3] <0x7f)
		{
			current=0; // 1 byte ASCII
			i++; i++; i++;
			append ((UCS2) in[i]);
			continue;
		}

		// It should not happen but it does.
		if (in[i] < ' ') current=0;

		if (current!=0) 
		{
			if (inputSize-i <= 1)
			{
				// Truncated pair: quote the byte that is
				// there, do not read behind the input.
				append8 ((QUOTE8) in[i]);
				continue;
			}
			if (in[i]>0x20 && in[i] < 0x7F
				&& in[i+1] > 0x20 && in[i+1] < 0x7F)
			{
				// The map is indexed with the high bits set.
				got = current->decode (((((UCS2)in[i]<< 8) | 
				in[i+1]) | 0x8080 )) ;
			}
			else
			{
				got = 0;
			}
			if (got != 0)
			{
				append (got);
			}
			else
			{
				append8 ((QUOTE8) in[i]);
				append8 ((QUOTE8) in[i+1]);
			}
			i++;
			continue;
		}
		append ((UCS2) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 text as 7 bit KSC5601.
//
// in      - input characters
// _size   - number of input characters, handed to init()
// retSize - receives the number of bytes produced
// returns the internal, terminated byte buffer
//
// ESC $ ( C is written before a run of KSC5601 characters and ESC ( B
// before anything else.  Characters that can not be mapped are quoted
// with append16.
//
unsigned char*
UKSC5601Conv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	UMap		*current=0;	// set selected in the output, 0: ASCII
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			if (current != 0)
			{
				append ((unsigned char) ESC);
				append ((unsigned char) '(');
				append ((unsigned char) 'B');
			}
			current=0;
			append ((unsigned char) in[i]);
			continue;
		}

		if (mKSC5601 != 0 && (got=mKSC5601->encode ((UCS2)in[i])) != 0)
		{
			// The map yields codes with the high bits set, they 
			// are cleared for the 7 bit output.
			if ((got&0xff00) > 0xa000 && (got&0xff00) < 0xff00
				&& (got&0xff) > 0xa0 && (got&0xff) < 0xff) 
			{
				if (current==0)
				{
					append ((unsigned char) ESC);
					append ((unsigned char) '$');
					append ((unsigned char) '(');
					append ((unsigned char) 'C');
				}
				current=mKSC5601;
				append ((unsigned char) ((got&0x7f00)>>8));
				append ((unsigned char) (got&0x7f));
				continue;
			}
		}

		// The quoted form must not be written while KSC5601 is
		// selected, the decoder would read it as byte pairs.
		if (current != 0)
		{
			append ((unsigned char) ESC);
			append ((unsigned char) '(');
			append ((unsigned char) 'B');
			current=0;
		}
		append16 ((const QUOTE16) in[i]);
	}
	if (current != 0)
	{
		append ((unsigned char) ESC);
		append ((unsigned char) '(');
		append ((unsigned char) 'B');
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// EUC - Korean
//------------------------------------------------------------------------------
UEUC_KRConv::UEUC_KRConv ()
	: UKSC5601Conv ((const char*) "EUC-KR")
{
	// Only hands the converter name "EUC-KR" to UKSC5601Conv.
}

UEUC_KRConv::~UEUC_KRConv ()
{
	// Nothing to release in this class.
}

//
// Decode EUC-KR text into UCS2.  Two consecutive bytes in the range
// 0xa1-0xfe are looked up in the KSC5601 map as they are; a pair that
// can not be decoded is quoted with append8.  Every other byte is 
// copied as a character.
//
UCS2*
UEUC_KRConv::decode (const unsigned char* in, int _size, int* retSize)
{
	initialize ();
	init (in, _size);

	for (int pos = 0; pos < inputSize; pos++)
	{
		const unsigned char	lead = in[pos];

		// Not the start of a complete two byte character.
		if (inputSize - pos <= 1 || lead <= 0xa0 || lead >= 0xff
			|| in[pos+1] <= 0xa0 || in[pos+1] >= 0xff)
		{
			append ((UCS2) lead);
			continue;
		}

		const unsigned char	trail = in[pos+1];
		UCS2			got = 0;

		if (mKSC5601 != 0)
		{
			got = mKSC5601->decode ((UCS2)(lead << 8) | trail);
		}
		if (got == 0)
		{
			append8 ((QUOTE8) lead);
			append8 ((QUOTE8) trail);
		}
		else
		{
			append (got);
		}
		pos++;
	}

	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

unsigned char*
UEUC_KRConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			append ((unsigned char) in[i]);
			continue;
		}
		if (mKSC5601!=0 && (got=mKSC5601->encode ((UCS2)in[i])) != 0)
		{
			// got |= 0x8080;
			if ((got&0xff00) > 0xa000 && (got&0xff00) < 0xff00
				&& (got&0xff) > 0xa0 && (got&0xff) < 0xff) 
			{
				append ((unsigned char) ((got>>8) | 0x80));
				append ((unsigned char) ((got&0xff) | 0x80));
				continue;
			}
		}
		append16 ((const QUOTE16) in[i]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// UHC - MS-Windows-Korean : extended EUC-KR , Unified Hangul Code, CP949
//------------------------------------------------------------------------------
UUHCConv::UUHCConv ()
	: UKSC5601Conv ((const char*) "UHC")
{
	// Only hands the converter name "UHC" to UKSC5601Conv.
}

UUHCConv::~UUHCConv ()
{
	// Nothing to release in this class.
}

//
// Decode UHC (extended EUC-KR) text into UCS2.
//
// in      - input bytes
// _size   - number of input bytes, handed to init()
// retSize - receives the number of UCS2 characters produced
// returns the internal, terminated UCS2 buffer
//
// A byte pair is looked up in the KSC5601 map when it is either a
// plain EUC-KR pair or lies in one of the extension ranges tested 
// below.  A pair that can not be decoded is quoted with append8; every
// other byte is copied as a character.
//
UCS2*
UUHCConv::decode (const unsigned char* in, int _size, int* retSize)
{
	UCS2		got;
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		got=0;
		//
		// patch from Jungshik Shin 9 Nov 1998
		//
		// The size test has to guard all three alternatives: 
		// without the outer parentheses the last two read 
		// in[i+1] behind the end of the input.
		//
		if (inputSize -i > 1 && (
			// EUC-KR
			( in[i] > 0xa0 && in[i] < 0xff
				&& in[i+1] > 0xa0 && in[i+1] < 0xff) 
			// extension, lead byte 0x81-0xc5
			|| ( in[i] > 0x80 && in[i] < 0xc6 
				&& ( (in[i+1] > 0x40 && in[i+1] < 0x5b) 
				  || (in[i+1] > 0x60 && in[i+1] < 0x7b) 
				  || (in[i+1] > 0x80 && in[i+1] < 0xff) ) ) 
			// extension, lead byte 0xc6
			|| ( in[i] == 0xc6 
				&& in[i+1] > 0x40 && in[i+1] < 0x53) ))
		{
			// KSC5601
			if (mKSC5601!=0)
			{
				got = mKSC5601->decode 
					((UCS2)((in[i] & 0xff) << 8)
					| (in[i+1] & 0xff) );
			}
			if (got != 0)
			{
				append (got);
			}
			else
			{
				append8 ((QUOTE8) in[i]);
				append8 ((QUOTE8) in[i+1]);
			}
			i++;
			continue;
		} 
		append ((UCS2) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

unsigned char*
UUHCConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			append ((unsigned char) in[i]);
			continue;
		}
		if (mKSC5601!=0 && (got=mKSC5601->encode ((UCS2)in[i])) != 0)
		{
			append ((unsigned char) (got>>8) );
			append ((unsigned char) (got&0xff) );
			continue;
		}
		append16 ((const QUOTE16) in[i]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// JOHAB converter : KS C 5601-1992, Annex 3, supplementary encoding
//------------------------------------------------------------------------------

UJOHABConv::UJOHABConv ()
	: UKSC5601Conv ((const char*) "JOHAB")
{
	// Only hands the converter name "JOHAB" to UKSC5601Conv.
}

UJOHABConv::~UJOHABConv ()
{
	// Nothing to release in this class.
}


//
// Decode JOHAB text into UCS2.
//
// in      - input bytes
// _size   - number of input bytes, handed to init()
// retSize - receives the number of UCS2 characters produced
// returns the internal, terminated UCS2 buffer
//
// Bytes below 0x7f are copied.  Hangul (lead byte up to 0xd3) is 
// computed from the three 5 bit fields of the two byte code.  Symbols
// and Hanja (lead byte above 0xd8) are converted to a KSC5601 code and
// looked up in the map.  Everything that can not be decoded is quoted
// with append8.
//
UCS2*
UJOHABConv::decode (const unsigned char* in, int _size, int* retSize)
{
	UCS2		got;
	int		i;
	int		idx;

/* 
   The table for Bit pattern to Hangul Jamo
   5 bits each are used to encode
   leading consonants(19 + 1 filler),medial vowels(21 + 1 filler) 
   and trailing consonants(27 + 1 filler). 

   KS C 5601-1992 Annex 3 Table 2 
   0 : Filler, -1: invalid, >= 1 : valid
*/
	// All 32 entries are spelled out; with one initializer missing 
	// the last entry would silently be 0 (filler) instead of invalid.
	static const int lead[32] =
	{-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
 	19, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1};
	static const int mid[32] =
	{-1, -1, 0, 1, 2, 3, 4, 5,
 	-1, -1, 6, 7, 8, 9, 10, 11,
 	-1, -1, 12, 13, 14, 15, 16, 17,
 	-1, -1, 18, 19, 20, 21, -1, -1};
	static const int trail[32] =
	{-1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
 	-1, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, -1, -1};
	// Indexed with l - 1.
	static const UCS2 lead_to_ucs[19] =
	{
	  0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142,
	  0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b,
	  0x314c, 0x314d, 0x314e
	};

	// Indexed with t - 1. 0 marks finals that have no code of their
	// own here. Entry 10 is 0x313c: the encoder of this class writes 
	// U+313C as 0x844c, which is trail index 11.
	static const UCS2 trail_to_ucs[27] =
	{
	  0, 0, 0x3133, 0, 0x3135, 0x3136, 0, 0,
	  0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f,
	  0x3140, 0, 0, 0x3144, 0, 0, 0,
	  0, 0, 0, 0, 0, 0
	};

	initialize (); 
	init (in, _size);

	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x7f)
		{
			append ((UCS2) in[i]);
			continue;
		}

		// Bytes that never start a two byte character.
		if (in[i] > 0xf9 || in[i] == 0xdf || (in[i] > 0x7e &&
                         in[i] < 0x84) || (in[i] > 0xd3 && in[i] < 0xd9))
		{
			append8 ((QUOTE8) in[i]);
			continue;
		}
	
		// Hangul 
		if (inputSize -i > 1 && in[i] <= 0xd3 && 
				((in[i+1]> 0x40 && in[i+1] < 0x7f)
				  || (in[i+1] > 0x80 && in[i+1] < 0xff)) )
		{
			int l,m,t;

			idx = (in[i] << 8) + in[i+1];
			l = lead[(idx & 0x7c00) >> 10];
			m = mid[(idx & 0x03e0) >> 5];
			t = trail[idx & 0x001f];

			if (l == -1 || m == -1 || t == -1) 
				got=0;
			// Syllable 
			else if (l > 0 && m > 0)
				got = ((l-1)*21 + (m-1))*28 + t + 0xac00;
			// Initial Consonant
			else if (l > 0 && m == 0 && t == 0)
				got = lead_to_ucs[l - 1];
			// Vowel
			else if (l == 0 && m > 0 && t == 0)
				got = 0x314e + m; // 0x314f + m - 1
			// Final Consonant
			else if (l == 0 && m == 0 && t > 0)
				got = trail_to_ucs[t - 1];
			else
				got = 0;
			if (got == 0)
			{
				append8 ((QUOTE8) in[i]);
				append8 ((QUOTE8) in[i+1]);
			}
			else
			{
				append (got);
			}
			i++;
			continue;
		}
		// Hanja & Symbol
		if (inputSize -i > 1 && in[i] > 0xd8 &&
			((in[i+1]> 0x30 && in[i+1] < 0x7f)
			|| (in[i+1] > 0x90 && in[i+1] < 0xff)) )
		{
			// KSC5601 row the lead byte 0xd9 / 0xe0 starts at.
			const int base = (in[i] < 0xdf) ? 0xa1 : 0xca;
			const int first = (in[i] < 0xdf) ? 0xd9 : 0xe0;

			// User Defined Area : Unused
			if ( in[i]==0xda && 
				in[i+1]>0xa0 && in[i+1]<0xd4 )
			{
				got=0;
			}
			// The map may be missing when it failed to load.
			else if (mKSC5601 == 0)
			{
				got=0;
			}
			// Symbol (lead below 0xdf) and Hanja (the rest)
			else
			{
				idx = ( ( (in[i]-first) * 2 
				    + (in[i+1] > 0xa0 ? 1 : 0) + base ) << 8 )
				    + in[i+1] + (in[i+1] > 0xa0 ? 0 : 
				      (in[i+1] > 0x90 ? 0x5e : 0x70) );   
				got=mKSC5601->decode(idx);
			}
			if (got ==0)
			{
				append8 ((QUOTE8) in[i]);
				append8 ((QUOTE8) in[i+1]);
			}
			else
			{
				append (got);
			}
			i++;
			continue;
		} 

		// Bytes below 0x7f were handled at the top of the loop, so
		// what arrives here is a high byte without a usable partner.
		append8 ((QUOTE8) in[i]);
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Encode UCS2 text as JOHAB.
//
// in      - input characters
// _size   - number of input characters, handed to init()
// retSize - receives the number of bytes produced
// returns the internal, terminated byte buffer
//
// Characters below 0x80 are copied.  Hangul syllables are computed,
// compatibility jamo come from a table, and everything else the 
// KSC5601 map knows is converted from its KSC5601 code.  The rest is 
// quoted with append16.
//
unsigned char*
UJOHABConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	unsigned char	c1;
	unsigned char	c2;
	int		i;
	// JOHAB codes of U+3131..U+3163. Each entry decodes back to its
	// character with the tables of UJOHABConv::decode: 
	// finals are 0x84xx, initials step by 0x0400 from 0x8841.
	static const UCS2 jamo_from_ucs[51] =
	{0x8841, 0x8c41,
	 0x8444,
	 0x9041,
	 0x8446, 0x8447,
	 0x9441, 0x9841, 0x9c41,
	 0x844a, 0x844b, 0x844c, 0x844d, 0x844e, 0x844f, 0x8450,
	 0xa041, 0xa441, 0xa841,
	 0x8454,
	 0xac41, 0xb041, 0xb441, 0xb841, 0xbc41,
	 0xc041, 0xc441, 0xc841, 0xcc41, 0xd041,
	 0x8461, 0x8481, 0x84a1, 0x84c1, 0x84e1,
	 0x8541, 0x8561, 0x8581, 0x85a1, 0x85c1, 0x85e1,
	 0x8641, 0x8661, 0x8681, 0x86a1, 0x86c1, 0x86e1,
	 0x8741, 0x8761, 0x8781, 0x87a1
	};


	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			append ((unsigned char) in[i]);
			continue;
		}
		// Hangul syllables: 19 * 21 * 28 of them, the last one is
		// 0xd7a3. Beyond that the lead index would be out of range.
		if (in[i]>= 0xac00 && in[i] <= 0xd7a3) 
		{
			UCS2 ch = in[i]-0xac00;
			int l = ch / 588;  // 588 = 21 * 28
			int m = (ch / 28) % 21; 
			int t = ch % 28;
			
			// The additions skip the unused bit patterns of
			// the vowel and final fields.
			got =   ( (l+2) << 10 )  |
			((m + (m<5 ? 3 : (m<11 ? 5 : (m<17 ? 7 : 9)))) << 5) |
			(t + (t<17 ? 1 : 2))   | 0x8000;
			append ((unsigned char) ((got & 0xff00) >> 8) );
			append ((unsigned char) (got&0xff) );
			continue;
		}
		// Compatibility jamo
		if (in[i] > 0x3130 && in[i] < 0x3164) 
		{
			got = jamo_from_ucs[in[i]-0x3131];
			append ((unsigned char) ((got & 0xff00) >>8) );
			append ((unsigned char) (got&0xff) );
			continue;
		}
		if (mKSC5601!=0 && (got=mKSC5601->encode ((UCS2)in[i])) != 0)
		{
			c1 = (got >> 8) & 0x7f;
			c2 = got & 0x7f;
			if ( (in[i] >=0x4e00 && in[i] <=0x9fa5) ||
				(in[i] >= 0xf900 && in[i] <= 0xfa0b) )
			{
				// Hanja
				c1 -= 0x4a;
				c2 |= 0x80;
				got =  ((c1 / 2) << 8)  + 0xe000 + c2
				    +  (c1 % 2 ? 0 
					: (c2 > 0xee ? 0x43 : 0x31) - 0xa1 );
			}
			else 
			{
				// Symbol
				c1 -= 0x21;
				c2 |= 0x80;
				got = ((c1 / 2) << 8) + 0xd900 + c2
				    + (c1 % 2 ? 0 
			              : (c2 > 0xee ? 0x43 : 0x31) - 0xa1 ); 
			}
			append ((unsigned char) ((got & 0xff00) >>8) );
			append ((unsigned char) (got&0xff) );
			continue;
		}
		append16 ((const QUOTE16) in[i]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//------------------------------------------------------------------------------
// BIG5
//------------------------------------------------------------------------------
UBIG5Conv::UBIG5Conv () : UCS2Converter ((const char*) "BIG5")
{
	// initialize() tests 'initialized' and the coders test mBIG5, so
	// both need a defined value before the first conversion.
	mBIG5=0;
	initialized=0;
}

UBIG5Conv::~UBIG5Conv ()
{
	// The map is allocated in initialize() and owned by this object.
	if (mBIG5) delete mBIG5;
}


//
// Load the "BIG5" map on the first call; later calls do nothing.
// When the map can not be loaded an error is printed and mBIG5 is 
// set to 0.
//
void
UBIG5Conv::initialize ()
{
	if (!initialized)
	{
		mBIG5 = new UMap ();
		CHECKNULL (mBIG5);
		if (mBIG5->rename("BIG5") != UMap::OK)
		{
			cerr << "error: can not get BIG5 umap.\n";
			delete mBIG5;
			mBIG5 = 0;
		}
		initialized = 1;
	}
}

//
// Decode BIG5 text into UCS2.  A lead byte in the range 0xa1-0xfe that
// is followed by a byte in 0x40-0x7e or 0xa1-0xfe is looked up in the
// BIG5 map together with that byte; a pair that can not be decoded is
// quoted with append8.  Every other byte is copied as a character.
//
UCS2*
UBIG5Conv::decode (const unsigned char* in, int _size, int* retSize)
{
	initialize ();
	init (in, _size);

	for (int pos = 0; pos < inputSize; pos++)
	{
		const unsigned char	lead = in[pos];
		bool			isPair = false;

		if (inputSize - pos > 1 && lead > 0xa0 && lead < 0xff)
		{
			const unsigned char	next = in[pos+1];

			isPair = (next > 0xa0 && next < 0xff)
				|| (next >= 0x40 && next < 0x7f);
		}
		if (!isPair)
		{
			append ((UCS2) lead);
			continue;
		}

		const unsigned char	trail = in[pos+1];
		UCS2			got = 0;

		if (mBIG5 != 0)
		{
			got = mBIG5->decode ((UCS2)((lead << 8) | trail));
		}
		if (got == 0)
		{
			append8 ((QUOTE8) lead);
			append8 ((QUOTE8) trail);
		}
		else
		{
			append (got);
		}
		pos++;
	}

	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

unsigned char*
UBIG5Conv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	int		i;

	initialize (); 
	init (in, _size);
	for (i=0; i<inputSize; i++)
	{
		if (in[i] < 0x80)
		{
			append ((unsigned char) in[i]);
			continue;
		}
		if (mBIG5!=0 && (got=mBIG5->encode ((UCS2)in[i])) != 0)
		{
			if ((got&0xff00) > 0xa000 && (got&0xff00) < 0xff00
				&& (((got&0xff) > 0xa0 && (got&0xff) < 0xff) 
				 || ((got&0xff) >= 0x40 && (got&0xff) < 0x7f)))
			{
				append ((unsigned char) (got>>8));
				append ((unsigned char) (got&0xff));
				continue;
			}
		}
		append16 ((const QUOTE16) in[i]);
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

//
// 8 Bit converters with umap
//
U8BitConv::U8BitConv (const char* mapName)
	: UCS2Converter (mapName)
{
	// The map is created later, on the first call of initialize().
	map = 0;
	initialized = 0;
}

U8BitConv::~U8BitConv()
{
	// The map exists only if initialize() managed to load it.
	if (map != 0)
	{
		delete map;
	}
}

//
// Load the map called 'name' on the first call; later calls do 
// nothing.  When the map can not be loaded an error is printed and
// map stays 0.
//
void
U8BitConv::initialize ()
{
	if (!initialized)
	{
		map = new UMap ();
		CHECKNULL (map);
		if (map->rename(name) != UMap::OK)
		{
			cerr << "error: can not get " << name << " umap.\n";
			delete map;
			map = 0;
		}
		initialized = 1;
	}
}

// 
// Only chars that are >= 0x80 are mapped
// IN case the encoder can map > 0xff two byte stream is assumed.
// In case it is 16bit mapped we use Big endian.
//
//
// Decode through the map.  When the map can encode to values above 
// 0xff the input is read as big endian byte pairs, otherwise byte by 
// byte.  Unmapped values below 0x80 are copied; other unmapped values
// are quoted with append8 (one byte input) or replaced by '?' (two 
// byte input).
//
UCS2*
U8BitConv::decode (const unsigned char* in, int _size, int* retSize)
{
	initialize ();
	init (in, _size);

	const int	width = (map != 0 && map->encodeResultMax() > 0xff)
				? 2 : 1;

	// Only complete units are read.
	for (int pos = 0; pos + width <= inputSize; pos += width)
	{
		UCS2	value;
		UCS2	mapped = 0;

		if (width == 1)
		{
			value = in[pos];
		}
		else
		{
			value = (in[pos] << 8) | in[pos+1];
		}

		if (map != 0)
		{
			mapped = map->decode (value);
		}
		if (mapped != 0)
		{
			append ((UCS2) mapped);
		}
		else if (value < 0x80)
		{
			append (value);
		}
		else if (width == 1)
		{
			append8 ((QUOTE8) value);
		}
		else
		{
			append ((UCS2) '?');
		}
	}

	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

// 
// Only chars that are >= 0x80 are mapped
// IN case the encoder can map > 0xff two byte stream is assumed.
// In case it is 16bit mapped we use Big endian.
//
unsigned char*
U8BitConv::encode (const UCS2 *in, int _size, int* retSize)
{
	UCS2		got;
	int		i;
	int		step;

	initialize (); 
	init (in, _size);

	step = (map !=0 && map->encodeResultMax() > 0xff) ? 2 : 1;
	for (i=0; i<inputSize; i++)
	{
		if (map != 0 && (got=map->encode (in[i])) != 0)
		{
			if (step == 2) append ((unsigned char) (got>>8));
			append ((unsigned char) (got&0xff));
			continue;
		}
		if (in[i]<0x80)
		{
			if (step == 2) append ((unsigned char) 0);
			append ((unsigned char)in[i]);
			continue;
		}
		if (step == 2)
		{
			append ((unsigned char) 0);
			append ((unsigned char) '?');
		}
		else
		{
			append16 ((const QUOTE16) in[i]);
		}
	}
	*retSize = size;
	terminateStr();
	return uchar;
}

U16BitConv::U16BitConv (const char* mapName, Endian en)
	: UCS2Converter (mapName)
{
	// Byte order used by encode() and decode().
	endian = en;
}

U16BitConv::~U16BitConv()
{
	// Nothing to release in this class.
}

//
// Write every character as two bytes, in the byte order chosen at 
// construction time.
//
unsigned char*
U16BitConv::encode (const UCS2 *in, int _size, int* retSize)
{
	init (in, _size);

	for (int pos = 0; pos < inputSize; pos++)
	{
		const unsigned char	hi = (unsigned char) (in[pos] >> 8);
		const unsigned char	lo = (unsigned char) (in[pos] & 0xff);

		if (endian == UCS2BE)
		{
			// 12 34 -> 12 34
			append (hi);
			append (lo);
		}
		else
		{
			// 12 34 -> 34 12
			append (lo);
			append (hi);
		}
	}

	*retSize = size;
	terminateStr();
	return uchar;
}

//
// Read the input as byte pairs in the byte order chosen at 
// construction time.  A single byte left over at the end becomes '?'.
//
UCS2*
U16BitConv::decode (const unsigned char *in, int _size, int* retSize)
{
	init (in, _size);

	for (int pos = 0; pos + 1 < inputSize; pos += 2)
	{
		const unsigned char	first = in[pos];
		const unsigned char	second = in[pos+1];
		UCS2			ch;

		if (endian == UCS2BE)
		{
			ch = (first << 8) | second;
		}
		else
		{
			// 12 34 -> 34 12
			ch = (second << 8) | first;
		}
		append (ch);
	}
	if (inputSize % 2 != 0)
	{
		append ((UCS2) '?');
	}

	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

U16BEConv::U16BEConv(const char* mapName)
	: U16BitConv (mapName, UCS2BE)
{
	// Big endian flavour of U16BitConv.
}

U16BEConv::~U16BEConv ()
{
	// Nothing to release in this class.
}

U16LEConv::U16LEConv(const char* mapName)
	: U16BitConv (mapName, UCS2LE)
{
	// Little endian flavour of U16BitConv.
}

U16LEConv::~U16LEConv ()
{
	// Nothing to release in this class.
}

//
//
//
UUniConv::UUniConv (const char* mapName)
	: UCS2Converter (mapName)
{
	// Only hands the converter name to the UCS2Converter base.
}

UUniConv::~UUniConv()
{
	// Nothing to release in this class.
}

//
// This will always produce big endian output, starting with UNI_MAGIC.
// Unlike UMSTXTConv::encode it does not put \r at the end of line.
//
unsigned char*
UUniConv::encode (const UCS2 *in, int _size, int* retSize)
{
	init (in, _size);

	// The output byte order is fixed: high byte first.
	endian = UCS2BE;

	// UNI_MAGIC goes first.
	append ((unsigned char) (UNI_MAGIC >> 8));
	append ((unsigned char) (UNI_MAGIC & 0xff));

	for (int pos = 0; pos < inputSize; pos++)
	{
		// 12 34 -> 12 34
		const unsigned char	hi = (unsigned char) (in[pos] >> 8);
		const unsigned char	lo = (unsigned char) (in[pos] & 0xff);

		append (hi);
		append (lo);
	}

	*retSize = size;
	terminateStr();
	return uchar;
}

//
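// Endianness is decided from the UNI_MAGIC mark, which normally is at 
// the beginning of the file; big endian is assumed until one is seen.
// Line separator, paragraph separator and \r are passed through as 
// they are, they are not translated here.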
UCS2*
UUniConv::decode (const unsigned char *in, int _size, int* retSize)
{
	int	i;
	UCS2	ucs;

	init (in, _size);

	// set some defualt state.
	endian = UCS2BE;
	for (i=0; i<inputSize-1; i=i+2)
	{
		if (in[i] == (UNI_MAGIC>>8) 
			&& in[i+1] == (UNI_MAGIC&0xff))
		{
			endian=UCS2BE;
			continue;
		}
		if (in[i+1] == (UNI_MAGIC>>8) 
			&& in[i] == (UNI_MAGIC&0xff))
		{
			endian=UCS2LE;
			continue;
		}
		if (endian==UCS2BE)
		{
			ucs = (in[i] << 8) | in[i+1];
		}
		else // 12 34 -> 34 12
		{
			ucs = (in[i+1] << 8) | in[i];
		}
		append (ucs);
		
	}
	if (inputSize % 2 != 0) append ((UCS2) '?');
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}

//
// Passes mapName on to the UCS2Converter base class.
//
UMSTXTConv::UMSTXTConv (const char* mapName)
	: UCS2Converter (mapName)
{
}

//
// Destructor with an empty body.
//
UMSTXTConv::~UMSTXTConv ()
{
}

//
// This will always produce little endian output.
// The two bytes of UNI_MAGIC are written first as a byte order mark,
// followed by two bytes for every input character, least significant
// byte first. A \r is written in front of every \n.
// If the input is empty or its last character is not \n, one more \r
// (without a \n) is written at the very end.
// The 'endian' member is set to UCS2LE as a side effect.
// *retSize is set from 'size' before terminateStr() is called, and
// the 'uchar' buffer is returned.
//
unsigned char*
UMSTXTConv::encode (const UCS2 *in, int _size, int* retSize)
{
	unsigned char	low;
	unsigned char	high;
	unsigned char	cr;
	unsigned char	zero;
	int		i;

	init (in, _size);

	// The output byte order is fixed. It used to be assigned and then
	// tested right away, which left the big endian branches
	// unreachable; only the little endian path remains.
	endian = UCS2LE;

	// Byte order mark, 12 34 -> 34 12
	low = UNI_MAGIC&0xff;
	high = UNI_MAGIC>>8;
	append (low);
	append (high);

	// Carriage return as a little endian byte pair.
	cr = '\r';
	zero = 0;

	for (i=0; i<inputSize; i++)
	{
		if (in[i]=='\n')
		{
			append (cr);
			append (zero);
		}

		// 12 34 -> 34 12
		low = in[i]&0xff;
		high = in[i]>>8;
		append (low);
		append (high);
	}

	// HUh. Hope nobody wants to cut&paste
	// i is the number of characters written by the loop above, so
	// in[i-1] is the last input character when there was any input.
	if (i==0 || in[i-1]!='\n')
	{
		append (cr);
		append (zero);
	}

	*retSize = size;
	terminateStr();
	return uchar;
}

//
// Decoding starts out as little endian. Every byte pair is compared
// with UNI_MAGIC, not only the first one: a pair matching it in big
// endian order switches 'endian' to UCS2BE, a pair matching it
// byte-swapped switches 'endian' to UCS2LE. Such a pair is not copied
// to the output.
// All other pairs are combined according to the current 'endian', then:
//   \r               is dropped,
//   0x2028, 0x2029   (line and paragraph separator) become \n,
//   anything else    is appended unchanged.
// When the input size is odd, a single '?' is appended for the
// unpaired last byte.
// *retSize is set to size-1 after terminateUCS2() has been called
// (presumably so the terminator is not counted - confirm), and the
// 'ucs2' buffer is returned.
//
UCS2*
UMSTXTConv::decode (const unsigned char *in, int _size, int* retSize)
{
	UCS2		ch;
	unsigned char	first;
	unsigned char	second;
	int		pos;

	init (in, _size);

	// Default state until a byte order mark says otherwise.
	endian = UCS2LE;

	for (pos=0; pos<inputSize-1; pos=pos+2)
	{
		first = in[pos];
		second = in[pos+1];

		if (first == (UNI_MAGIC>>8) && second == (UNI_MAGIC&0xff))
		{
			// Mark in big endian order.
			endian = UCS2BE;
		}
		else if (second == (UNI_MAGIC>>8) && first == (UNI_MAGIC&0xff))
		{
			// Mark in byte-swapped order.
			endian = UCS2LE;
		}
		else
		{
			if (endian==UCS2BE)
			{
				// 12 34 -> 12 34
				ch = (first << 8) | second;
			}
			else
			{
				// 12 34 -> 34 12
				ch = (second << 8) | first;
			}

			if (ch == '\r')
			{
				// Carriage returns are not copied.
			}
			else if (ch == 0x2029 || ch == 0x2028)
			{
				// paragraph separator, line separator
				append ((UCS2)'\n');
			}
			else
			{
				append (ch);
			}
		}
	}

	// Odd number of input bytes: mark the incomplete character.
	if (inputSize % 2 != 0)
	{
		append ((UCS2) '?');
	}
	terminateUCS2 ();
	*retSize = size-1;
	return ucs2;
}
