将汉字转换为Unicode形式的字符串，以及将Unicode形式的字符串转换回汉字
生活随笔
收集整理的這篇文章主要介紹了
将汉字转换为Unicode形式的字符串，以及将Unicode形式的字符串转换回汉字
小編覺得挺不錯的,現在分享給大家,幫大家做個參考.
http://www.cppblog.com/biao/archive/2013/07/04/137087.html
/*
 * Sample output of main(). The CJK characters U+9EC4 and U+5F6A are written by code
 * point here so that this source file stays pure ASCII and compiles under any -encoding.
 *
 *   Original:       <U+9EC4> <TAB><U+5F6A><U+5F6A>
 *   to unicode:     \u9EC4\ \t\u5F6A\u5F6A
 *   from unicode:   <U+9EC4> <TAB><U+5F6A><U+5F6A>
 *
 * The same conversion can be done on a file from the command line:
 *   native2ascii -encoding utf-8 a.txt a.txt
 *
 * Java .properties files store characters in this escaped form.
 */
import java.io.UnsupportedEncodingException;

/**
 * Converts text to and from the backslash-escaped form used by Java properties files:
 * characters outside printable ASCII become a backslash, the letter {@code u} and four
 * hexadecimal digits, and a few special characters get a preceding backslash.
 */
public class UnicodeConverter {

    /** Upper-case hexadecimal digits, indexed by nibble value (0-15). */
    private static final char[] hexDigit = {
        '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
    };

    /**
     * Demo: encodes a sample string, prints it, decodes it again and prints the result.
     *
     * @param args ignored
     * @throws UnsupportedEncodingException never thrown by the current body; the clause is
     *         kept so the method signature stays unchanged for existing callers
     */
    public static void main(String[] args) throws UnsupportedEncodingException {
        // U+9EC4, space, TAB, U+5F6A, U+5F6A
        String s = "\u9EC4 \t\u5F6A\u5F6A";
        System.out.println("Original:\t\t" + s);

        s = toEncodedUnicode(s, true);
        System.out.println("to unicode:\t\t" + s);

        s = fromEncodedUnicode(s.toCharArray(), 0, s.length());
        System.out.println("from unicode:\t" + s);
    }

    /**
     * Returns the upper-case hex digit for the low four bits of {@code nibble}.
     */
    private static char toHex(int nibble) {
        return hexDigit[nibble & 0xF];
    }

    /**
     * Returns the numeric value (0-15) of an ASCII hexadecimal digit.
     *
     * <p>Only {@code 0-9}, {@code a-f} and {@code A-F} are accepted; digits from other
     * scripts are rejected on purpose, which is why {@code Character.digit} is not used.
     *
     * @throws IllegalArgumentException if {@code c} is not an ASCII hex digit
     */
    private static int hexValue(char c) {
        if (c >= '0' && c <= '9') {
            return c - '0';
        }
        if (c >= 'a' && c <= 'f') {
            return 10 + c - 'a';
        }
        if (c >= 'A' && c <= 'F') {
            return 10 + c - 'A';
        }
        throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
    }

    /**
     * Encodes a string into its escaped form, e.g. the single character U+9EC4 becomes the
     * six characters backslash, {@code u}, {@code 9}, {@code E}, {@code C}, {@code 4}.
     *
     * <ul>
     *   <li>A backslash is doubled.</li>
     *   <li>TAB, LF, CR and FF become {@code \\t}, {@code \\n}, {@code \\r}, {@code \\f}.</li>
     *   <li>{@code =}, {@code :}, {@code #} and {@code !} get a preceding backslash.</li>
     *   <li>Any other char below 0x20 or above 0x7E becomes a {@code \\uXXXX} escape with
     *       upper-case hex digits. Each UTF-16 code unit is escaped on its own.</li>
     *   <li>Everything else is copied unchanged.</li>
     * </ul>
     *
     * @param theString   the text to encode
     * @param escapeSpace if {@code true}, every space gets a preceding backslash; if
     *                    {@code false}, only a space at index 0 is escaped
     * @return the encoded text
     * @throws NullPointerException if {@code theString} is {@code null}
     */
    public static String toEncodedUnicode(String theString, boolean escapeSpace) {
        int len = theString.length();
        // Capacity hint only. If doubling overflows, fall back to len instead of asking
        // for Integer.MAX_VALUE chars, which could never be allocated.
        int doubled = len * 2;
        StringBuilder out = new StringBuilder(doubled < 0 ? len : doubled);

        for (int x = 0; x < len; x++) {
            char aChar = theString.charAt(x);

            // Fast path: '>' (62) through '~' (126) contains no special character
            // except the backslash itself.
            if (aChar > 61 && aChar < 127) {
                if (aChar == '\\') {
                    out.append('\\');
                }
                out.append(aChar);
                continue;
            }

            switch (aChar) {
                case ' ':
                    if (x == 0 || escapeSpace) {
                        out.append('\\');
                    }
                    out.append(' ');
                    break;
                case '\t':
                    out.append('\\').append('t');
                    break;
                case '\n':
                    out.append('\\').append('n');
                    break;
                case '\r':
                    out.append('\\').append('r');
                    break;
                case '\f':
                    out.append('\\').append('f');
                    break;
                case '=':
                case ':':
                case '#':
                case '!':
                    out.append('\\').append(aChar);
                    break;
                default:
                    if (aChar < 0x0020 || aChar > 0x007e) {
                        // A char is 16 bits: emit its four nibbles, most significant first.
                        out.append('\\').append('u');
                        out.append(toHex((aChar >> 12) & 0xF));
                        out.append(toHex((aChar >> 8) & 0xF));
                        out.append(toHex((aChar >> 4) & 0xF));
                        out.append(toHex(aChar & 0xF));
                    } else {
                        out.append(aChar);
                    }
                    break;
            }
        }
        return out.toString();
    }

    /**
     * Decodes escaped text back to plain characters: {@code \\uXXXX} escapes become the
     * char with that value, {@code \\t}, {@code \\r}, {@code \\n} and {@code \\f} become
     * the corresponding control characters, and a backslash before any other character is
     * dropped, keeping that character.
     *
     * <p>Only the chars in {@code in[off] .. in[off + len - 1]} are read. An escape that is
     * cut off by the end of that range is reported as malformed rather than completed
     * from chars lying outside the range.
     *
     * @param in  the chars holding the encoded text
     * @param off index of the first char to decode
     * @param len number of chars to decode
     * @return the decoded text
     * @throws IllegalArgumentException if the range ends with a lone backslash, or a
     *         {@code \\uXXXX} escape is truncated or contains a non-hex digit
     * @throws NullPointerException if {@code in} is {@code null}
     * @throws NegativeArraySizeException if {@code len} is negative
     * @throws ArrayIndexOutOfBoundsException if the range reaches outside {@code in}
     */
    public static String fromEncodedUnicode(char[] in, int off, int len) {
        // Decoding never lengthens the text, so len chars are always enough.
        char[] out = new char[len];
        int outLen = 0;
        int end = off + len;

        while (off < end) {
            char aChar = in[off++];
            if (aChar != '\\') {
                out[outLen++] = aChar;
                continue;
            }

            if (off >= end) {
                throw new IllegalArgumentException(
                        "Malformed escape: trailing backslash at end of input.");
            }
            aChar = in[off++];

            if (aChar == 'u') {
                // All four hex digits must lie inside the requested range.
                if (end - off < 4) {
                    throw new IllegalArgumentException("Malformed \\uxxxx encoding.");
                }
                int value = 0;
                for (int i = 0; i < 4; i++) {
                    value = (value << 4) + hexValue(in[off++]);
                }
                out[outLen++] = (char) value;
                continue;
            }

            switch (aChar) {
                case 't':
                    aChar = '\t';
                    break;
                case 'r':
                    aChar = '\r';
                    break;
                case 'n':
                    aChar = '\n';
                    break;
                case 'f':
                    aChar = '\f';
                    break;
                default:
                    // Any other escaped character stands for itself.
                    break;
            }
            out[outLen++] = aChar;
        }
        return new String(out, 0, outLen);
    }
}
// Summary (heading of the article text that follows this listing)
以上是生活随笔為你收集整理的转换汉字为unicode形式的字符串和转换unicode形式字符串转换成汉字的全部內容,希望文章能夠幫你解決所遇到的問題。
- 上一篇: C++的类型萃取技术
- 下一篇: 可用内存