龙盟编程博客 | 无障碍搜索 | 云盘搜索神器
快速搜索
主页 > 软件开发 > JAVA开发 >

java实现十六进制字符unicode与中英文转换示例

时间:2014-05-24 02:05来源:网络整理 作者:网络 点击:
分享到:
当需要对一个unicode十六进制字符串进行解码时,首先应该确认它的字符集编码格式;在无法快速获知的情况下,可以通过以下的str4all方法逐一尝试各种编码来达到这一目的。

关于unicode和utf的关系,可以简单地记忆:Unicode是一个编码组织、一个编码规范,在java中指utf-16;utf是Unicode编码的转换格式(Transformation Format),以便于很好地在网络中传递、在存储媒介中保存,于是utf存在多种格式,如8、16、32,再结合le、be(小端、大端)以及是否带BOM的区别,Unicode编码格式才会有以下过程中的10种。

代码如下:

public static void main(String[] args) throws UnsupportedEncodingException {
                 StringUtil.str2all("0 产品型号描述");
  StringUtil.str4all("30000900A74EC1548B57F753CF63F08F");
 }

/**
  * 尝试所有编码格式对十六进制数字字符串进行编码
  *
  * @param hexStr
  * @throws UnsupportedEncodingException
  */
 public static void str4all(String uStr) throws UnsupportedEncodingException{

  System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[uStr.length()/2];
  for (int i = 0; i < bs.length; i++) {
   bs[i] = (byte) Integer.parseInt(uStr.substring(i*2, i*2+2), 16);
  }

  System.out.println(new String(bs, "utf-8"));
  // 16
  System.out.println(new String(bs, "utf-16")); // 同unicode
  System.out.println(new String(bs, "utf-16le"));
  System.out.println(new String(bs, "x-utf-16le-bom"));
  System.out.println(new String(bs, "utf-16be"));
//  System.out.println(new String(bs, "x-utf-16be-bom")); // UnsupportedEncodingException
  // 32
  System.out.println(new String(bs, "utf-32"));
  System.out.println(new String(bs, "utf-32le"));
  System.out.println(new String(bs, "x-utf-32le-bom"));
  System.out.println(new String(bs, "utf-32be"));
  System.out.println(new String(bs, "x-utf-32le-bom"));
 }
/**
  * 列出所有编码对应的解码后的十六进制数字字符串
  *
  * @param uStr
  * @throws UnsupportedEncodingException
  */
 public static void str2all(String uStr) throws UnsupportedEncodingException{

  System.out.println("+++++++++++++++++++++++++++++++++++++++++++++++++++");

  byte[] bs = new byte[]{};

  bs = uStr.getBytes("utf-8");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  // 16
  bs = uStr.getBytes("utf-16");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-16le");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("x-utf-16le-bom");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-16be");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
//  bs = uStr.getBytes("x-utf-16be-bom"); // UnsupportedEncodingException
  // 32
  bs = uStr.getBytes("utf-32");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-32le");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("x-utf-32le-bom");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("utf-32be");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
  bs = uStr.getBytes("x-utf-32le-bom");
  for(byte b:bs){
   System.out.print(Integer.toHexString(b & 0xff));
  }
  System.out.println();
 }

编码名称收集

代码如下:

charset US-ASCII %S
    historicalName ASCII
    # IANA aliases
    alias iso-ir-6
    alias ANSI_X3.4-1986
    alias ISO_646.irv:1991
    alias ASCII
    alias ISO646-US
    alias us
    alias IBM367
    alias cp367
    alias csASCII
    alias default
    # Other aliases
    alias 646 # Solaris POSIX locale
    alias iso_646.irv:1983
    alias ANSI_X3.4-1968 # Linux POSIX locale (RedHat)
    alias ascii7

charset UTF-8 UTF_8
    historicalName UTF8
    alias UTF8
    alias unicode-1-1-utf-8

charset UTF-16 UTF_16
    historicalName UTF-16
    alias UTF_16
    alias utf16
    alias unicode
    alias UnicodeBig

charset UTF-16BE UTF_16BE
    historicalName UnicodeBigUnmarked
    alias UTF_16BE
    alias ISO-10646-UCS-2
    alias X-UTF-16BE
    alias UnicodeBigUnmarked

charset UTF-16LE UTF_16LE
    historicalName UnicodeLittleUnmarked
    alias UTF_16LE
    alias X-UTF-16LE
    alias UnicodeLittleUnmarked

charset x-UTF-16LE-BOM UTF_16LE_BOM
    historicalName UnicodeLittle
    alias UnicodeLittle

charset UTF-32 UTF_32
    alias UTF_32
    alias UTF32

charset UTF-32LE UTF_32LE
    alias UTF_32LE
    alias X-UTF-32LE

charset UTF-32BE UTF_32BE
    alias UTF_32BE
    alias X-UTF-32BE

charset X-UTF-32LE-BOM UTF_32LE_BOM
    alias UTF_32LE_BOM
    alias UTF-32LE-BOM

charset X-UTF-32BE-BOM UTF_32BE_BOM
    alias UTF_32BE_BOM
    alias UTF-32BE-BOM

charset ISO-8859-1 %S
    historicalName ISO8859_1
    # IANA aliases
    alias iso-ir-100
    alias ISO_8859-1
    alias latin1
    alias l1
    alias IBM819
    alias cp819
    alias csISOLatin1
    # Other aliases
    alias 819
    alias IBM-819
    alias ISO8859_1
    alias ISO_8859-1:1987
    alias ISO_8859_1
    alias 8859_1
    alias ISO8859-1

charset ISO-8859-2 %S
    historicalName ISO8859_2
    alias iso8859_2
    alias 8859_2
    alias iso-ir-101
    alias ISO_8859-2
    alias ISO_8859-2:1987
    alias ISO8859-2
    alias latin2
    alias l2
    alias ibm912
    alias ibm-912
    alias cp912
    alias 912
    alias csISOLatin2

charset ISO-8859-4 %S
    historicalName ISO8859_4
    alias iso8859_4
    alias iso8859-4
    alias 8859_4
    alias iso-ir-110
    alias ISO_8859-4
    alias ISO_8859-4:1988
    alias latin4
    alias l4
    alias ibm914
    alias ibm-914
    alias cp914
    alias 914
    alias csISOLatin4

charset ISO-8859-5 %S
    historicalName ISO8859_5
    alias iso8859_5
    alias 8859_5
    alias iso-ir-144
    alias ISO_8859-5
    alias ISO_8859-5:1988
    alias ISO8859-5
    alias cyrillic
    alias ibm915
    alias ibm-915
    alias cp915
    alias 915
    alias csISOLatinCyrillic

charset ISO-8859-7 %S
    historicalName ISO8859_7
    alias iso8859_7
    alias 8859_7
    alias iso-ir-126
    alias ISO_8859-7
    alias ISO_8859-7:1987
    alias ELOT_928
    alias ECMA-118
    alias greek
    alias greek8
    alias csISOLatinGreek
    alias sun_eu_greek # Solaris 7/8 compatibility
    alias ibm813
    alias ibm-813
    alias 813
    alias cp813
    alias iso8859-7 # Solaris 9 compatibility

charset ISO-8859-9 %S
    historicalName ISO8859_9
    alias iso8859_9
    alias 8859_9
    alias iso-ir-148
    alias ISO_8859-9
    alias ISO_8859-9:1989
    alias ISO8859-9
    alias latin5
    alias l5
    alias ibm920
    alias ibm-920
    alias 920
    alias cp920
    alias csISOLatin5

charset ISO-8859-13 %S
    historicalName ISO8859_13
    alias iso8859_13
    alias 8859_13
    alias iso_8859-13
    alias ISO8859-13

charset ISO-8859-15 %S
    historicalName ISO8859_15
    # IANA alias
    alias ISO_8859-15
    # Other aliases
    alias 8859_15
    alias ISO8859_15
    alias ISO8859-15
    alias IBM923
    alias IBM-923
    alias cp923
    alias 923
    alias LATIN0
    alias LATIN9
    alias L9
    alias csISOlatin0
    alias csISOlatin9
    alias ISO8859_15_FDIS

charset KOI8-R %S
    historicalName KOI8_R
    alias koi8_r
    alias koi8
    alias cskoi8r

charset KOI8-U %S
    alias koi8_u

charset windows-1250 %S
    historicalName Cp1250
    alias cp1250
    alias cp5346 # Euro IBM CCSID

charset windows-1251 %S
    historicalName Cp1251
    alias cp1251
    alias cp5347 # Euro IBM CCSID
    alias ansi-1251 # Solaris compatibility

charset windows-1252 %S
    historicalName Cp1252
    alias cp1252
    alias cp5348 # Euro IBM CCSID

charset windows-1253 %S
    historicalName Cp1253
    alias cp1253
    alias cp5349 # Euro IBM CCSID

charset windows-1254 %S
    historicalName Cp1254
    alias cp1254
    alias cp5350 # Euro IBM CCSID

charset windows-1257 %S
    historicalName Cp1257
    alias cp1257
    alias cp5353 # Euro IBM CCSID


charset IBM437 %S
    historicalName Cp437
    alias cp437
    alias ibm-437
    alias 437
    alias cspc8codepage437
    alias windows-437

charset x-IBM737 %S
    historicalName Cp737
    alias cp737
    alias ibm737
    alias ibm-737
    alias 737

charset IBM775 %S
    historicalName Cp775
    alias cp775
    alias ibm-775
    alias 775

charset IBM850 %S
    historicalName Cp850
    alias cp850
    alias ibm-850
    alias 850
    alias cspc850multilingual

charset IBM852 %S
    historicalName Cp852
    alias cp852
    alias ibm-852
    alias 852
    alias csPCp852

charset IBM855 %S
    historicalName Cp855
    alias cp855
    alias ibm-855
    alias 855
    alias cspcp855

charset IBM857 %S
    historicalName Cp857
    alias cp857
    alias ibm-857
    alias 857
    alias csIBM857

charset IBM00858 %S
    historicalName Cp858
    alias cp858
    alias ccsid00858
    alias cp00858
    alias 858

charset IBM862 %S
    historicalName Cp862
    alias cp862
    alias ibm-862
    alias 862
    alias csIBM862
    alias cspc862latinhebrew

charset IBM866 %S
    historicalName Cp866
    alias cp866
    alias ibm-866
    alias 866
    alias csIBM866

charset x-IBM874 %S
    historicalName Cp874
    alias cp874
    alias ibm874
    alias ibm-874
    alias 874

精彩图集

赞助商链接