UTF_8 | = | 'UTF-8' | UTF-8 is the native, default format, so it is declared here rather than in an encodings/ definition file. | |
UTF_16 | = | 'UTF-16' | ||
UNILE | = | 'UNILE' |
encoding | [R] | ID —> Encoding name |
# File rexml/encoding.rb, line 55
#
# Works out which encoding the raw document text +str+ is written in.
#
# Detection order:
# 1. A leading FE FF byte-order mark yields UTF_16.
# 2. A leading FF FE byte-order mark yields UNILE (LSB-first UTF-16).
# 3. An XML declaration carrying an +encoding+ pseudo-attribute yields
#    that encoding name, upcased.
# 4. Anything else yields UTF_8.
#
# Returns the encoding name as a String.
def check_encoding(str)
  # Compare raw byte values. String#[] only returns an Integer on Ruby 1.8,
  # whereas unpack yields Integers on every Ruby version. Input shorter than
  # two bytes simply produces a shorter array and matches neither mark.
  bom = str.unpack('C2')
  return UTF_16 if bom == [254, 255]
  return UNILE if bom == [255, 254]

  # Group 1 is the quote around the version value, group 2 the quote around
  # the encoding value, and group 3 the encoding name itself. The "?" of
  # "<?xml" is escaped so that it is matched literally.
  declaration =
    /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m.match(str)
  return declaration[3].upcase if declaration

  UTF_8
end
Convert to UTF-8
# File rexml/encodings/US-ASCII.rb, line 19
#
# Re-packs +str+ as UTF-8: every byte of the input is taken as one
# code point and written out with the 'U' pack directive.
def decode_ascii(str)
  code_points = []
  str.each_byte { |byte| code_points << byte }
  code_points.pack('U*')
end
# File rexml/encodings/UNILE.rb, line 18
#
# Converts +str+, a byte string of little-endian 16-bit units, to UTF-8.
#
# * A leading byte-order mark (FF FE) is dropped. Input without one is
#   decoded from its first byte instead of losing its first character.
# * A high surrogate followed by a low surrogate is combined into the
#   single code point it encodes.
# * A dangling final byte (odd-length input) is ignored rather than
#   raising NoMethodError.
#
# Returns the decoded text as a String packed with 'U*'.
def decode_unile(str)
  # 'v' reads unsigned 16-bit little-endian values; with '*' an incomplete
  # trailing unit is skipped.
  units = str.unpack('v*')
  units.shift if units.first == 0xFEFF

  code_points = []
  index = 0
  while index < units.size
    unit = units[index]
    following = units[index + 1]
    if unit >= 0xD800 && unit <= 0xDBFF &&
       following && following >= 0xDC00 && following <= 0xDFFF
      code_points << (0x10000 + ((unit - 0xD800) << 10) + (following - 0xDC00))
      index += 2
    else
      # Ordinary unit, or an unpaired surrogate passed through unchanged.
      code_points << unit
      index += 1
    end
  end
  code_points.pack('U*')
end
# File rexml/encodings/UTF-16.rb, line 18
#
# Converts +str+, a byte string of big-endian 16-bit units, to UTF-8.
#
# * A leading byte-order mark (FE FF) is dropped. Input without one is
#   decoded from its first byte instead of losing its first character.
# * A high surrogate followed by a low surrogate is combined into the
#   single code point it encodes.
# * A dangling final byte (odd-length input) is ignored rather than
#   raising NoMethodError.
#
# Returns the decoded text as a String packed with 'U*'.
def decode_utf16(str)
  # 'n' reads unsigned 16-bit big-endian values; with '*' an incomplete
  # trailing unit is skipped.
  units = str.unpack('n*')
  units.shift if units.first == 0xFEFF

  code_points = []
  index = 0
  while index < units.size
    unit = units[index]
    following = units[index + 1]
    if unit >= 0xD800 && unit <= 0xDBFF &&
       following && following >= 0xDC00 && following <= 0xDFFF
      code_points << (0x10000 + ((unit - 0xD800) << 10) + (following - 0xDC00))
      index += 2
    else
      # Ordinary unit, or an unpaired surrogate passed through unchanged.
      code_points << unit
      index += 1
    end
  end
  code_points.pack('U*')
end
Convert from UTF-8
# File rexml/encodings/US-ASCII.rb, line 4
#
# Turns UTF-8 +content+ into a 7-bit byte string. Code points up to 0x7F
# are copied as they are; every other code point is spelled out as a
# decimal numeric entity (&#nnnn;), a technique shared by Stefan Scholl.
def encode_ascii content
  bytes = []
  content.unpack('U*').each do |code_point|
    if code_point > 0x7F
      "&\##{code_point};".each_byte { |byte| bytes.push(byte) }
    else
      bytes.push(code_point)
    end
  end
  bytes.pack('C*')
end
# File rexml/encodings/ICONV.rb, line 10
#
# Passes +content+ to Iconv.conv, with @encoding as the first argument
# and UTF_8 as the second, and returns whatever Iconv returns.
def encode_iconv(content)
  target_encoding = @encoding
  Iconv.conv(target_encoding, UTF_8, content)
end
# File rexml/encodings/UNILE.rb, line 3
#
# Converts UTF-8 +content+ into a byte string of little-endian 16-bit
# units. No byte-order mark is written.
#
# * Code points up to U+FFFF become one unit.
# * Code points U+10000..U+10FFFF become a surrogate pair rather than
#   being replaced by "?".
# * Anything larger cannot be expressed in UTF-16 and becomes "?" (0x3F).
#
# Returns the encoded bytes as a String.
def encode_unile content
  units = []
  content.unpack('U*').each do |code_point|
    if code_point <= 0xFFFF
      units << code_point
    elsif code_point <= 0x10FFFF
      offset = code_point - 0x10000
      units << (0xD800 + (offset >> 10))
      units << (0xDC00 + (offset & 0x3FF))
    else
      # Written as a number because the character literal ?? is a String
      # on Ruby 1.9 and later, which pack cannot convert.
      units << 0x3F
    end
  end
  # 'v' writes each unit low byte first.
  units.pack('v*')
end
# File rexml/encodings/UTF-16.rb, line 3
#
# Converts UTF-8 +content+ into a byte string of big-endian 16-bit
# units. No byte-order mark is written.
#
# * Code points up to U+FFFF become one unit.
# * Code points U+10000..U+10FFFF become a surrogate pair rather than
#   being replaced by "?".
# * Anything larger cannot be expressed in UTF-16 and becomes "?" (0x3F).
#
# Returns the encoded bytes as a String.
def encode_utf16 content
  units = []
  content.unpack('U*').each do |code_point|
    if code_point <= 0xFFFF
      units << code_point
    elsif code_point <= 0x10FFFF
      offset = code_point - 0x10000
      units << (0xD800 + (offset >> 10))
      units << (0xDC00 + (offset & 0x3FF))
    else
      # Written as a number because the character literal ?? is a String
      # on Ruby 1.9 and later, which pack cannot convert.
      units << 0x3F
    end
  end
  # 'n' writes each unit high byte first.
  units.pack('n*')
end
# File rexml/encoding.rb, line 23
#
# Sets the encoding used by the receiver and applies the matching
# encode/decode support to it through Encoding.apply.
#
# enc:: encoding name; +nil+ or UTF_8 selects the built-in UTF-8 support.
#
# For any other name the ICONV support is tried first. If that cannot be
# loaded or applied, a dedicated file rexml/encodings/<NAME>.rb is
# required instead.
#
# Raises ArgumentError when the upcased name contains anything other
# than word characters and "-", or when no support file can be loaded
# for it.
def encoding=(enc)
  old_verbosity = $VERBOSE
  # Warnings are switched off for the duration of the call (presumably to
  # keep the requires below quiet) and restored in the ensure clause.
  $VERBOSE = false
  # Parentheses are required: "defined? a && b" would test the whole
  # expression instead of just the instance variable.
  return if defined?(@encoding) && enc == @encoding

  if enc && enc != UTF_8
    @encoding = enc.upcase
    begin
      require 'rexml/encodings/ICONV.rb'
      Encoding.apply(self, "ICONV")
    rescue LoadError, StandardError
      # Only LoadError and ordinary errors trigger the fallback; Interrupt,
      # SystemExit and similar are left to propagate.
      #
      # The name ends up in a file path, so the whole string has to match.
      # \A and \z are used because ^ and $ also match at line breaks and
      # would accept a name with a harmless first line followed by
      # arbitrary text.
      unless @encoding =~ /\A[\w-]+\z/
        raise ArgumentError, "Bad encoding name #@encoding"
      end
      # Taint tracking no longer exists on recent Rubies.
      @encoding.untaint if @encoding.respond_to?(:untaint)
      enc_file = File.join("rexml", "encodings", "#@encoding.rb")
      begin
        require enc_file
        Encoding.apply(self, @encoding)
      rescue LoadError
        puts $!.message
        raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
      end
    end
  else
    @encoding = UTF_8
    require 'rexml/encodings/UTF-8.rb'
    Encoding.apply(self, @encoding)
  end
ensure
  $VERBOSE = old_verbosity
end