diff --git a/modules/charset/charset_test.go b/modules/charset/charset_test.go index fc9ec6a979..fde42732e8 100644 --- a/modules/charset/charset_test.go +++ b/modules/charset/charset_test.go @@ -62,19 +62,24 @@ func TestToUTF8WithErr(t *testing.T) { } func TestToUTF8WithFallback(t *testing.T) { + // "ABC" res := ToUTF8WithFallback([]byte{0x41, 0x42, 0x43}) - assert.Equal(t, []byte("ABC"), res) + assert.Equal(t, []byte{0x41, 0x42, 0x43}, res) + // "áéíóú" res = ToUTF8WithFallback([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}) - assert.Equal(t, []byte("áéíóú"), res) + assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) + // UTF8 BOM + "áéíóú" res = ToUTF8WithFallback([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}) - assert.Equal(t, []byte("áéíóú"), res) + assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) + // "Hola, así cómo ños" res = ToUTF8WithFallback([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73}) - assert.Equal(t, []byte("Hola, así cómo ños"), res) + assert.Equal(t, []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20, 0xC3, 0xB1, 0x6F, 0x73}, res) - minmatch := []byte("Hola, así cómo ") + // "Hola, así cómo " + minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20} res = ToUTF8WithFallback([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73}) // Do not fail for differences in invalid cases, as the library might change the conversion criteria for those @@ -85,8 +90,10 @@ func TestToUTF8WithFallback(t *testing.T) { assert.Equal(t, minmatch, res[0:len(minmatch)]) // Japanese (Shift-JIS) + // "日属秘ぞしちゅ。" res = ToUTF8WithFallback([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42}) - assert.Equal(t, []byte("日属秘ぞしちゅ。"), res) + assert.Equal(t, []byte{0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3, + 0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82}, res) res = ToUTF8WithFallback([]byte{0x00, 0x00, 0x00, 0x00}) assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res) @@ -124,19 +131,24 @@ func TestToUTF8(t *testing.T) { } func TestToUTF8DropErrors(t *testing.T) { + // "ABC" res := ToUTF8DropErrors([]byte{0x41, 0x42, 0x43}) - assert.Equal(t, []byte("ABC"), res) + assert.Equal(t, []byte{0x41, 0x42, 0x43}, res) + // "áéíóú" res = ToUTF8DropErrors([]byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}) - assert.Equal(t, []byte("áéíóú"), res) + assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) + // UTF8 BOM + "áéíóú" res = ToUTF8DropErrors([]byte{0xef, 0xbb, 0xbf, 0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}) - assert.Equal(t, []byte("áéíóú"), res) + assert.Equal(t, []byte{0xc3, 0xa1, 0xc3, 0xa9, 0xc3, 0xad, 0xc3, 0xb3, 0xc3, 0xba}, res) + // "Hola, así cómo ños" res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0xF1, 0x6F, 0x73}) - assert.Equal(t, []byte("Hola, así cómo ños"), res) + assert.Equal(t, []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20, 0xC3, 0xB1, 0x6F, 0x73}, res) - minmatch := []byte("Hola, así cómo ") + // "Hola, así cómo " + minmatch := []byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xC3, 0xAD, 0x20, 0x63, 0xC3, 0xB3, 0x6D, 0x6F, 0x20} res = ToUTF8DropErrors([]byte{0x48, 0x6F, 0x6C, 0x61, 0x2C, 0x20, 0x61, 0x73, 0xED, 0x20, 0x63, 0xF3, 0x6D, 0x6F, 0x20, 0x07, 0xA4, 0x6F, 0x73}) // Do not fail for differences in invalid cases, as the library might change the conversion criteria for those @@ -147,8 +159,10 @@ func TestToUTF8DropErrors(t *testing.T) { assert.Equal(t, minmatch, res[0:len(minmatch)]) // Japanese (Shift-JIS) + // "日属秘ぞしちゅ。" res = ToUTF8DropErrors([]byte{0x93, 0xFA, 0x91, 0xAE, 0x94, 0xE9, 0x82, 0xBC, 0x82, 0xB5, 0x82, 0xBF, 0x82, 0xE3, 0x81, 0x42}) - assert.Equal(t, []byte("日属秘ぞしちゅ。"), res) + assert.Equal(t, []byte{0xE6, 0x97, 0xA5, 0xE5, 0xB1, 0x9E, 0xE7, 0xA7, 0x98, 0xE3, + 0x81, 0x9E, 0xE3, 0x81, 0x97, 0xE3, 0x81, 0xA1, 0xE3, 0x82, 0x85, 0xE3, 0x80, 0x82}, res) res = ToUTF8DropErrors([]byte{0x00, 0x00, 0x00, 0x00}) assert.Equal(t, []byte{0x00, 0x00, 0x00, 0x00}, res)