From 322254a191e29f4eabbe5dd05962f1212937a6d8 Mon Sep 17 00:00:00 2001 From: Yaroslav Kolomiiets Date: Tue, 21 Feb 2017 13:33:43 +0200 Subject: emu: fix conversion to windows UTF-16 and back --- emu/Nt/r16.c | 90 ++++++++++++++++++++++++++++++++++-------------------------- 1 file changed, 51 insertions(+), 39 deletions(-) diff --git a/emu/Nt/r16.c b/emu/Nt/r16.c index 92a36521..ba0ceb0d 100644 --- a/emu/Nt/r16.c +++ b/emu/Nt/r16.c @@ -9,26 +9,16 @@ #include "error.h" #include "r16.h" -#define Bit(i) (7-(i)) -/* N 0's preceded by i 1's, T(Bit(2)) is 1100 0000 */ -#define T(i) (((1 << (Bit(i)+1))-1) ^ 0xFF) -/* 0000 0000 0000 0111 1111 1111 */ -#define RuneX(i) ((1 << (Bit(i) + ((i)-1)*Bitx))-1) - enum { - Bitx = Bit(1), - - Tx = T(1), /* 1000 0000 */ - Rune1 = (1<<(Bit(0)+0*Bitx))-1, /* 0000 0000 0000 0000 0111 1111 */ - - Maskx = (1<= Runeself) - n = runelen(c); + if(c > Runemax) + c = Runeerror; + if(c >= LSurrogateMin && c <= LSurrogateMax) + c = Runeerror; + if(c >= HSurrogateMin && c<= HSurrogateMax){ + lc = *r++; + if(lc >= LSurrogateMin && lc <= LSurrogateMax) /* both bounds must hold; with || this test was always true */ + c = (c&Bits10)<<10 | (lc&Bits10) + R16self; + else + c = Runeerror; + } + n = runelen(c); if(p + n >= ep) break; - rc = c; - if(c < Runeself) - *p++ = c; - else - p += runetochar(p, &rc); + p += runetochar(p, &c); } *p = '\0'; return op; @@ -84,20 +79,18 @@ runes16toutf(char *p, Rune16 *r, int nc) int rune16nlen(Rune16 *r, int nrune) { - int nb, i; + int nb; Rune c; nb = 0; while(nrune--) { c = *r++; - if(c <= Rune1){ - nb++; - } else { - for(i = 2; i < UTFmax + 1; i++) - if(c <= RuneX(i) || i == UTFmax){ - nb += i; - break; - } + if(c < R16self) + nb += runelen(c); + else { + c -= R16self; + nb += runelen(HSurrogateMin | (c>>10)); + nb += runelen(LSurrogateMin | (c&Bits10)); } } return nb; @@ -113,7 +106,17 @@ utftorunes16(Rune16 *r, char *p, int nc) er = r + nc; while(*p != '\0' && r + 1 < er){ p += chartorune(&rc, p); - *r++ = rc; /* we'll ignore surrogate pairs */ + if(rc < R16self){ + *r++ = rc; 
+ continue; + } + if(rc > Runemax || er-r < 3){ /* a pair takes two slots and the terminating '\0' below needs a third */ + *r++ = Runeerror; + continue; + } + rc -= R16self; + *r++ = HSurrogateMin | (rc>>10); + *r++ = LSurrogateMin | (rc&Bits10); } *r = '\0'; return or; @@ -167,8 +170,17 @@ int widebytes(wchar_t *ws) { int n = 0; - - while (*ws) - n += runelen(*ws++); + wchar_t c; + + while (*ws){ + c = *ws++; + if(c < R16self) + n += runelen(c); + else { + c -= R16self; + n += runelen(HSurrogateMin | (c>>10)); + n += runelen(LSurrogateMin | (c&Bits10)); + } + } return n+1; } -- cgit v1.2.3