#include #include "String.h" Strings::Range::Range(const Content& o,int i,int n) { if (i < 0) { nRange = n + i; iRange = 0; } if (o.length() <= i) { nRange = 0; iRange = o.length(); return; } iRange = i; if (n <= 0) { nRange = 0; } else if (o.length() < (i + n)) { nRange = o.length() - i; } else { nRange = n; } } UCS::String::String() : pBuffer(0) { nBuffer = QUANTA; pBuffer = new int[nBuffer]; } UCS::String::String(int n) : pBuffer(0) { nBuffer = sizeFor(n); pBuffer = new int[nBuffer]; } void UTF8::String::resize(int n) { char* p = pBuffer; nBuffer = sizeFor(n); pBuffer = new char[nBuffer]; memcpy(pBuffer,p,nContent); delete p; } void UTF8::String::copy(String& o,Range r) { if (0 == r.nRange) { nContent = 0; return; } nContent = 0; upsize(r.nRange); nContent = r.nRange; memcpy(pBuffer,o.pBuffer + r.iRange,r.nRange); } void UTF8::String::append(String& o,Range r) { if (0 == r.nRange) { return; } upsize(nContent + r.nRange); memcpy(pBuffer + nContent,o.pBuffer + r.iRange,r.nRange); nContent += r.nRange; } int UTF8::String::findChar(int c) const { char* p = pBuffer; char* pEOS = pBuffer + nContent; if (c < 0x80) { while (p < pEOS) { if (c == *p++) { return p - 1 - pBuffer; } } } else if (c < 0x800) { int c1 = 0xC0 | (c >> 6); int c2 = 0x80 | (63 & c); while (p < pEOS) { if ((c1 == *p++) && (c2 == *p++)) { return p - 2 - pBuffer; } } return -1; } else if (c < 0x10000) { int c1 = 0xE0 | (c >> 12); int c2 = 0x80 | (63 & (c >> 6)); int c3 = 0x80 | (63 & c); while (p < pEOS) { if (c1 == *p++) { if (c2 != *p++) { p += 1; } else if (c3 == *p++) { return p - 3 - pBuffer; } } } } else if (c < 0x200000) { int c1 = 0xF0 | (c >> 18); int c2 = 0x80 | (63 & (c >> 12)); int c3 = 0x80 | (63 & (c >> 6)); int c4 = 0x80 | (63 & c); while (p < pEOS) { if (c1 == *p++) { if (c2 != *p++) { p += 2; } else if (c3 != *p++) { p += 1; } else if (c4 == *p++) { return p - 4 - pBuffer; } } } } else if (c < 0x4000000) { int c1 = 0xF8 | (c >> 24); int c2 = 0x80 | (63 & (c >> 18)); int c3 = 0x80 | (63 & (c >> 12)); int c4 = 0x80 | (63 & (c >> 6)); int c5 = 0x80 | (63 & c); while (p < pEOS) { if (c1 == *p++) { if (c2 != *p++) { p += 3; } else if (c3 != *p++) { p += 2; } else if (c4 != *p++) { p += 1; } else if (c5 == *p++) { return p - 5 - pBuffer; } } } } else { int c1 = 0xFC | (c >> 30); int c2 = 0x80 | (63 & (c >> 24)); int c3 = 0x80 | (63 & (c >> 18)); int c4 = 0x80 | (63 & (c >> 12)); int c5 = 0x80 | (63 & (c >> 6)); int c6 = 0x80 | (63 & c); while (p < pEOS) { if (c1 == *p++) { if (c2 != *p++) { p += 4; } else if (c3 != *p++) { p += 3; } else if (c4 != *p++) { p += 2; } else if (c5 != *p++) { p += 1; } else if (c6 == *p++) { return p - 6 - pBuffer; } } } } return -1; } char* UTF8::String::asChars() { upsize(nBuffer + 1); pBuffer[nBuffer] = 0; return pBuffer; } void UCS::String::resize(int n) { int* p = pBuffer; nBuffer = sizeFor(n); pBuffer = new int[nBuffer]; memcpy(pBuffer,p,nContent * sizeof(int)); delete p; } void UCS::String::copy(String& o,Range r) { if (0 == r.nRange) { nContent = 0; return; } nContent = 0; upsize(r.nRange); memcpy(pBuffer,o.pBuffer + r.iRange,sizeof(int) * r.nRange); nContent = r.nRange; } void UCS::String::append(String& o,Range r) { if (0 == r.nRange) { return; } upsize(nContent + r.nRange); memcpy(pBuffer + nContent,o.pBuffer + r.iRange,sizeof(int) * r.nRange); nContent += r.nRange; } int UCS::String::findChar(int c) const { for (int i = 0; i < nContent; ++i) { if (c == pBuffer[i]) { return i; } } return -1; } static inline int min(int a,int b) { return (a < b) ? a : b; } void UTF8::String::convertFrom(const UCS::String& s) { int* p1 = s.asChars(); int* pEOS = p1 + s.length(); char* p2 = pBuffer; char* pEOB = p2 + nBuffer; while (p1 < pEOS) { int* pCheck = p1 + ((pEOB - p2) / 6); if (pEOS < pCheck) { pCheck = pEOS; } while (p1 < pCheck) { int c = *p1++; if (c < 0x80) { *p2++ = c; } else if (c < 0x800) { *p2++ = 0xC0 | (c >> 6); *p2++ = 0x80 | (63 & c); } else if (c < 0x10000) { *p2++ = 0xE0 | (c >> 12); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } else if (c < 0x200000) { *p2++ = 0xF0 | (c >> 18); *p2++ = 0x80 | (63 & (c >> 12)); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } else if (c < 0x4000000) { *p2++ = 0xF8 | (c >> 24); *p2++ = 0x80 | (63 & (c >> 18)); *p2++ = 0x80 | (63 & (c >> 12)); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } else { *p2++ = 0xFC | (1 & (c >> 30)); *p2++ = 0x80 | (63 & (c >> 24)); *p2++ = 0x80 | (63 & (c >> 18)); *p2++ = 0x80 | (63 & (c >> 12)); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } } nContent = (int) (p2 - pBuffer); if (p1 < pEOS) { upsize(nContent + QUANTA); p2 = pBuffer + nContent; pEOB = pBuffer + nBuffer; } } } void UCS::String::from(const int* s,int n) { upsize(n); memcpy(pBuffer,s,n * sizeof(int)); nContent = n; } void UTF8::String::from(const char* s) { int n = strlen(s); upsize(n + sizeof(long long)); // Allow for over-run on bad codes. *(long long*)(pBuffer+n) = 0; // Prevent over-run from disclosing prior data (however improbable). strcpy(pBuffer,s); nContent = n; } void UTF8::String::recode() { // Iterate until all UTF8 characters are normalized. // UTF8 in canonical form can only be smaller, so work in-place. char* p1 = pBuffer; char* p2 = pBuffer; char* pEOS = pBuffer + nContent; while (p1 < pEOS) { int c = 255 & *p1++; if (c < 0x80) { *p2++ = c; continue; } if (c < 0xE0) { c = (31 & c) << 6; c |= 63 & *p1++; } else if (c < 0xF0) { c = (15 & c) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; } else if (c < 0xF8) { c = (7 & c) << 18; c |= (63 & *p1++) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; } else if (c < 0xFC) { c = (3 & c) << 24; c |= (63 & *p1++) << 18; c |= (63 & *p1++) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; } else { c = (1 & c) << 30; c |= (63 & *p1++) << 24; c |= (63 & *p1++) << 18; c |= (63 & *p1++) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; } if (c < 0x80) { *p2++ = c; } else if (c < 0x800) { *p2++ = 0xC0 | (c >> 6); *p2++ = 0x80 | (63 & c); } else if (c < 0x10000) { *p2++ = 0xE0 | (c >> 12); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } else if (c < 0x200000) { *p2++ = 0xF0 | (c >> 18); *p2++ = 0x80 | (63 & (c >> 12)); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } else if (c < 0x4000000) { *p2++ = 0xF8 | (c >> 24); *p2++ = 0x80 | (63 & (c >> 18)); *p2++ = 0x80 | (63 & (c >> 12)); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } else { *p2++ = 0xFC | (1 & (c >> 30)); *p2++ = 0x80 | (63 & (c >> 24)); *p2++ = 0x80 | (63 & (c >> 18)); *p2++ = 0x80 | (63 & (c >> 12)); *p2++ = 0x80 | (63 & (c >> 6)); *p2++ = 0x80 | (63 & c); } } nContent = (int) (p2 - pBuffer); } void UTF8::String::convertTo(UCS::String& s) const { s.setLength(0); // Most likely guess for a minimum-sized UCS buffer. s.upsize(length() / 2); // Iterate until all UTF8 characters are translated. char* p1 = pBuffer; char* pEOS = p1 + nContent; int* p2 = s.asChars(); int* pEOB = p2 + s.nBuffer; while (p1 < pEOS) { char* pCheck = p1 + (pEOB - p2); if (pEOS < pCheck) { pCheck = pEOS; } while (p1 < pCheck) { int c = 255 & *p1++; if (c < 0x80) { *p2++ = c; continue; } if (c < 0xE0) { c = (31 & c) << 6; c |= 63 & *p1++; *p2++ = c; continue; } if (c < 0xF0) { c = (15 & c) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; *p2++ = c; continue; } if (c < 0xF8) { c = (7 & c) << 18; c |= (63 & *p1++) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; *p2++ = c; continue; } if (c < 0xFC) { c = (3 & c) << 24; c |= (63 & *p1++) << 18; c |= (63 & *p1++) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; *p2++ = c; continue; } c = (1 & c) << 30; c |= (63 & *p1++) << 24; c |= (63 & *p1++) << 18; c |= (63 & *p1++) << 12; c |= (63 & *p1++) << 6; c |= 63 & *p1++; *p2++ = c; } s.setLength(p2 - s.asChars()); if (p1 < pEOS) { s.upsize(s.length() + QUANTA); p2 = s.asChars() + s.length(); pEOB = s.asChars() + s.nBuffer; } } } int UTF8::String::compare(const String& s) const { int n = min(nContent,s.nContent); for (int i = 0; i < n; ++i) { int c1 = 255 & pBuffer[i]; int c2 = 255 & s.pBuffer[i]; int v = c1 - c2; if (0 != v) { return v; } } return nContent - s.nContent; } int UCS::String::compare(const String& s) const { int n = min(nContent,s.nContent); for (int i = 0; i < n; ++i) { int c1 = pBuffer[i]; int c2 = s.pBuffer[i]; int v = c1 - c2; if (0 != v) { return v; } } return nContent - s.nContent; }