X-Git-Url: http://www.project-moonshot.org/gitweb/?a=blobdiff_plain;f=src%2Futf.c;h=9f4cb937a0ae663ee8471fe1e0a1c195eabf38b4;hb=7e8b128740336bad50e32bbe9dc86f47b406ce6a;hp=092959d6e21b41f8b2c1ff3f5063ae4abd0d43e6;hpb=902bcdaa5e0d0273b1ba576c2fd676a5565b6d5e;p=jansson.git diff --git a/src/utf.c b/src/utf.c index 092959d..9f4cb93 100644 --- a/src/utf.c +++ b/src/utf.c @@ -1,4 +1,48 @@ +/* + * Copyright (c) 2009, 2010 Petri Lehtinen + * + * Jansson is free software; you can redistribute it and/or modify + * it under the terms of the MIT license. See LICENSE for details. + */ + #include +#include + +int utf8_encode(int32_t codepoint, char *buffer, int *size) +{ + if(codepoint < 0) + return -1; + else if(codepoint < 0x80) + { + buffer[0] = (char)codepoint; + *size = 1; + } + else if(codepoint < 0x800) + { + buffer[0] = 0xC0 + ((codepoint & 0x7C0) >> 6); + buffer[1] = 0x80 + ((codepoint & 0x03F)); + *size = 2; + } + else if(codepoint < 0x10000) + { + buffer[0] = 0xE0 + ((codepoint & 0xF000) >> 12); + buffer[1] = 0x80 + ((codepoint & 0x0FC0) >> 6); + buffer[2] = 0x80 + ((codepoint & 0x003F)); + *size = 3; + } + else if(codepoint <= 0x10FFFF) + { + buffer[0] = 0xF0 + ((codepoint & 0x1C0000) >> 18); + buffer[1] = 0x80 + ((codepoint & 0x03F000) >> 12); + buffer[2] = 0x80 + ((codepoint & 0x000FC0) >> 6); + buffer[3] = 0x80 + ((codepoint & 0x00003F)); + *size = 4; + } + else + return -1; + + return 0; +} int utf8_check_first(char byte) { @@ -36,9 +80,10 @@ int utf8_check_first(char byte) } } -int utf8_check_full(const char *buffer, int size) +int utf8_check_full(const char *buffer, int size, int32_t *codepoint) { - int i, value = 0; + int i; + int32_t value = 0; unsigned char u = (unsigned char)buffer[0]; if(size == 2) @@ -85,9 +130,38 @@ int utf8_check_full(const char *buffer, int size) return 0; } + if(codepoint) + *codepoint = value; + return 1; } +const char *utf8_iterate(const char *buffer, int32_t *codepoint) +{ + int count; + int32_t value; + + if(!*buffer) + return buffer; + + count = utf8_check_first(buffer[0]); + if(count <= 0) + return NULL; + + if(count == 1) + value = (unsigned char)buffer[0]; + else + { + if(!utf8_check_full(buffer, count, &value)) + return NULL; + } + + if(codepoint) + *codepoint = value; + + return buffer + count; +} + int utf8_check_string(const char *string, int length) { int i; @@ -105,7 +179,7 @@ int utf8_check_string(const char *string, int length) if(i + count > length) return 0; - if(!utf8_check_full(&string[i], count)) + if(!utf8_check_full(&string[i], count, NULL)) return 0; i += count - 1;