1
0
mirror of https://gitlab.gnome.org/GNOME/libxml2.git synced 2025-07-30 22:43:14 +03:00

fixed problem with comments reported by Nick Kew added routines

* HTMLparser.c: fixed problem with comments reported by Nick Kew
* encoding.c: added routines xmlUTF8Size and xmlUTF8Charcmp for
  some future cleanup of UTF8 handling
This commit is contained in:
William M. Brack
2003-07-29 04:28:04 +00:00
parent 9deb242b55
commit 4a557d97bf
4 changed files with 59 additions and 5 deletions

View File

@ -84,6 +84,53 @@ static int xmlLittleEndian = 1;
* *
************************************************************************/
/**
* xmlUTF8Size:
* @utf: pointer to the UTF8 character
*
* returns the numbers of bytes in the character, -1 on format error
*/
int
xmlUTF8Size(const xmlChar *utf) {
xmlChar mask;
int len;
if (utf == NULL)
return -1;
if (*utf < 0x80)
return 1;
/* check valid UTF8 character */
if (!(*utf & 0x40))
return -1;
/* determine number of bytes in char */
len = 2;
for (mask=0x20; mask != 0; mask>>=1) {
if (!(*utf & mask))
return len;
len++;
}
return -1;
}
/**
* xmlUTF8Charcmp
* @utf1: pointer to first UTF8 char
* @utf2: pointer to second UTF8 char
*
* returns result of comparing the two UCS4 values
* as with xmlStrncmp
*/
int
xmlUTF8Charcmp(const xmlChar *utf1, const xmlChar *utf2) {
if (utf1 == NULL ) {
if (utf2 == NULL)
return 0;
return -1;
}
return xmlStrncmp(utf1, utf2, xsltUTF8Size(utf1));
}
/**
* xmlUTF8Strlen:
* @utf: a sequence of UTF-8 encoded bytes