1
0
mirror of https://git.savannah.gnu.org/git/gnulib.git synced 2025-08-18 23:42:00 +03:00

New module 'mbscasecmp'. strcasecmp is not expected to work with multibyte strings.
This commit is contained in:
Bruno Haible
2007-02-05 01:57:07 +00:00
parent 521d8b2939
commit 623e3b9edb
7 changed files with 65 additions and 78 deletions

View File

@@ -1,3 +1,20 @@
2007-02-04 Bruno Haible <bruno@clisp.org>
New module mbscasecmp, reduced goal of strcasecmp.
* modules/mbscasecmp: New file.
* lib/mbscasecmp.c: New file, copied from lib/strcasecmp.c.
(mbscasecmp): Renamed from strcasecmp.
* lib/strcasecmp.c: Don't include mbuiter.h.
(strcasecmp): Remove support for multibyte locales.
* lib/string_.h (strcasecmp): Don't rename. Declare only if missing.
Change the conditional link warning.
(mbscasecmp): New declaration.
* m4/mbscasecmp.m4: New file.
* m4/string_h.m4 (gl_STRING_MODULE_INDICATOR_DEFAULTS): Initialize
GNULIB_MBSCASECMP.
* modules/string (string.h): Also substitute GNULIB_MBSCASECMP.
* MODULES.html.sh (Internationalization functions): Add mbscasecmp.
2007-02-04 Bruno Haible <bruno@clisp.org>
New module mbsstr. Remove module strstr.

View File

@@ -2163,6 +2163,7 @@ func_all_modules ()
func_module mbschr func_module mbschr
func_module mbsrchr func_module mbsrchr
func_module mbsstr func_module mbsstr
func_module mbscasecmp
func_module mbswidth func_module mbswidth
func_module memcasecmp func_module memcasecmp
func_module memcoll func_module memcoll

View File

@@ -31,13 +31,13 @@
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or /* Compare the character strings S1 and S2, ignoring case, returning less than,
greater than zero if S1 is lexicographically less than, equal to or greater equal to or greater than zero if S1 is lexicographically less than, equal to
than S2. or greater than S2.
Note: This function may, in multibyte locales, return 0 for strings of Note: This function may, in multibyte locales, return 0 for strings of
different lengths! */ different lengths! */
int int
strcasecmp (const char *s1, const char *s2) mbscasecmp (const char *s1, const char *s2)
{ {
if (s1 == s2) if (s1 == s2)
return 0; return 0;

View File

@@ -1,7 +1,5 @@
/* Case-insensitive string comparison function. /* Case-insensitive string comparison function.
Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc. Copyright (C) 1998-1999, 2005-2007 Free Software Foundation, Inc.
Written by Bruno Haible <bruno@clisp.org>, 2005,
based on earlier glibc code.
This program is free software; you can redistribute it and/or modify This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by it under the terms of the GNU General Public License as published by
@@ -25,79 +23,41 @@
#include <ctype.h> #include <ctype.h>
#include <limits.h> #include <limits.h>
#if HAVE_MBRTOWC
# include "mbuiter.h"
#endif
#define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch)) #define TOLOWER(Ch) (isupper (Ch) ? tolower (Ch) : (Ch))
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
greater than zero if S1 is lexicographically less than, equal to or greater greater than zero if S1 is lexicographically less than, equal to or greater
than S2. than S2.
Note: This function may, in multibyte locales, return 0 for strings of Note: This function does not work with multibyte strings! */
different lengths! */
int int
strcasecmp (const char *s1, const char *s2) strcasecmp (const char *s1, const char *s2)
{ {
if (s1 == s2) const unsigned char *p1 = (const unsigned char *) s1;
const unsigned char *p2 = (const unsigned char *) s2;
unsigned char c1, c2;
if (p1 == p2)
return 0; return 0;
/* Be careful not to look at the entire extent of s1 or s2 until needed. do
This is useful because when two strings differ, the difference is
most often already in the very few first characters. */
#if HAVE_MBRTOWC
if (MB_CUR_MAX > 1)
{ {
mbui_iterator_t iter1; c1 = TOLOWER (*p1);
mbui_iterator_t iter2; c2 = TOLOWER (*p2);
mbui_init (iter1, s1); if (c1 == '\0')
mbui_init (iter2, s2); break;
while (mbui_avail (iter1) && mbui_avail (iter2)) ++p1;
{ ++p2;
int cmp = mb_casecmp (mbui_cur (iter1), mbui_cur (iter2));
if (cmp != 0)
return cmp;
mbui_advance (iter1);
mbui_advance (iter2);
}
if (mbui_avail (iter1))
/* s2 terminated before s1. */
return 1;
if (mbui_avail (iter2))
/* s1 terminated before s2. */
return -1;
return 0;
} }
while (c1 == c2);
if (UCHAR_MAX <= INT_MAX)
return c1 - c2;
else else
#endif /* On machines where 'char' and 'int' are types of the same size, the
{ difference of two 'unsigned char' values - including the sign bit -
const unsigned char *p1 = (const unsigned char *) s1; doesn't fit in an 'int'. */
const unsigned char *p2 = (const unsigned char *) s2; return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
unsigned char c1, c2;
do
{
c1 = TOLOWER (*p1);
c2 = TOLOWER (*p2);
if (c1 == '\0')
break;
++p1;
++p2;
}
while (c1 == c2);
if (UCHAR_MAX <= INT_MAX)
return c1 - c2;
else
/* On machines where 'char' and 'int' are types of the same size, the
difference of two 'unsigned char' values - including the sign bit -
doesn't fit in an 'int'. */
return (c1 > c2 ? 1 : c1 < c2 ? -1 : 0);
}
} }

View File

@@ -115,20 +115,17 @@ extern char *stpncpy (char *restrict __dst, char const *restrict __src,
/* Compare strings S1 and S2, ignoring case, returning less than, equal to or /* Compare strings S1 and S2, ignoring case, returning less than, equal to or
greater than zero if S1 is lexicographically less than, equal to or greater greater than zero if S1 is lexicographically less than, equal to or greater
than S2. than S2.
Note: This function may, in multibyte locales, return 0 for strings of Note: This function does not work in multibyte locales. */
different lengths! #if ! @HAVE_STRCASECMP@
No known system has a strcasecmp() function that works correctly in extern int strcasecmp (char const *s1, char const *s2);
multibyte locales. Therefore use our version always, if the #endif
strcase module is available. */ #if defined GNULIB_POSIXCHECK
#if @GNULIB_STRCASE@ /* strcasecmp() does not work with multibyte strings:
# if @REPLACE_STRCASECMP@ POSIX says that it operates on "strings", and "string" in POSIX is defined
# define strcasecmp rpl_strcasecmp as a sequence of bytes, not of characters. */
extern int strcasecmp (char const *__s1, char const *__s2);
# endif
#elif defined GNULIB_POSIXCHECK
# undef strcasecmp # undef strcasecmp
# define strcasecmp(a,b) \ # define strcasecmp(a,b) \
(GL_LINK_WARNING ("strcasecmp is often incorrectly implemented for multibyte locales - use gnulib module 'strcase' for correct and portable internationalization"), \ (GL_LINK_WARNING ("strcasecmp cannot work correctly on character strings in multibyte locales - use mbscasecmp if you care about internationalization, or use c_strcasecmp (from gnulib module c-strcase) if you want a locale independent function"), \
strcasecmp (a, b)) strcasecmp (a, b))
#endif #endif
@@ -337,6 +334,16 @@ extern char * mbsrchr (const char *string, int c);
extern char * mbsstr (const char *haystack, const char *needle); extern char * mbsstr (const char *haystack, const char *needle);
#endif #endif
#if @GNULIB_MBSCASECMP@
/* Compare the character strings S1 and S2, ignoring case, returning less than,
equal to or greater than zero if S1 is lexicographically less than, equal to
or greater than S2.
Note: This function may, in multibyte locales, return 0 for strings of
different lengths!
Unlike strcasecmp(), this function works correctly in multibyte locales. */
extern int mbscasecmp (const char *s1, const char *s2);
#endif
#ifdef __cplusplus #ifdef __cplusplus
} }

View File

@@ -71,4 +71,5 @@ AC_DEFUN([gl_STRING_MODULE_INDICATOR_DEFAULTS],
GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR]) GNULIB_MBSCHR=0; AC_SUBST([GNULIB_MBSCHR])
GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR]) GNULIB_MBSRCHR=0; AC_SUBST([GNULIB_MBSRCHR])
GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR]) GNULIB_MBSSTR=0; AC_SUBST([GNULIB_MBSSTR])
GNULIB_MBSCASECMP=0; AC_SUBST([GNULIB_MBSCASECMP])
]) ])

View File

@@ -24,6 +24,7 @@ string.h: string_.h
-e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \ -e 's|@''GNULIB_MBSCHR''@|$(GNULIB_MBSCHR)|g' \
-e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \ -e 's|@''GNULIB_MBSRCHR''@|$(GNULIB_MBSRCHR)|g' \
-e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \ -e 's|@''GNULIB_MBSSTR''@|$(GNULIB_MBSSTR)|g' \
-e 's|@''GNULIB_MBSCASECMP''@|$(GNULIB_MBSCASECMP)|g' \
-e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \ -e 's|@''GNULIB_MEMMEM''@|$(GNULIB_MEMMEM)|g' \
-e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \ -e 's|@''GNULIB_MEMPCPY''@|$(GNULIB_MEMPCPY)|g' \
-e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \ -e 's|@''GNULIB_MEMRCHR''@|$(GNULIB_MEMRCHR)|g' \