mirror of
				https://sourceware.org/git/glibc.git
				synced 2025-11-03 20:53:13 +03:00 
			
		
		
		
	
		
			
				
	
	
		
			834 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			834 lines
		
	
	
		
			21 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* idna.c	Convert to or from IDN strings.
 | 
						|
 * Copyright (C) 2002, 2003, 2004, 2011  Simon Josefsson
 | 
						|
 *
 | 
						|
 * This file is part of GNU Libidn.
 | 
						|
 *
 | 
						|
 * GNU Libidn is free software; you can redistribute it and/or
 | 
						|
 * modify it under the terms of the GNU Lesser General Public
 | 
						|
 * License as published by the Free Software Foundation; either
 | 
						|
 * version 2.1 of the License, or (at your option) any later version.
 | 
						|
 *
 | 
						|
 * GNU Libidn is distributed in the hope that it will be useful,
 | 
						|
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
						|
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 | 
						|
 * Lesser General Public License for more details.
 | 
						|
 *
 | 
						|
 * You should have received a copy of the GNU Lesser General Public
 | 
						|
 * License along with GNU Libidn; if not, see <http://www.gnu.org/licenses/>.
 | 
						|
 */
 | 
						|
 | 
						|
#if HAVE_CONFIG_H
 | 
						|
# include "config.h"
 | 
						|
#endif
 | 
						|
 | 
						|
#include <stdlib.h>
 | 
						|
#include <string.h>
 | 
						|
#include <stringprep.h>
 | 
						|
#include <punycode.h>
 | 
						|
 | 
						|
#include "idna.h"
 | 
						|
 | 
						|
/* Nonzero when code point C is one of the characters this file accepts
   as a label separator: U+002E, U+3002, U+FF0E or U+FF61.  C is
   evaluated more than once, so pass an expression without side
   effects.  */
#define DOTP(c) ((c) == 0x002E || (c) == 0x3002 ||	\
		 (c) == 0xFF0E || (c) == 0xFF61)
 | 
						|
 | 
						|
/* Core functions */
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_ascii_4i
 | 
						|
 * @in: input array with unicode code points.
 | 
						|
 * @inlen: length of input array with unicode code points.
 | 
						|
 * @out: output zero terminated string that must have room for at
 | 
						|
 *       least 63 characters plus the terminating zero.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * The ToASCII operation takes a sequence of Unicode code points that make
 | 
						|
 * up one label and transforms it into a sequence of code points in the
 | 
						|
 * ASCII range (0..7F). If ToASCII succeeds, the original sequence and the
 | 
						|
 * resulting sequence are equivalent labels.
 | 
						|
 *
 | 
						|
 * It is important to note that the ToASCII operation can fail. ToASCII
 | 
						|
 * fails if any step of it fails. If any step of the ToASCII operation
 | 
						|
 * fails on any label in a domain name, that domain name MUST NOT be used
 | 
						|
 * as an internationalized domain name. The method for deadling with this
 | 
						|
 * failure is application-specific.
 | 
						|
 *
 | 
						|
 * The inputs to ToASCII are a sequence of code points, the AllowUnassigned
 | 
						|
 * flag, and the UseSTD3ASCIIRules flag. The output of ToASCII is either a
 | 
						|
 * sequence of ASCII code points or a failure condition.
 | 
						|
 *
 | 
						|
 * ToASCII never alters a sequence of code points that are all in the ASCII
 | 
						|
 * range to begin with (although it could fail). Applying the ToASCII
 | 
						|
 * operation multiple times has exactly the same effect as applying it just
 | 
						|
 * once.
 | 
						|
 *
 | 
						|
 * Return value: Returns 0 on success, or an error code.
 | 
						|
 */
 | 
						|
int
 | 
						|
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
 | 
						|
{
 | 
						|
  size_t len, outlen;
 | 
						|
  uint32_t *src;		/* XXX don't need to copy data? */
 | 
						|
  int rc;
 | 
						|
 | 
						|
  /*
 | 
						|
   * ToASCII consists of the following steps:
 | 
						|
   *
 | 
						|
   * 1. If all code points in the sequence are in the ASCII range (0..7F)
 | 
						|
   * then skip to step 3.
 | 
						|
   */
 | 
						|
 | 
						|
  {
 | 
						|
    size_t i;
 | 
						|
    int inasciirange;
 | 
						|
 | 
						|
    inasciirange = 1;
 | 
						|
    for (i = 0; i < inlen; i++)
 | 
						|
      if (in[i] > 0x7F)
 | 
						|
	inasciirange = 0;
 | 
						|
    if (inasciirange)
 | 
						|
      {
 | 
						|
	src = malloc (sizeof (in[0]) * (inlen + 1));
 | 
						|
	if (src == NULL)
 | 
						|
	  return IDNA_MALLOC_ERROR;
 | 
						|
 | 
						|
	memcpy (src, in, sizeof (in[0]) * inlen);
 | 
						|
	src[inlen] = 0;
 | 
						|
 | 
						|
	goto step3;
 | 
						|
      }
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is
 | 
						|
   * an error. The AllowUnassigned flag is used in [NAMEPREP].
 | 
						|
   */
 | 
						|
 | 
						|
  {
 | 
						|
    char *p;
 | 
						|
 | 
						|
    p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
 | 
						|
    if (p == NULL)
 | 
						|
      return IDNA_MALLOC_ERROR;
 | 
						|
 | 
						|
    len = strlen (p);
 | 
						|
    do
 | 
						|
      {
 | 
						|
	char *newp;
 | 
						|
 | 
						|
	len = 2 * len + 10;	/* XXX better guess? */
 | 
						|
	newp = realloc (p, len);
 | 
						|
	if (newp == NULL)
 | 
						|
	  {
 | 
						|
	    free (p);
 | 
						|
	    return IDNA_MALLOC_ERROR;
 | 
						|
	  }
 | 
						|
	p = newp;
 | 
						|
 | 
						|
	if (flags & IDNA_ALLOW_UNASSIGNED)
 | 
						|
	  rc = stringprep_nameprep (p, len);
 | 
						|
	else
 | 
						|
	  rc = stringprep_nameprep_no_unassigned (p, len);
 | 
						|
      }
 | 
						|
    while (rc == STRINGPREP_TOO_SMALL_BUFFER);
 | 
						|
 | 
						|
    if (rc != STRINGPREP_OK)
 | 
						|
      {
 | 
						|
	free (p);
 | 
						|
	return IDNA_STRINGPREP_ERROR;
 | 
						|
      }
 | 
						|
 | 
						|
    src = stringprep_utf8_to_ucs4 (p, -1, NULL);
 | 
						|
 | 
						|
    free (p);
 | 
						|
  }
 | 
						|
 | 
						|
step3:
 | 
						|
  /*
 | 
						|
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
 | 
						|
   *
 | 
						|
   * (a) Verify the absence of non-LDH ASCII code points; that is,
 | 
						|
   * the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
 | 
						|
   *
 | 
						|
   * (b) Verify the absence of leading and trailing hyphen-minus;
 | 
						|
   * that is, the absence of U+002D at the beginning and end of
 | 
						|
   * the sequence.
 | 
						|
   */
 | 
						|
 | 
						|
  if (flags & IDNA_USE_STD3_ASCII_RULES)
 | 
						|
    {
 | 
						|
      size_t i;
 | 
						|
 | 
						|
      for (i = 0; src[i]; i++)
 | 
						|
	if (src[i] <= 0x2C || src[i] == 0x2E || src[i] == 0x2F ||
 | 
						|
	    (src[i] >= 0x3A && src[i] <= 0x40) ||
 | 
						|
	    (src[i] >= 0x5B && src[i] <= 0x60) ||
 | 
						|
	    (src[i] >= 0x7B && src[i] <= 0x7F))
 | 
						|
	  {
 | 
						|
	    free (src);
 | 
						|
	    return IDNA_CONTAINS_NON_LDH;
 | 
						|
	  }
 | 
						|
 | 
						|
      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
 | 
						|
	{
 | 
						|
	  free (src);
 | 
						|
	  return IDNA_CONTAINS_MINUS;
 | 
						|
	}
 | 
						|
    }
 | 
						|
 | 
						|
  /*
 | 
						|
   * 4. If all code points in the sequence are in the ASCII range
 | 
						|
   * (0..7F), then skip to step 8.
 | 
						|
   */
 | 
						|
 | 
						|
  {
 | 
						|
    size_t i;
 | 
						|
    int inasciirange;
 | 
						|
 | 
						|
    inasciirange = 1;
 | 
						|
    for (i = 0; src[i]; i++)
 | 
						|
      {
 | 
						|
	if (src[i] > 0x7F)
 | 
						|
	  inasciirange = 0;
 | 
						|
	/* copy string to output buffer if we are about to skip to step8 */
 | 
						|
	if (i < 64)
 | 
						|
	  out[i] = src[i];
 | 
						|
      }
 | 
						|
    if (i < 64)
 | 
						|
      out[i] = '\0';
 | 
						|
    if (inasciirange)
 | 
						|
      goto step8;
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
   * 5. Verify that the sequence does NOT begin with the ACE prefix.
 | 
						|
   *
 | 
						|
   */
 | 
						|
 | 
						|
  {
 | 
						|
    size_t i;
 | 
						|
    int match;
 | 
						|
 | 
						|
    match = 1;
 | 
						|
    for (i = 0; match && i < strlen (IDNA_ACE_PREFIX); i++)
 | 
						|
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
 | 
						|
	match = 0;
 | 
						|
    if (match)
 | 
						|
      {
 | 
						|
	free (src);
 | 
						|
	return IDNA_CONTAINS_ACE_PREFIX;
 | 
						|
      }
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
   * 6. Encode the sequence using the encoding algorithm in [PUNYCODE]
 | 
						|
   * and fail if there is an error.
 | 
						|
   */
 | 
						|
  for (len = 0; src[len]; len++)
 | 
						|
    ;
 | 
						|
  src[len] = '\0';
 | 
						|
  outlen = 63 - strlen (IDNA_ACE_PREFIX);
 | 
						|
  rc = punycode_encode (len, src, NULL,
 | 
						|
			&outlen, &out[strlen (IDNA_ACE_PREFIX)]);
 | 
						|
  if (rc != PUNYCODE_SUCCESS)
 | 
						|
    {
 | 
						|
      free (src);
 | 
						|
      return IDNA_PUNYCODE_ERROR;
 | 
						|
    }
 | 
						|
  out[strlen (IDNA_ACE_PREFIX) + outlen] = '\0';
 | 
						|
 | 
						|
  /*
 | 
						|
   * 7. Prepend the ACE prefix.
 | 
						|
   */
 | 
						|
 | 
						|
  memcpy (out, IDNA_ACE_PREFIX, strlen (IDNA_ACE_PREFIX));
 | 
						|
 | 
						|
  /*
 | 
						|
   * 8. Verify that the number of code points is in the range 1 to 63
 | 
						|
   * inclusive (0 is excluded).
 | 
						|
   */
 | 
						|
 | 
						|
step8:
 | 
						|
  free (src);
 | 
						|
  if (strlen (out) < 1 || strlen (out) > 63)
 | 
						|
    return IDNA_INVALID_LENGTH;
 | 
						|
 | 
						|
  return IDNA_SUCCESS;
 | 
						|
}
 | 
						|
 | 
						|
/* ToUnicode().  May realloc() utf8in. */
 | 
						|
static int
 | 
						|
idna_to_unicode_internal (char *utf8in,
 | 
						|
			  uint32_t * out, size_t * outlen, int flags)
 | 
						|
{
 | 
						|
  int rc;
 | 
						|
  char tmpout[64];
 | 
						|
  size_t utf8len = strlen (utf8in) + 1;
 | 
						|
  size_t addlen = 0;
 | 
						|
 | 
						|
  /*
 | 
						|
   * ToUnicode consists of the following steps:
 | 
						|
   *
 | 
						|
   * 1. If the sequence contains any code points outside the ASCII range
 | 
						|
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
 | 
						|
   */
 | 
						|
 | 
						|
  {
 | 
						|
    size_t i;
 | 
						|
    int inasciirange;
 | 
						|
 | 
						|
    inasciirange = 1;
 | 
						|
    for (i = 0; utf8in[i]; i++)
 | 
						|
      if (utf8in[i] & ~0x7F)
 | 
						|
	inasciirange = 0;
 | 
						|
    if (inasciirange)
 | 
						|
      goto step3;
 | 
						|
  }
 | 
						|
 | 
						|
  /*
 | 
						|
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
 | 
						|
   * error. (If step 3 of ToASCII is also performed here, it will not
 | 
						|
   * affect the overall behavior of ToUnicode, but it is not
 | 
						|
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
 | 
						|
   */
 | 
						|
  do
 | 
						|
    {
 | 
						|
      char *newp = realloc (utf8in, utf8len + addlen);
 | 
						|
      if (newp == NULL)
 | 
						|
	{
 | 
						|
	  free (utf8in);
 | 
						|
	  return IDNA_MALLOC_ERROR;
 | 
						|
	}
 | 
						|
      utf8in = newp;
 | 
						|
      if (flags & IDNA_ALLOW_UNASSIGNED)
 | 
						|
	rc = stringprep_nameprep (utf8in, utf8len + addlen);
 | 
						|
      else
 | 
						|
	rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
 | 
						|
      addlen += 1;
 | 
						|
    }
 | 
						|
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);
 | 
						|
 | 
						|
  if (rc != STRINGPREP_OK)
 | 
						|
    {
 | 
						|
      free (utf8in);
 | 
						|
      return IDNA_STRINGPREP_ERROR;
 | 
						|
    }
 | 
						|
 | 
						|
  /* 3. Verify that the sequence begins with the ACE prefix, and save a
 | 
						|
   * copy of the sequence.
 | 
						|
   */
 | 
						|
 | 
						|
step3:
 | 
						|
  if (memcmp (IDNA_ACE_PREFIX, utf8in, strlen (IDNA_ACE_PREFIX)) != 0)
 | 
						|
    {
 | 
						|
      free (utf8in);
 | 
						|
      return IDNA_NO_ACE_PREFIX;
 | 
						|
    }
 | 
						|
 | 
						|
  /* 4. Remove the ACE prefix.
 | 
						|
   */
 | 
						|
 | 
						|
  memmove (utf8in, &utf8in[strlen (IDNA_ACE_PREFIX)],
 | 
						|
	   strlen (utf8in) - strlen (IDNA_ACE_PREFIX) + 1);
 | 
						|
 | 
						|
  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
 | 
						|
   * and fail if there is an error. Save a copy of the result of
 | 
						|
   * this step.
 | 
						|
   */
 | 
						|
 | 
						|
  (*outlen)--;			/* reserve one for the zero */
 | 
						|
 | 
						|
  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
 | 
						|
  if (rc != PUNYCODE_SUCCESS)
 | 
						|
    {
 | 
						|
      free (utf8in);
 | 
						|
      return IDNA_PUNYCODE_ERROR;
 | 
						|
    }
 | 
						|
 | 
						|
  out[*outlen] = 0;		/* add zero */
 | 
						|
 | 
						|
  /* 6. Apply ToASCII.
 | 
						|
   */
 | 
						|
 | 
						|
  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
 | 
						|
  if (rc != IDNA_SUCCESS)
 | 
						|
    {
 | 
						|
      free (utf8in);
 | 
						|
      return rc;
 | 
						|
    }
 | 
						|
 | 
						|
  /* 7. Verify that the result of step 6 matches the saved copy from
 | 
						|
   * step 3, using a case-insensitive ASCII comparison.
 | 
						|
   */
 | 
						|
 | 
						|
  if (strcasecmp (utf8in, tmpout + strlen (IDNA_ACE_PREFIX)) != 0)
 | 
						|
    {
 | 
						|
      free (utf8in);
 | 
						|
      return IDNA_ROUNDTRIP_VERIFY_ERROR;
 | 
						|
    }
 | 
						|
 | 
						|
  /* 8. Return the saved copy from step 5.
 | 
						|
   */
 | 
						|
 | 
						|
  free (utf8in);
 | 
						|
  return IDNA_SUCCESS;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_unicode_44i
 | 
						|
 * @in: input array with unicode code points.
 | 
						|
 * @inlen: length of input array with unicode code points.
 | 
						|
 * @out: output array with unicode code points.
 | 
						|
 * @outlen: on input, maximum size of output array with unicode code points,
 | 
						|
 *          on exit, actual size of output array with unicode code points.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * The ToUnicode operation takes a sequence of Unicode code points
 | 
						|
 * that make up one label and returns a sequence of Unicode code
 | 
						|
 * points. If the input sequence is a label in ACE form, then the
 | 
						|
 * result is an equivalent internationalized label that is not in ACE
 | 
						|
 * form, otherwise the original sequence is returned unaltered.
 | 
						|
 *
 | 
						|
 * ToUnicode never fails. If any step fails, then the original input
 | 
						|
 * sequence is returned immediately in that step.
 | 
						|
 *
 | 
						|
 * The Punycode decoder can never output more code points than it
 | 
						|
 * inputs, but Nameprep can, and therefore ToUnicode can.  Note that
 | 
						|
 * the number of octets needed to represent a sequence of code points
 | 
						|
 * depends on the particular character encoding used.
 | 
						|
 *
 | 
						|
 * The inputs to ToUnicode are a sequence of code points, the
 | 
						|
 * AllowUnassigned flag, and the UseSTD3ASCIIRules flag. The output of
 | 
						|
 * ToUnicode is always a sequence of Unicode code points.
 | 
						|
 *
 | 
						|
 * Return value: Returns error condition, but it must only be used for
 | 
						|
 *               debugging purposes.  The output buffer is always
 | 
						|
 *               guaranteed to contain the correct data according to
 | 
						|
 *               the specification (sans malloc induced errors).  NB!
 | 
						|
 *               This means that you normally ignore the return code
 | 
						|
 *               from this function, as checking it means breaking the
 | 
						|
 *               standard.
 | 
						|
 */
 | 
						|
int
 | 
						|
idna_to_unicode_44i (const uint32_t * in, size_t inlen,
 | 
						|
		     uint32_t * out, size_t * outlen, int flags)
 | 
						|
{
 | 
						|
  int rc;
 | 
						|
  size_t outlensave = *outlen;
 | 
						|
  char *p;
 | 
						|
 | 
						|
  p = stringprep_ucs4_to_utf8 (in, inlen, NULL, NULL);
 | 
						|
  if (p == NULL)
 | 
						|
    return IDNA_MALLOC_ERROR;
 | 
						|
 | 
						|
  rc = idna_to_unicode_internal (p, out, outlen, flags);
 | 
						|
  if (rc != IDNA_SUCCESS)
 | 
						|
    {
 | 
						|
      memcpy (out, in, sizeof (in[0]) * (inlen < outlensave ?
 | 
						|
					 inlen : outlensave));
 | 
						|
      *outlen = inlen;
 | 
						|
    }
 | 
						|
 | 
						|
  /* p is freed in idna_to_unicode_internal.  */
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
/* Wrappers that handle several labels */
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_ascii_4z:
 | 
						|
 * @input: zero terminated input Unicode string.
 | 
						|
 * @output: pointer to newly allocated output string.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert UCS-4 domain name to ASCII string.  The domain name may
 | 
						|
 * contain several labels, separated by dots.  The output buffer must
 | 
						|
 * be deallocated by the caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
 | 
						|
{
 | 
						|
  const uint32_t *start = input;
 | 
						|
  const uint32_t *end = input;
 | 
						|
  char buf[64];
 | 
						|
  char *out = NULL;
 | 
						|
  int rc;
 | 
						|
 | 
						|
  /* 1) Whenever dots are used as label separators, the following
 | 
						|
     characters MUST be recognized as dots: U+002E (full stop),
 | 
						|
     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
 | 
						|
     U+FF61 (halfwidth ideographic full stop). */
 | 
						|
 | 
						|
  if (input[0] == 0)
 | 
						|
    {
 | 
						|
      /* Handle implicit zero-length root label. */
 | 
						|
      *output = malloc (1);
 | 
						|
      if (!*output)
 | 
						|
	return IDNA_MALLOC_ERROR;
 | 
						|
      strcpy (*output, "");
 | 
						|
      return IDNA_SUCCESS;
 | 
						|
    }
 | 
						|
 | 
						|
  if (DOTP (input[0]) && input[1] == 0)
 | 
						|
    {
 | 
						|
      /* Handle explicit zero-length root label. */
 | 
						|
      *output = malloc (2);
 | 
						|
      if (!*output)
 | 
						|
	return IDNA_MALLOC_ERROR;
 | 
						|
      strcpy (*output, ".");
 | 
						|
      return IDNA_SUCCESS;
 | 
						|
    }
 | 
						|
 | 
						|
  *output = NULL;
 | 
						|
  do
 | 
						|
    {
 | 
						|
      end = start;
 | 
						|
 | 
						|
      for (; *end && !DOTP (*end); end++)
 | 
						|
	;
 | 
						|
 | 
						|
      if (*end == '\0' && start == end)
 | 
						|
	{
 | 
						|
	  /* Handle explicit zero-length root label. */
 | 
						|
	  buf[0] = '\0';
 | 
						|
	}
 | 
						|
      else
 | 
						|
	{
 | 
						|
	  rc = idna_to_ascii_4i (start, end - start, buf, flags);
 | 
						|
	  if (rc != IDNA_SUCCESS)
 | 
						|
	    return rc;
 | 
						|
	}
 | 
						|
 | 
						|
      if (out)
 | 
						|
	{
 | 
						|
	  char *newp = realloc (out, strlen (out) + 1 + strlen (buf) + 1);
 | 
						|
	  if (!newp)
 | 
						|
	    {
 | 
						|
	      free (out);
 | 
						|
	      return IDNA_MALLOC_ERROR;
 | 
						|
	    }
 | 
						|
	  out = newp;
 | 
						|
	  strcat (out, ".");
 | 
						|
	  strcat (out, buf);
 | 
						|
	}
 | 
						|
      else
 | 
						|
	{
 | 
						|
	  out = (char *) malloc (strlen (buf) + 1);
 | 
						|
	  if (!out)
 | 
						|
	    return IDNA_MALLOC_ERROR;
 | 
						|
	  strcpy (out, buf);
 | 
						|
	}
 | 
						|
 | 
						|
      start = end + 1;
 | 
						|
    }
 | 
						|
  while (*end);
 | 
						|
 | 
						|
  *output = out;
 | 
						|
 | 
						|
  return IDNA_SUCCESS;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_ascii_8z:
 | 
						|
 * @input: zero terminated input UTF-8 string.
 | 
						|
 * @output: pointer to newly allocated output string.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert UTF-8 domain name to ASCII string.  The domain name may
 | 
						|
 * contain several labels, separated by dots.  The output buffer must
 | 
						|
 * be deallocated by the caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_ascii_8z (const char *input, char **output, int flags)
 | 
						|
{
 | 
						|
  uint32_t *ucs4;
 | 
						|
  size_t ucs4len;
 | 
						|
  int rc;
 | 
						|
 | 
						|
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
 | 
						|
  if (!ucs4)
 | 
						|
    return IDNA_ICONV_ERROR;
 | 
						|
 | 
						|
  rc = idna_to_ascii_4z (ucs4, output, flags);
 | 
						|
 | 
						|
  free (ucs4);
 | 
						|
 | 
						|
  return rc;
 | 
						|
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_ascii_lz:
 | 
						|
 * @input: zero terminated input UTF-8 string.
 | 
						|
 * @output: pointer to newly allocated output string.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert domain name in the locale's encoding to ASCII string.  The
 | 
						|
 * domain name may contain several labels, separated by dots.  The
 | 
						|
 * output buffer must be deallocated by the caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_ascii_lz (const char *input, char **output, int flags)
 | 
						|
{
 | 
						|
  char *utf8;
 | 
						|
  int rc;
 | 
						|
 | 
						|
  utf8 = stringprep_locale_to_utf8 (input);
 | 
						|
  if (!utf8)
 | 
						|
    return IDNA_ICONV_ERROR;
 | 
						|
 | 
						|
  rc = idna_to_ascii_8z (utf8, output, flags);
 | 
						|
 | 
						|
  free (utf8);
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_unicode_4z4z:
 | 
						|
 * @input: zero-terminated Unicode string.
 | 
						|
 * @output: pointer to newly allocated output Unicode string.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert possibly ACE encoded domain name in UCS-4 format into a
 | 
						|
 * UCS-4 string.  The domain name may contain several labels,
 | 
						|
 * separated by dots.  The output buffer must be deallocated by the
 | 
						|
 * caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_unicode_4z4z (const uint32_t * input, uint32_t ** output, int flags)
 | 
						|
{
 | 
						|
  const uint32_t *start = input;
 | 
						|
  const uint32_t *end = input;
 | 
						|
  uint32_t *buf;
 | 
						|
  size_t buflen;
 | 
						|
  uint32_t *out = NULL;
 | 
						|
  size_t outlen = 0;
 | 
						|
 | 
						|
  *output = NULL;
 | 
						|
 | 
						|
  do
 | 
						|
    {
 | 
						|
      end = start;
 | 
						|
 | 
						|
      for (; *end && !DOTP (*end); end++)
 | 
						|
	;
 | 
						|
 | 
						|
      buflen = end - start;
 | 
						|
      buf = malloc (sizeof (buf[0]) * (buflen + 1));
 | 
						|
      if (!buf)
 | 
						|
	return IDNA_MALLOC_ERROR;
 | 
						|
 | 
						|
      idna_to_unicode_44i (start, end - start, buf, &buflen, flags);
 | 
						|
      /* don't check return value as per specification! */
 | 
						|
 | 
						|
      if (out)
 | 
						|
	{
 | 
						|
	  uint32_t *newp = realloc (out,
 | 
						|
				    sizeof (out[0])
 | 
						|
				    * (outlen + 1 + buflen + 1));
 | 
						|
	  if (!newp)
 | 
						|
	    {
 | 
						|
	      free (buf);
 | 
						|
	      free (out);
 | 
						|
	      return IDNA_MALLOC_ERROR;
 | 
						|
	    }
 | 
						|
	  out = newp;
 | 
						|
	  out[outlen++] = 0x002E;	/* '.' (full stop) */
 | 
						|
	  memcpy (out + outlen, buf, sizeof (buf[0]) * buflen);
 | 
						|
	  outlen += buflen;
 | 
						|
	  out[outlen] = 0x0;
 | 
						|
	  free (buf);
 | 
						|
	}
 | 
						|
      else
 | 
						|
	{
 | 
						|
	  out = buf;
 | 
						|
	  outlen = buflen;
 | 
						|
	  out[outlen] = 0x0;
 | 
						|
	}
 | 
						|
 | 
						|
      start = end + 1;
 | 
						|
    }
 | 
						|
  while (*end);
 | 
						|
 | 
						|
  *output = out;
 | 
						|
 | 
						|
  return IDNA_SUCCESS;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_unicode_8z4z:
 | 
						|
 * @input: zero-terminated UTF-8 string.
 | 
						|
 * @output: pointer to newly allocated output Unicode string.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert possibly ACE encoded domain name in UTF-8 format into a
 | 
						|
 * UCS-4 string.  The domain name may contain several labels,
 | 
						|
 * separated by dots.  The output buffer must be deallocated by the
 | 
						|
 * caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_unicode_8z4z (const char *input, uint32_t ** output, int flags)
 | 
						|
{
 | 
						|
  uint32_t *ucs4;
 | 
						|
  size_t ucs4len;
 | 
						|
  int rc;
 | 
						|
 | 
						|
  ucs4 = stringprep_utf8_to_ucs4 (input, -1, &ucs4len);
 | 
						|
  if (!ucs4)
 | 
						|
    return IDNA_ICONV_ERROR;
 | 
						|
 | 
						|
  rc = idna_to_unicode_4z4z (ucs4, output, flags);
 | 
						|
  free (ucs4);
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_unicode_8z8z:
 | 
						|
 * @input: zero-terminated UTF-8 string.
 | 
						|
 * @output: pointer to newly allocated output UTF-8 string.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert possibly ACE encoded domain name in UTF-8 format into a
 | 
						|
 * UTF-8 string.  The domain name may contain several labels,
 | 
						|
 * separated by dots.  The output buffer must be deallocated by the
 | 
						|
 * caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_unicode_8z8z (const char *input, char **output, int flags)
 | 
						|
{
 | 
						|
  uint32_t *ucs4;
 | 
						|
  int rc;
 | 
						|
 | 
						|
  rc = idna_to_unicode_8z4z (input, &ucs4, flags);
 | 
						|
  *output = stringprep_ucs4_to_utf8 (ucs4, -1, NULL, NULL);
 | 
						|
  free (ucs4);
 | 
						|
 | 
						|
  if (!*output)
 | 
						|
    return IDNA_ICONV_ERROR;
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_unicode_8zlz:
 | 
						|
 * @input: zero-terminated UTF-8 string.
 | 
						|
 * @output: pointer to newly allocated output string encoded in the
 | 
						|
 *   current locale's character set.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert possibly ACE encoded domain name in UTF-8 format into a
 | 
						|
 * string encoded in the current locale's character set.  The domain
 | 
						|
 * name may contain several labels, separated by dots.  The output
 | 
						|
 * buffer must be deallocated by the caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_unicode_8zlz (const char *input, char **output, int flags)
 | 
						|
{
 | 
						|
  char *utf8;
 | 
						|
  int rc;
 | 
						|
 | 
						|
  rc = idna_to_unicode_8z8z (input, &utf8, flags);
 | 
						|
  *output = stringprep_utf8_to_locale (utf8);
 | 
						|
  free (utf8);
 | 
						|
 | 
						|
  if (!*output)
 | 
						|
    return IDNA_ICONV_ERROR;
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * idna_to_unicode_lzlz:
 | 
						|
 * @input: zero-terminated string encoded in the current locale's
 | 
						|
 *   character set.
 | 
						|
 * @output: pointer to newly allocated output string encoded in the
 | 
						|
 *   current locale's character set.
 | 
						|
 * @flags: IDNA flags, e.g. IDNA_ALLOW_UNASSIGNED or IDNA_USE_STD3_ASCII_RULES.
 | 
						|
 *
 | 
						|
 * Convert possibly ACE encoded domain name in the locale's character
 | 
						|
 * set into a string encoded in the current locale's character set.
 | 
						|
 * The domain name may contain several labels, separated by dots.  The
 | 
						|
 * output buffer must be deallocated by the caller.
 | 
						|
 *
 | 
						|
 * Return value: Returns IDNA_SUCCESS on success, or error code.
 | 
						|
 **/
 | 
						|
int
 | 
						|
idna_to_unicode_lzlz (const char *input, char **output, int flags)
 | 
						|
{
 | 
						|
  char *utf8;
 | 
						|
  int rc;
 | 
						|
 | 
						|
  utf8 = stringprep_locale_to_utf8 (input);
 | 
						|
  if (!utf8)
 | 
						|
    return IDNA_ICONV_ERROR;
 | 
						|
 | 
						|
  rc = idna_to_unicode_8zlz (utf8, output, flags);
 | 
						|
  free (utf8);
 | 
						|
 | 
						|
  return rc;
 | 
						|
}
 | 
						|
 | 
						|
/**
 | 
						|
 * IDNA_ACE_PREFIX
 | 
						|
 *
 | 
						|
 * The IANA allocated prefix to use for IDNA. "xn--"
 | 
						|
 */
 | 
						|
 | 
						|
/**
 | 
						|
 * Idna_rc:
 | 
						|
 * @IDNA_SUCCESS: Successful operation.  This value is guaranteed to
 | 
						|
 *   always be zero, the remaining ones are only guaranteed to hold
 | 
						|
 *   non-zero values, for logical comparison purposes.
 | 
						|
 * @IDNA_STRINGPREP_ERROR:  Error during string preparation.
 | 
						|
 * @IDNA_PUNYCODE_ERROR: Error during punycode operation.
 | 
						|
 * @IDNA_CONTAINS_NON_LDH: For IDNA_USE_STD3_ASCII_RULES, indicate that
 | 
						|
 *   the string contains non-LDH ASCII characters.
 | 
						|
 * @IDNA_CONTAINS_MINUS: For IDNA_USE_STD3_ASCII_RULES, indicate that
 | 
						|
 *   the string contains a leading or trailing hyphen-minus (U+002D).
 | 
						|
 * @IDNA_INVALID_LENGTH: The final output string is not within the
 | 
						|
 *   (inclusive) range 1 to 63 characters.
 | 
						|
 * @IDNA_NO_ACE_PREFIX: The string does not contain the ACE prefix
 | 
						|
 *   (for ToUnicode).
 | 
						|
 * @IDNA_ROUNDTRIP_VERIFY_ERROR: The ToASCII operation on output
 | 
						|
 *   string does not equal the input.
 | 
						|
 * @IDNA_CONTAINS_ACE_PREFIX: The input contains the ACE prefix (for
 | 
						|
 *   ToASCII).
 | 
						|
 * @IDNA_ICONV_ERROR: Could not convert string in locale encoding.
 | 
						|
 * @IDNA_MALLOC_ERROR: Could not allocate buffer (this is typically a
 | 
						|
 *   fatal error).
 | 
						|
 * @IDNA_DLOPEN_ERROR: Could not dlopen the libcidn DSO (only used
 | 
						|
 *   internally in libc).
 | 
						|
 *
 | 
						|
 * Enumerated return codes of idna_to_ascii_4i(),
 | 
						|
 * idna_to_unicode_44i() functions (and functions derived from those
 | 
						|
 * functions).  The value 0 is guaranteed to always correspond to
 | 
						|
 * success.
 | 
						|
 */
 | 
						|
 | 
						|
 | 
						|
/**
 | 
						|
 * Idna_flags:
 | 
						|
 * @IDNA_ALLOW_UNASSIGNED: Don't reject strings containing unassigned
 | 
						|
 *   Unicode code points.
 | 
						|
 * @IDNA_USE_STD3_ASCII_RULES: Validate strings according to STD3
 | 
						|
 *   rules (i.e., normal host name rules).
 | 
						|
 *
 | 
						|
 * Flags to pass to idna_to_ascii_4i(), idna_to_unicode_44i() etc.
 | 
						|
 */
 |