diff --git a/src/backend/utils/mb/wchar.c b/src/backend/utils/mb/wchar.c
index 49eed316531..53489cf62c3 100644
--- a/src/backend/utils/mb/wchar.c
+++ b/src/backend/utils/mb/wchar.c
@@ -1,7 +1,7 @@
 /*
  * conversion functions between pg_wchar and multibyte streams.
  * Tatsuo Ishii
- * $Id: wchar.c,v 1.34.2.2 2006/05/21 20:06:44 tgl Exp $
+ * $Id: wchar.c,v 1.34.2.3 2007/01/24 17:12:41 tgl Exp $
  *
  * WIN1250 client encoding updated by Pavel Behal
  *
@@ -267,18 +267,18 @@ pg_johab_mblen(const unsigned char *s)
 }
 
 /*
- * convert UTF-8 string to pg_wchar (UCS-2)
- * caller should allocate enough space for "to"
+ * convert UTF8 string to pg_wchar (UCS-4)
+ * caller must allocate enough space for "to", including a trailing zero!
  * len: length of from.
  * "from" not necessarily null terminated.
  */
 static int
 pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 {
-	unsigned char c1,
+	int			cnt = 0;
+	uint32		c1,
 				c2,
 				c3;
-	int			cnt = 0;
 
 	while (len > 0 && *from)
 	{
@@ -287,26 +287,28 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 			*to = *from++;
 			len--;
 		}
-		else if ((*from & 0xe0) == 0xc0 && len >= 2)
+		else if ((*from & 0xe0) == 0xc0)
 		{
+			if (len < 2)
+				break;			/* drop trailing incomplete char */
 			c1 = *from++ & 0x1f;
 			c2 = *from++ & 0x3f;
-			*to = c1 << 6;
-			*to |= c2;
+			*to = (c1 << 6) | c2;
 			len -= 2;
 		}
-		else if ((*from & 0xe0) == 0xe0 && len >= 3)
+		else if ((*from & 0xf0) == 0xe0)
 		{
+			if (len < 3)
+				break;			/* drop trailing incomplete char */
 			c1 = *from++ & 0x0f;
 			c2 = *from++ & 0x3f;
 			c3 = *from++ & 0x3f;
-			*to = c1 << 12;
-			*to |= c2 << 6;
-			*to |= c3;
+			*to = (c1 << 12) | (c2 << 6) | c3;
 			len -= 3;
 		}
 		else
 		{
+			/* treat a bogus byte as a 1-byte char; raising an error is not our job */
 			*to = *from++;
 			len--;
 		}
@@ -318,20 +320,38 @@ pg_utf2wchar_with_len(const unsigned char *from, pg_wchar *to, int len)
 }
 
 /*
- * returns the byte length of a UTF-8 word pointed to by s
+ * Return the byte length of a UTF8 character pointed to by s
+ *
+ * Note: in the current implementation we do not support UTF8 sequences
+ * of more than 3 bytes; hence do NOT return a value larger than 3.
+ * We return "1" for any leading byte that is either flat-out illegal or
+ * indicates a length larger than we support.
+ *
+ * pg_utf2wchar_with_len(), utf2ucs(), pg_utf8_islegal(), and perhaps
+ * other places would need to be updated before this limit could be raised.
  */
 int
 pg_utf_mblen(const unsigned char *s)
 {
-	int			len = 1;
+	int			len;
 
 	if ((*s & 0x80) == 0)
 		len = 1;
 	else if ((*s & 0xe0) == 0xc0)
 		len = 2;
-	else if ((*s & 0xe0) == 0xe0)
+	else if ((*s & 0xf0) == 0xe0)	/* lead byte 1110xxxx */
 		len = 3;
-	return (len);
+#ifdef NOT_USED					/* longer forms: built only if NOT_USED is defined */
+	else if ((*s & 0xf8) == 0xf0)	/* lead byte 11110xxx */
+		len = 4;
+	else if ((*s & 0xfc) == 0xf8)	/* lead byte 111110xx */
+		len = 5;
+	else if ((*s & 0xfe) == 0xfc)	/* lead byte 1111110x */
+		len = 6;
+#endif
+	else
+		len = 1;				/* any other lead byte is counted as one byte */
+	return len;
 }
 
 /*