diff options
Diffstat (limited to 'lib/sh')
-rw-r--r-- | lib/sh/Makefile.in | 26 | ||||
-rw-r--r-- | lib/sh/casemod.c | 7 | ||||
-rw-r--r-- | lib/sh/dprintf.c | 70 | ||||
-rw-r--r-- | lib/sh/eaccess.c | 25 | ||||
-rw-r--r-- | lib/sh/fnxform.c | 199 | ||||
-rw-r--r-- | lib/sh/fpurge.c | 96 | ||||
-rw-r--r-- | lib/sh/mbscasecmp.c | 78 | ||||
-rw-r--r-- | lib/sh/mbschr.c | 78 | ||||
-rw-r--r-- | lib/sh/oslib.c | 9 | ||||
-rw-r--r-- | lib/sh/shmatch.c | 2 | ||||
-rw-r--r-- | lib/sh/shmbchar.c | 92 | ||||
-rw-r--r-- | lib/sh/shquote.c | 10 | ||||
-rw-r--r-- | lib/sh/snprintf.c | 2 | ||||
-rw-r--r-- | lib/sh/strcasestr.c | 46 | ||||
-rw-r--r-- | lib/sh/strchrnul.c | 144 | ||||
-rw-r--r-- | lib/sh/strftime.c | 49 | ||||
-rw-r--r-- | lib/sh/strtrans.c | 34 | ||||
-rw-r--r-- | lib/sh/tmpfile.c | 2 | ||||
-rw-r--r-- | lib/sh/unicode.c | 235 | ||||
-rw-r--r-- | lib/sh/wcswidth.c | 46 |
20 files changed, 1200 insertions, 50 deletions
diff --git a/lib/sh/Makefile.in b/lib/sh/Makefile.in index 80f6cc5..d97cd99 100644 --- a/lib/sh/Makefile.in +++ b/lib/sh/Makefile.in @@ -2,7 +2,7 @@ # Makefile for the Bash library # # -# Copyright (C) 1998-2009 Free Software Foundation, Inc. +# Copyright (C) 1998-2010 Free Software Foundation, Inc. # This program is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by @@ -91,7 +91,8 @@ CSOURCES = clktck.c clock.c getcwd.c getenv.c oslib.c setlinebuf.c \ strtoll.c strtoull.c strtoimax.c strtoumax.c memset.c strstr.c \ mktime.c strftime.c mbschr.c zcatfd.c zmapfd.c winsize.c eaccess.c \ wcsdup.c fpurge.c zgetline.c mbscmp.c uconvert.c ufuncs.c \ - casemod.c fdprintf.c input_avail.c mbscasecmp.c fnxform.c + casemod.c dprintf.c input_avail.c mbscasecmp.c fnxform.c \ + strchrnul.c unicode.c wcswidth.c shmbchar.c # The header files for this library. HSOURCES = @@ -105,7 +106,7 @@ OBJECTS = clktck.o clock.o getenv.o oslib.o setlinebuf.o strnlen.o \ strtrans.o snprintf.o mailstat.o fmtulong.o \ fmtullong.o fmtumax.o zcatfd.o zmapfd.o winsize.o wcsdup.o \ fpurge.o zgetline.o mbscmp.o uconvert.o ufuncs.o casemod.o \ - fdprintf.o input_avail.o mbscasecmp.o fnxform.o ${LIBOBJS} + input_avail.o mbscasecmp.o fnxform.o unicode.o shmbchar.o ${LIBOBJS} SUPPORT = Makefile @@ -144,7 +145,7 @@ casemod.o: casemod.c clktck.o: clktck.c clock.o: clock.c eaccess.o: eaccess.c -fdprintf.o: fdprintf.c +dprintf.o: dprintf.c fmtullong.o: fmtullong.c fmtulong.o: fmtulong.c fmtumax.o: fmtumax.c @@ -168,6 +169,7 @@ pathcanon.o: pathcanon.c pathphys.o: pathphys.c rename.o: rename.c setlinebuf.o: setlinebuf.c +shmbchar.o: shmbchar.c shquote.o: shquote.c shtty.o: shtty.c snprintf.o: snprintf.c @@ -195,6 +197,7 @@ uconvert.o: uconvert.c ufuncs.o: ufuncs.c vprint.o: vprint.c wcsdup.o: wcsdup.c +wcswidth.o: wcswidth.c mbschr.o: mbschr.c zcatfd.o: zcatfd.c zmapfd.o: zmapfd.c @@ -214,7 +217,7 @@ casemod.o: ${BUILD_DIR}/config.h 
clktck.o: ${BUILD_DIR}/config.h clock.o: ${BUILD_DIR}/config.h eaccess.o: ${BUILD_DIR}/config.h -fdprintf.o: ${BUILD_DIR}/config.h +dprintf.o: ${BUILD_DIR}/config.h fmtullong.o: ${BUILD_DIR}/config.h fmtulong.o: ${BUILD_DIR}/config.h fmtumax.o: ${BUILD_DIR}/config.h @@ -238,6 +241,7 @@ pathcanon.o: ${BUILD_DIR}/config.h pathphys.o: ${BUILD_DIR}/config.h rename.o: ${BUILD_DIR}/config.h setlinebuf.o: ${BUILD_DIR}/config.h +shmbchar.o: ${BUILD_DIR}/config.h shquote.o: ${BUILD_DIR}/config.h shtty.o: ${BUILD_DIR}/config.h snprintf.o: ${BUILD_DIR}/config.h @@ -265,6 +269,7 @@ uconvert.o: ${BUILD_DIR}/config.h ufuncs.o: ${BUILD_DIR}/config.h vprint.o: ${BUILD_DIR}/config.h wcsdup.o: ${BUILD_DIR}/config.h +wcswidth.o: ${BUILD_DIR}/config.h mbschr.o: ${BUILD_DIR}/config.h zcatfd.o: ${BUILD_DIR}/config.h zgetline.o: ${BUILD_DIR}/config.h @@ -505,9 +510,12 @@ wcsdup.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h wcsdup.o: ${BASHINCDIR}/stdc.h wcsdup.o: ${topdir}/xmalloc.h +wcswidth.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h +wcswidth.o: ${BASHINCDIR}/stdc.h + mbschr.o: ${topdir}/bashansi.h mbschr.o: ${BASHINCDIR}/ansi_stdlib.h -mbschr.o: ${BASHINCDIR}/shmbutil.h +mbschr.o: ${BASHINCDIR}/shmbutil.h ${BASHINCDIR}/shmbchar.h zgetline.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h zgetline.o: ${BASHINCDIR}/stdc.h @@ -526,10 +534,10 @@ casemod.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h casemod.o: ${BASHINCDIR}/stdc.h casemod.o: ${topdir}/xmalloc.h casemod.o: ${topdir}/bashtypes.h -casemod.o: ${BASHINCDIR}/shmbutil.h +casemod.o: ${BASHINCDIR}/shmbutil.h ${BASHINCDIR}/shmbchar.h casemod.o: ${topdir}/bashintl.h ${LIBINTL_H} ${BASHINCDIR}/gettext.h -fdprintf.o: ${BASHINCDIR}/stdc.h +dprintf.o: ${BASHINCDIR}/stdc.h input_avail.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h input_avail.o: ${BASHINCDIR}/stdc.h @@ -542,3 +550,5 @@ fnxform.o: ${topdir}/bashansi.h ${BASHINCDIR}/ansi_stdlib.h fnxform.o: ${BASHINCDIR}/stdc.h fnxform.o: ${topdir}/bashtypes.h
fnxform.o: ${topdir}/bashintl.h ${LIBINTL_H} ${BASHINCDIR}/gettext.h + +shmbchar.o: ${BASHINCDIR}/shmbchar.h diff --git a/lib/sh/casemod.c b/lib/sh/casemod.c index d85549a..3127d8c 100644 --- a/lib/sh/casemod.c +++ b/lib/sh/casemod.c @@ -111,6 +111,13 @@ sh_modcase (string, pat, flags) mbstate_t state; #endif + if (string == 0 || *string == 0) + { + ret = (char *)xmalloc (1); + ret[0] = '\0'; + return ret; + } + #if defined (HANDLE_MULTIBYTE) memset (&state, 0, sizeof (mbstate_t)); #endif diff --git a/lib/sh/dprintf.c b/lib/sh/dprintf.c new file mode 100644 index 0000000..b3b5d64 --- /dev/null +++ b/lib/sh/dprintf.c @@ -0,0 +1,70 @@ +/* dprintf -- printf to a file descriptor */ + +/* Copyright (C) 2008-2010 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdc.h> + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif + +#if defined (PREFER_STDARG) +# include <stdarg.h> +#else +# include <varargs.h> +#endif + +#include <stdio.h> + +int +#if defined (PREFER_STDARG) +dprintf(int fd, const char *format, ...) 
+#else +dprintf(fd, format, va_alist) + int fd; + const char *format; + va_dcl +#endif +{ + FILE *fp; + int fd2, rc, r2; + va_list args; + + if ((fd2 = dup(fd)) < 0) + return -1; + fp = fdopen (fd2, "w"); + if (fp == 0) + { + close (fd2); + return -1; + } + + SH_VA_START (args, format); + rc = vfprintf (fp, format, args); + fflush (fp); + va_end (args); + + r2 = fclose (fp); /* check here */ + + return rc; +} diff --git a/lib/sh/eaccess.c b/lib/sh/eaccess.c index 989bc22..d9bca8c 100644 --- a/lib/sh/eaccess.c +++ b/lib/sh/eaccess.c @@ -1,6 +1,6 @@ /* eaccess.c - eaccess replacement for the shell, plus other access functions. */ -/* Copyright (C) 2006 Free Software Foundation, Inc. +/* Copyright (C) 2006-2010 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -198,11 +198,20 @@ sh_eaccess (path, mode) char *path; int mode; { + int ret; + if (path_is_devfd (path)) return (sh_stataccess (path, mode)); -#if defined (HAVE_EACCESS) /* FreeBSD */ - return (eaccess (path, mode)); +#if defined (HAVE_FACCESSAT) && defined (AT_EACCESS) + return (faccessat (AT_FDCWD, path, mode, AT_EACCESS)); +#elif defined (HAVE_EACCESS) /* FreeBSD */ + ret = eaccess (path, mode); /* XXX -- not always correct for X_OK */ +# if defined (__FreeBSD__) + if (ret == 0 && current_user.euid == 0 && mode == X_OK) + return (sh_stataccess (path, mode)); +# endif + return ret; #elif defined (EFF_ONLY_OK) /* SVR4(?), SVR4.2 */ return access (path, mode|EFF_ONLY_OK); #else @@ -215,7 +224,15 @@ sh_eaccess (path, mode) # endif if (current_user.uid == current_user.euid && current_user.gid == current_user.egid) - return (access (path, mode)); + { + ret = access (path, mode); +#if defined (__FreeBSD__) || defined (SOLARIS) + if (ret == 0 && current_user.euid == 0 && mode == X_OK) + return (sh_stataccess (path, mode)); +#endif + return ret; + + } return (sh_stataccess (path, mode)); #endif diff --git a/lib/sh/fnxform.c b/lib/sh/fnxform.c new file mode 100644 index 
0000000..d7e1b5a --- /dev/null +++ b/lib/sh/fnxform.c @@ -0,0 +1,199 @@ +/* fnxform - use iconv(3) to transform strings to and from "filename" format */ + +/* Copyright (C) 2009-2010 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif +#include "bashansi.h" +#include <stdio.h> +#include "bashtypes.h" + +#include "stdc.h" +#include "bashintl.h" +#include <xmalloc.h> + +#if defined (HAVE_ICONV) +# include <iconv.h> +#endif + +#if defined (HAVE_LOCALE_CHARSET) +extern const char *locale_charset __P((void)); +#else +extern char *get_locale_var __P((char *)); +#endif + +#if defined (HAVE_ICONV) +static iconv_t conv_fromfs = (iconv_t)-1; +static iconv_t conv_tofs = (iconv_t)-1; + +#define OUTLEN_MAX 4096 + +static char *outbuf = 0; +static size_t outlen = 0; + +static char *curencoding __P((void)); +static void init_tofs __P((void)); +static void init_fromfs __P((void)); + +static char * +curencoding () +{ + char *loc; +#if defined (HAVE_LOCALE_CHARSET) + loc = (char *)locale_charset (); + return loc; +#else + char *dot, *mod; + + loc = get_locale_var ("LC_CTYPE"); + if (loc == 0 || *loc == 0) + return ""; + dot = strchr (loc, '.'); + if (dot == 0) + return loc; + mod = strchr (dot, '@'); + if (mod) + *mod = '\0'; + return ++dot; +#endif +} + +static 
void +init_tofs () +{ + char *cur; + + cur = curencoding (); + conv_tofs = iconv_open ("UTF-8-MAC", cur); +} + +static void +init_fromfs () +{ + char *cur; + + cur = curencoding (); + conv_fromfs = iconv_open (cur, "UTF-8-MAC"); +} + +char * +fnx_tofs (string, len) + char *string; + size_t len; +{ +#ifdef MACOSX + ICONV_CONST char *inbuf; + char *tempbuf; + size_t templen; + + if (conv_tofs == (iconv_t)-1) + init_tofs (); + if (conv_tofs == (iconv_t)-1) + return string; + + /* Free and reallocate outbuf if it's *too* big */ + if (outlen >= OUTLEN_MAX && len < OUTLEN_MAX - 8) + { + free (outbuf); + outbuf = 0; + outlen = 0; + } + + inbuf = string; + if (outbuf == 0 || outlen < len + 8) + { + outlen = len + 8; + outbuf = outbuf ? xrealloc (outbuf, outlen + 1) : xmalloc (outlen + 1); + } + tempbuf = outbuf; + templen = outlen; + + iconv (conv_tofs, NULL, NULL, NULL, NULL); + + if (iconv (conv_tofs, &inbuf, &len, &tempbuf, &templen) == (size_t)-1) + return string; + + *tempbuf = '\0'; + return outbuf; +#else + return string; +#endif +} + +char * +fnx_fromfs (string, len) + char *string; + size_t len; +{ +#ifdef MACOSX + ICONV_CONST char *inbuf; + char *tempbuf; + size_t templen; + + if (conv_fromfs == (iconv_t)-1) + init_fromfs (); + if (conv_fromfs == (iconv_t)-1) + return string; + + /* Free and reallocate outbuf if it's *too* big */ + if (outlen >= OUTLEN_MAX && len < OUTLEN_MAX - 8) + { + free (outbuf); + outbuf = 0; + outlen = 0; + } + + inbuf = string; + if (outbuf == 0 || outlen < (len + 8)) + { + outlen = len + 8; + outbuf = outbuf ? 
xrealloc (outbuf, outlen + 1) : xmalloc (outlen + 1); + } + tempbuf = outbuf; + templen = outlen; + + iconv (conv_fromfs, NULL, NULL, NULL, NULL); + + if (iconv (conv_fromfs, &inbuf, &len, &tempbuf, &templen) == (size_t)-1) + return string; + + *tempbuf = '\0'; + return outbuf; +#else + return string; +#endif +} + +#else +char * +fnx_tofs (string) + char *string; +{ + return string; +} + +char * +fnx_fromfs (string) + char *string; +{ + return string; +} +#endif diff --git a/lib/sh/fpurge.c b/lib/sh/fpurge.c index f9e1b9d..13e8c78 100644 --- a/lib/sh/fpurge.c +++ b/lib/sh/fpurge.c @@ -1,6 +1,6 @@ /* fpurge - Flushing buffers of a FILE stream. */ -/* Copyright (C) 2007 Free Software Foundation, Inc. +/* Copyright (C) 2007-2010 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. @@ -36,6 +36,90 @@ extern int fpurge __P((FILE *stream)); #endif #include <stdlib.h> +/* Inline contents of gnulib:stdio-impl.h */ + +/* Many stdio implementations have the same logic and therefore can share + the same implementation of stdio extension API, except that some fields + have different naming conventions, or their access requires some casts. */ + +/* BSD stdio derived implementations. */ + +#if defined __NetBSD__ /* NetBSD */ +/* Get __NetBSD_Version__. */ +# include <sys/param.h> +#endif + +#if defined __sferror || defined __DragonFly__ /* FreeBSD, NetBSD, OpenBSD, DragonFly, MacOS X, Cygwin */ + +# if defined __DragonFly__ /* DragonFly */ + /* See <http://www.dragonflybsd.org/cvsweb/src/lib/libc/stdio/priv_stdio.h?rev=HEAD&content-type=text/x-cvsweb-markup>. 
*/ +# define fp_ ((struct { struct __FILE_public pub; \ + struct { unsigned char *_base; int _size; } _bf; \ + void *cookie; \ + void *_close; \ + void *_read; \ + void *_seek; \ + void *_write; \ + struct { unsigned char *_base; int _size; } _ub; \ + int _ur; \ + unsigned char _ubuf[3]; \ + unsigned char _nbuf[1]; \ + struct { unsigned char *_base; int _size; } _lb; \ + int _blksize; \ + fpos_t _offset; \ + /* More fields, not relevant here. */ \ + } *) fp) + /* See <http://www.dragonflybsd.org/cvsweb/src/include/stdio.h?rev=HEAD&content-type=text/x-cvsweb-markup>. */ +# define _p pub._p +# define _flags pub._flags +# define _r pub._r +# define _w pub._w +# else +# define fp_ fp +# endif + +# if (defined __NetBSD__ && __NetBSD_Version__ >= 105270000) || defined __OpenBSD__ /* NetBSD >= 1.5ZA, OpenBSD */ + /* See <http://cvsweb.netbsd.org/bsdweb.cgi/src/lib/libc/stdio/fileext.h?rev=HEAD&content-type=text/x-cvsweb-markup> + and <http://www.openbsd.org/cgi-bin/cvsweb/src/lib/libc/stdio/fileext.h?rev=HEAD&content-type=text/x-cvsweb-markup> */ + struct __sfileext + { + struct __sbuf _ub; /* ungetc buffer */ + /* More fields, not relevant here. */ + }; +# define fp_ub ((struct __sfileext *) fp->_ext._base)->_ub +# else /* FreeBSD, NetBSD <= 1.5Z, DragonFly, MacOS X, Cygwin */ +# define fp_ub fp_->_ub +# endif + +# define HASUB(fp) (fp_ub._base != NULL) + +#endif + +/* SystemV derived implementations. */ + +#if defined _IOERR + +# if defined __sun && defined _LP64 /* Solaris/{SPARC,AMD64} 64-bit */ +# define fp_ ((struct { unsigned char *_ptr; \ + unsigned char *_base; \ + unsigned char *_end; \ + long _cnt; \ + int _file; \ + unsigned int _flag; \ + } *) fp) +# else +# define fp_ fp +# endif + +# if defined _SCO_DS /* OpenServer */ +# define _cnt __cnt +# define _ptr __ptr +# define _base __base +# define _flag __flag +# endif + +#endif + int fpurge (FILE *fp) { @@ -45,7 +129,7 @@ fpurge (FILE *fp) /* The __fpurge function does not have a return value. 
*/ return 0; -#elif HAVE_FPURGE /* FreeBSD, NetBSD, OpenBSD, DragonFly, MacOS X */ +#elif HAVE_FPURGE /* FreeBSD, NetBSD, OpenBSD, DragonFly, MacOS X, Cygwin 1.7 */ /* Call the system's fpurge function. */ # undef fpurge @@ -59,10 +143,10 @@ fpurge (FILE *fp) <stdio.h> on BSD systems says: "The following always hold: if _flags & __SRD, _w is 0." If this invariant is not fulfilled and the stream is read-write but - currently writing, subsequent putc or fputc calls will write directly + currently reading, subsequent putc or fputc calls will write directly into the buffer, although they shouldn't be allowed to. */ - if ((fp->_flags & __SRD) != 0) - fp->_w = 0; + if ((fp_->_flags & __SRD) != 0) + fp_->_w = 0; # endif return result; @@ -101,7 +185,7 @@ fpurge (FILE *fp) fp->_wcount = 0; fp->_ungetc_count = 0; return 0; -# elif defined _IOERR /* AIX, HP-UX, IRIX, OSF/1, Solaris, OpenServer, mingw */ +# elif defined _IOERR || defined __TANDEM /* AIX, HP-UX, IRIX, OSF/1, Solaris, OpenServer, mingw */ fp->_ptr = fp->_base; if (fp->_ptr != NULL) fp->_cnt = 0; diff --git a/lib/sh/mbscasecmp.c b/lib/sh/mbscasecmp.c new file mode 100644 index 0000000..3828164 --- /dev/null +++ b/lib/sh/mbscasecmp.c @@ -0,0 +1,78 @@ +/* mbscasecmp - case-insensitive multibyte string comparison. */ + +/* Copyright (C) 2009 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. 
If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#if !defined (HAVE_MBSCASECMP) && defined (HANDLE_MULTIBYTE) + +#include <stdlib.h> +#include <stddef.h> +#include <string.h> + +#include <wchar.h> +#include <wctype.h> + +/* Compare MBS1 and MBS2 without regard to case. */ +int +mbscasecmp (mbs1, mbs2) + const char *mbs1; + const char *mbs2; +{ + int len1, len2; + wchar_t c1, c2, l1, l2; + + len1 = len2 = 0; + /* Reset multibyte characters to their initial state. */ + (void) mblen ((char *) NULL, 0); + + do + { + len1 = mbtowc (&c1, mbs1, MB_CUR_MAX); + len2 = mbtowc (&c2, mbs2, MB_CUR_MAX); + + if (len1 == 0) + return len2 == 0 ? 0 : -1; + else if (len2 == 0) + return 1; + else if (len1 > 0 && len2 < 0) + return -1; + else if (len1 < 0 && len2 > 0) + return 1; + else if (len1 < 0 && len2 < 0) + { + len1 = strlen (mbs1); + len2 = strlen (mbs2); + return (len1 == len2 ? memcmp (mbs1, mbs2, len1) + : ((len1 < len2) ? (memcmp (mbs1, mbs2, len1) > 0 ? 1 : -1) + : (memcmp (mbs1, mbs2, len2) >= 0 ? 1 : -1))); + } + + l1 = towlower (c1); + l2 = towlower (c2); + + mbs1 += len1; + mbs2 += len2; + } + while (l1 == l2); + + return l1 - l2; +} + +#endif diff --git a/lib/sh/mbschr.c b/lib/sh/mbschr.c new file mode 100644 index 0000000..5143d64 --- /dev/null +++ b/lib/sh/mbschr.c @@ -0,0 +1,78 @@ +/* mbschr.c - strchr(3) that handles multibyte characters. */ + +/* Copyright (C) 2002 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#ifdef HAVE_STDLIB_H +# include <stdlib.h> +#endif + +#include "bashansi.h" +#include "shmbutil.h" + +#undef mbschr + +/* In some locales, a non-first byte of a multibyte character can have + the same value as an ASCII character. Faced with such strings, a + legacy strchr() might return the wrong value. mbschr returns a pointer to the first single-byte character in S equal to C, or NULL if there is none; invalid or incomplete sequences are stepped over one byte at a time. */ + +char * +#if defined (PROTOTYPES) +mbschr (const char *s, int c) +#else +mbschr (s, c) + const char *s; + int c; +#endif +{ +#if HANDLE_MULTIBYTE + char *pos; + mbstate_t state; + size_t strlength, mblength; + + /* The locale encodings with said weird property are BIG5, BIG5-HKSCS, + GBK, GB18030, SHIFT_JIS, and JOHAB. They exhibit the problem only + when c >= 0x30. We can therefore use the faster bytewise search if + c < 0x30. */ + if ((unsigned char)c >= '0' && MB_CUR_MAX > 1) + { + pos = (char *)s; + memset (&state, '\0', sizeof(mbstate_t)); + strlength = strlen (s); + + while (strlength > 0) + { + mblength = mbrlen (pos, strlength, &state); + if (mblength == (size_t)-2 || mblength == (size_t)-1 || mblength == (size_t)0) + mblength = 1; + + if (mblength == 1 && c == (unsigned char)*pos) + return pos; + + strlength -= mblength; + pos += mblength; + } + + return ((char *)NULL); + } + else +#endif + return (strchr (s, c)); +} diff --git a/lib/sh/oslib.c b/lib/sh/oslib.c index d47f9dc..b3470d1 100644 --- a/lib/sh/oslib.c +++ b/lib/sh/oslib.c @@ -1,6 +1,6 @@ /* oslib.c - functions present only in some unix versions. */ -/* Copyright (C) 1995 Free Software Foundation, Inc. +/* Copyright (C) 1995,2010 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell.
@@ -37,6 +37,10 @@ #include <filecntl.h> #include <bashansi.h> +#if !defined (HAVE_KILLPG) +# include <signal.h> +#endif + #include <stdio.h> #include <errno.h> #include <chartypes.h> @@ -209,7 +213,8 @@ gethostname (name, namelen) # else /* !HAVE_UNAME */ int gethostname (name, namelen) - int name, namelen; + char *name; + int namelen; { strncpy (name, "unknown", namelen); name[namelen] = '\0'; diff --git a/lib/sh/shmatch.c b/lib/sh/shmatch.c index 6de1dc1..3abefed 100644 --- a/lib/sh/shmatch.c +++ b/lib/sh/shmatch.c @@ -62,7 +62,7 @@ sh_regmatch (string, pattern, flags) #if defined (ARRAY_VARS) rematch = (SHELL_VAR *)NULL; #endif - + rflags = REG_EXTENDED; if (glob_ignore_case || match_ignore_case) rflags |= REG_ICASE; diff --git a/lib/sh/shmbchar.c b/lib/sh/shmbchar.c new file mode 100644 index 0000000..c5badc1 --- /dev/null +++ b/lib/sh/shmbchar.c @@ -0,0 +1,92 @@ +/* Copyright (C) 2001, 2006, 2009, 2010 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + + +#include <config.h> + +#if defined (HANDLE_MULTIBYTE) +#include <stdlib.h> +#include <limits.h> + +#include <shmbutil.h> +#include <shmbchar.h> + +#if IS_BASIC_ASCII + +/* Bit table of characters in the ISO C "basic character set". */ +const unsigned int is_basic_table [UCHAR_MAX / 32 + 1] = +{ + 0x00001a00, /* '\t' '\v' '\f' */ + 0xffffffef, /* ' '...'#' '%'...'?' 
*/ + 0xfffffffe, /* 'A'...'Z' '[' '\\' ']' '^' '_' */ + 0x7ffffffe /* 'a'...'z' '{' '|' '}' '~' */ + /* The remaining bits are 0. */ +}; + +#endif /* IS_BASIC_ASCII */ + +size_t +mbstrlen (s) + const char *s; +{ + size_t clen, nc; + mbstate_t mbs = { 0 }, mbsbak = { 0 }; + int f; + + nc = 0; + while (*s && (clen = (f = is_basic (*s)) ? 1 : mbrlen(s, MB_CUR_MAX, &mbs)) != 0) + { + if (MB_INVALIDCH(clen)) + { + clen = 1; /* assume single byte */ + mbs = mbsbak; + } + + if (f == 0) + mbsbak = mbs; + + s += clen; + nc++; + } + return nc; +} + +/* Return pointer to first multibyte char in S, or NULL if none. */ +char * +mbsmbchar (s) + const char *s; +{ + char *t; + size_t clen; + mbstate_t mbs = { 0 }; + + for (t = (char *)s; *t; t++) + { + if (is_basic (*t)) + continue; + + clen = mbrlen (t, MB_CUR_MAX, &mbs); + + if (clen == 0) + return 0; + if (MB_INVALIDCH(clen)) + continue; + + if (clen > 1) + return t; + } + return 0; +} +#endif diff --git a/lib/sh/shquote.c b/lib/sh/shquote.c index a267d38..a1e9146 100644 --- a/lib/sh/shquote.c +++ b/lib/sh/shquote.c @@ -42,10 +42,11 @@ Used by alias and trap, among others. */ char * sh_single_quote (string) - char *string; + const char *string; { register int c; - char *result, *r, *s; + char *result, *r; + const char *s; result = (char *)xmalloc (3 + (4 * strlen (string))); r = result; @@ -72,10 +73,11 @@ sh_single_quote (string) /* Quote STRING using double quotes. Return a new string. */ char * sh_double_quote (string) - char *string; + const char *string; { register unsigned char c; - char *result, *r, *s; + char *result, *r; + const char *s; result = (char *)xmalloc (3 + (2 * strlen (string))); r = result; diff --git a/lib/sh/snprintf.c b/lib/sh/snprintf.c index d681b16..d46b2d9 100644 --- a/lib/sh/snprintf.c +++ b/lib/sh/snprintf.c @@ -9,7 +9,7 @@ Unix snprintf implementation. derived from inetutils/libinetutils/snprintf.c Version 1.1 - Copyright (C) 2001,2006 Free Software Foundation, Inc. 
+ Copyright (C) 2001,2006,2010 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. diff --git a/lib/sh/strcasestr.c b/lib/sh/strcasestr.c new file mode 100644 index 0000000..c819b3e --- /dev/null +++ b/lib/sh/strcasestr.c @@ -0,0 +1,46 @@ +/* strcasestr.c - Find if one string appears as a substring of another string, + without regard to case. */ + +/* Copyright (C) 2000 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#include <bashansi.h> +#include <chartypes.h> + +#include <stdc.h> + +/* Determine if s2 occurs in s1. If so, return a pointer to the + match in s1. The compare is case insensitive. This is a + case-insensitive strstr(3). */ +char * +strcasestr (s1, s2) + const char *s1; + const char *s2; +{ + register int i, l, len, c; + + c = TOLOWER ((unsigned char)s2[0]); + len = strlen (s1); + l = strlen (s2); + for (i = 0; (len - i) >= l; i++) + if ((TOLOWER ((unsigned char)s1[i]) == c) && (strncasecmp (s1 + i, s2, l) == 0)) + return ((char *)s1 + i); + return ((char *)0); +} diff --git a/lib/sh/strchrnul.c b/lib/sh/strchrnul.c new file mode 100644 index 0000000..2e1608e --- /dev/null +++ b/lib/sh/strchrnul.c @@ -0,0 +1,144 @@ +/* Searching in a string. + Copyright (C) 2003, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + +#include <config.h> + +/* Specification. */ +#include <string.h> + + /* On 32-bit hardware, choosing longword to be a 32-bit unsigned + long instead of a 64-bit uintmax_t tends to give better + performance. On 64-bit hardware, unsigned long is generally 64 + bits already. Change this typedef to experiment with + performance. */ + typedef unsigned long int longword; + +/* Find the first occurrence of C in S or the final NUL byte. */ +char * +strchrnul (s, c_in) + const char *s; + int c_in; +{ + const unsigned char *char_ptr; + const longword *longword_ptr; + longword repeated_one; + longword repeated_c; + unsigned char c; + + c = (unsigned char) c_in; + if (c == 0) /* find final null byte */ + return (char *)(s ? (s + strlen (s)) : s); + + /* Handle the first few bytes by reading one byte at a time. + Do this until CHAR_PTR is aligned on a longword boundary. */ + for (char_ptr = (const unsigned char *) s; + (size_t) char_ptr % sizeof (longword) != 0; + ++char_ptr) + if (!*char_ptr || *char_ptr == c) + return (char *) char_ptr; + + longword_ptr = (const longword *) char_ptr; + + /* All these elucidatory comments refer to 4-byte longwords, + but the theory applies equally well to any size longwords. */ + + /* Compute auxiliary longword values: + repeated_one is a value which has a 1 in every byte. + repeated_c has c in every byte. 
*/ + repeated_one = 0x01010101; + repeated_c = c | (c << 8); + repeated_c |= repeated_c << 16; + if (0xffffffffU < (longword) -1) + { + repeated_one |= repeated_one << 31 << 1; + repeated_c |= repeated_c << 31 << 1; + if (8 < sizeof (longword)) + { + size_t i; + + for (i = 64; i < sizeof (longword) * 8; i *= 2) + { + repeated_one |= repeated_one << i; + repeated_c |= repeated_c << i; + } + } + } + + /* Instead of the traditional loop which tests each byte, we will + test a longword at a time. The tricky part is testing if *any of + the four* bytes in the longword in question are equal to NUL or + c. We first use an xor with repeated_c. This reduces the task + to testing whether *any of the four* bytes in longword1 or + longword2 is zero. + + Let's consider longword1. We compute tmp = + ((longword1 - repeated_one) & ~longword1) & (repeated_one << 7). + That is, we perform the following operations: + 1. Subtract repeated_one. + 2. & ~longword1. + 3. & a mask consisting of 0x80 in every byte. + Consider what happens in each byte: + - If a byte of longword1 is zero, step 1 and 2 transform it into 0xff, + and step 3 transforms it into 0x80. A carry can also be propagated + to more significant bytes. + - If a byte of longword1 is nonzero, let its lowest 1 bit be at + position k (0 <= k <= 7); so the lowest k bits are 0. After step 1, + the byte ends in a single bit of value 0 and k bits of value 1. + After step 2, the result is just k bits of value 1: 2^k - 1. After + step 3, the result is 0. And no carry is produced. + So, if longword1 has only non-zero bytes, tmp is zero. + Whereas if longword1 has a zero byte, call j the position of the least + significant zero byte. Then the result has a zero at positions 0, ..., + j-1 and a 0x80 at position j. We cannot predict the result at the more + significant bytes (positions j+1..3), but it does not matter since we + already have a non-zero bit at position 8*j+7. 
+ + The test whether any byte in longword1 or longword2 is zero is equivalent + to testing whether tmp1 is nonzero or tmp2 is nonzero. We can combine + this into a single test, whether (tmp1 | tmp2) is nonzero. + + This test can read more than one byte beyond the end of a string, + depending on where the terminating NUL is encountered. However, + this is considered safe since the initialization phase ensured + that the read will be aligned, therefore, the read will not cross + page boundaries and will not cause a fault. */ + + while (1) + { + longword longword1 = *longword_ptr ^ repeated_c; + longword longword2 = *longword_ptr; + + if (((((longword1 - repeated_one) & ~longword1) + | ((longword2 - repeated_one) & ~longword2)) + & (repeated_one << 7)) != 0) + break; + longword_ptr++; + } + + char_ptr = (const unsigned char *) longword_ptr; + + /* At this point, we know that one of the sizeof (longword) bytes + starting at char_ptr is == 0 or == c. On little-endian machines, + we could determine the first such byte without any further memory + accesses, just by looking at the tmp result from the last loop + iteration. But this does not work on big-endian machines. + Choose code that works in both cases. */ + + char_ptr = (unsigned char *) longword_ptr; + while (*char_ptr && (*char_ptr != c)) + char_ptr++; + return (char *) char_ptr; +} diff --git a/lib/sh/strftime.c b/lib/sh/strftime.c index 572baae..aa3fc85 100644 --- a/lib/sh/strftime.c +++ b/lib/sh/strftime.c @@ -24,9 +24,6 @@ * It also doesn't worry about multi-byte characters. * So there. * - * This file is also shipped with GAWK (GNU Awk), gawk specific bits of - * code are included if GAWK is defined. 
- * * Arnold Robbins * January, February, March, 1991 * Updated March, April 1992 @@ -39,6 +36,8 @@ * Updated July, 1997 * Updated October, 1999 * Updated September, 2000 + * Updated December, 2001 + * Updated January, 2011 * * Fixes from ado@elsie.nci.nih.gov, * February 1991, May 1992 @@ -54,14 +53,15 @@ * July 1997 * Moved to C99 specification. * September 2000 + * Fixes from Tanaka Akira <akr@m17n.org> + * December 2001 */ #include <config.h> -#ifndef GAWK #include <stdio.h> #include <ctype.h> #include <time.h> -#endif + #if defined(TM_IN_SYS_TIME) #include <sys/types.h> #include <sys/time.h> @@ -74,9 +74,7 @@ #define SUNOS_EXT 1 /* stuff in SunOS strftime routine */ #define VMS_EXT 1 /* include %v for VMS date format */ #define HPUX_EXT 1 /* non-conflicting stuff in HP-UX date */ -#ifndef GAWK #define POSIX_SEMANTICS 1 /* call tzset() if TZ changes */ -#endif #undef strchr /* avoid AIX weirdness */ @@ -114,6 +112,11 @@ extern int timezone, altzone; #undef min /* just in case */ +/* format for %+ -- currently unused */ +#ifndef NATIONAL_FORMAT +#define NATIONAL_FORMAT "%a %b %e %H:%M:%S %Z %Y" +#endif + /* min --- return minimum of two numbers */ static inline int @@ -141,7 +144,8 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) char *start = s; auto char tbuf[100]; long off; - int i, w, y; + int i, w; + long y; static short first = 1; #ifdef POSIX_SEMANTICS static char *savetz = NULL; @@ -282,7 +286,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) case 'C': century: - sprintf(tbuf, "%02d", (timeptr->tm_year + 1900) / 100); + sprintf(tbuf, "%02ld", (timeptr->tm_year + 1900L) / 100); break; case 'd': /* day of the month, 01 - 31 */ @@ -319,16 +323,16 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) */ w = iso8601wknum(timeptr); if (timeptr->tm_mon == 11 && w == 1) - y = 1900 + timeptr->tm_year + 1; + y = 1900L + timeptr->tm_year + 1; else if (timeptr->tm_mon 
== 0 && w >= 52) - y = 1900 + timeptr->tm_year - 1; + y = 1900L + timeptr->tm_year - 1; else - y = 1900 + timeptr->tm_year; + y = 1900L + timeptr->tm_year; if (*format == 'G') - sprintf(tbuf, "%d", y); + sprintf(tbuf, "%ld", y); else - sprintf(tbuf, "%02d", y % 100); + sprintf(tbuf, "%02ld", y % 100); break; case 'h': /* abbreviated month name */ @@ -387,7 +391,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) strftime(tbuf, sizeof tbuf, "%H:%M", timeptr); break; -#if defined(HAVE_MKTIME) || defined(GAWK) +#if defined(HAVE_MKTIME) case 's': /* time as seconds since the Epoch */ { struct tm non_const_timeptr; @@ -396,7 +400,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) sprintf(tbuf, "%ld", mktime(& non_const_timeptr)); break; } -#endif /* defined(HAVE_MKTIME) || defined(GAWK) */ +#endif /* defined(HAVE_MKTIME) */ case 'S': /* second, 00 - 60 */ i = range(0, timeptr->tm_sec, 60); @@ -452,7 +456,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) case 'Y': /* year with century */ fullyear: - sprintf(tbuf, "%d", 1900 + timeptr->tm_year); + sprintf(tbuf, "%ld", 1900L + timeptr->tm_year); break; /* @@ -495,6 +499,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) # ifdef HPUX off = -timezone / 60; # else + /* ADR: 4 August 2001, fixed this per gazelle@interaccess.com */ off = -(daylight ? 
altzone : timezone) / 60; # endif /* !HPUX */ #else /* !HAVE_TZNAME */ @@ -509,7 +514,7 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) } else { tbuf[0] = '+'; } - sprintf(tbuf+1, "%02d%02d", off/60, off%60); + sprintf(tbuf+1, "%02ld%02ld", off/60, off%60); break; case 'Z': /* time zone name or abbrevation */ @@ -558,10 +563,10 @@ strftime(char *s, size_t maxsize, const char *format, const struct tm *timeptr) #ifdef VMS_EXT case 'v': /* date as dd-bbb-YYYY */ - sprintf(tbuf, "%2d-%3.3s-%4d", + sprintf(tbuf, "%2d-%3.3s-%4ld", range(1, timeptr->tm_mday, 31), months_a[range(0, timeptr->tm_mon, 11)], - timeptr->tm_year + 1900); + timeptr->tm_year + 1900L); for (i = 3; i < 6; i++) if (islower(tbuf[i])) tbuf[i] = toupper(tbuf[i]); @@ -594,7 +599,7 @@ out: /* isleap --- is a year a leap year? */ static int -isleap(int year) +isleap(long year) { return ((year % 4 == 0 && year % 100 != 0) || year % 400 == 0); } @@ -677,7 +682,7 @@ iso8601wknum(const struct tm *timeptr) dec31ly.tm_mon = 11; dec31ly.tm_mday = 31; dec31ly.tm_wday = (jan1day == 0) ? 6 : jan1day - 1; - dec31ly.tm_yday = 364 + isleap(dec31ly.tm_year + 1900); + dec31ly.tm_yday = 364 + isleap(dec31ly.tm_year + 1900L); weeknum = iso8601wknum(& dec31ly); #endif } diff --git a/lib/sh/strtrans.c b/lib/sh/strtrans.c index 355a306..2265782 100644 --- a/lib/sh/strtrans.c +++ b/lib/sh/strtrans.c @@ -1,6 +1,6 @@ /* strtrans.c - Translate and untranslate strings with ANSI-C escape sequences. */ -/* Copyright (C) 2000 Free Software Foundation, Inc. +/* Copyright (C) 2000-2010 Free Software Foundation, Inc. This file is part of GNU Bash, the Bourne Again SHell. 
@@ -51,11 +51,16 @@ ansicstr (string, len, flags, sawc, rlen) { int c, temp; char *ret, *r, *s; + unsigned long v; if (string == 0 || *string == '\0') return ((char *)NULL); +#if defined (HANDLE_MULTIBYTE) + ret = (char *)xmalloc (4*len + 1); +#else ret = (char *)xmalloc (2*len + 1); /* 2*len for possible CTLESC */ +#endif for (r = ret, s = string; s && *s; ) { c = *s++; @@ -128,6 +133,29 @@ ansicstr (string, len, flags, sawc, rlen) } c &= 0xFF; break; +#if defined (HANDLE_MULTIBYTE) + case 'u': + case 'U': + temp = (c == 'u') ? 4 : 8; /* \uNNNN \UNNNNNNNN */ + for (v = 0; ISXDIGIT ((unsigned char)*s) && temp--; s++) + v = (v * 16) + HEXVALUE (*s); + if (temp == ((c == 'u') ? 4 : 8)) + { + *r++ = '\\'; /* c remains unchanged */ + break; + } + else if (v <= UCHAR_MAX) + { + c = v; + break; + } + else + { + temp = u32cconv (v, r); + r += temp; + continue; + } +#endif case '\\': break; case '\'': case '"': case '?': @@ -143,9 +171,13 @@ ansicstr (string, len, flags, sawc, rlen) *rlen = r - ret; return ret; } + else if ((flags & 1) == 0 && *s == 0) + ; /* pass \c through */ else if ((flags & 1) == 0 && (c = *s)) { s++; + if ((flags & 2) && c == '\\' && c == *s) + s++; /* Posix requires $'\c\\' do backslash escaping */ c = TOCTRL(c); break; } diff --git a/lib/sh/tmpfile.c b/lib/sh/tmpfile.c index a87c254..0bbc287 100644 --- a/lib/sh/tmpfile.c +++ b/lib/sh/tmpfile.c @@ -40,7 +40,7 @@ extern int errno; #endif -#define BASEOPENFLAGS (O_CREAT | O_TRUNC | O_EXCL) +#define BASEOPENFLAGS (O_CREAT | O_TRUNC | O_EXCL | O_BINARY) #define DEFAULT_TMPDIR "." /* bogus default, should be changed */ #define DEFAULT_NAMEROOT "shtmp" diff --git a/lib/sh/unicode.c b/lib/sh/unicode.c new file mode 100644 index 0000000..d34fa08 --- /dev/null +++ b/lib/sh/unicode.c @@ -0,0 +1,235 @@ +/* unicode.c - functions to convert unicode characters */ + +/* Copyright (C) 2010 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. 
+ + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#if defined (HANDLE_MULTIBYTE) + +#include <stdc.h> +#include <wchar.h> +#include <bashansi.h> +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#include <limits.h> + +#if HAVE_ICONV +# include <iconv.h> +#endif + +#include <xmalloc.h> + +#ifndef USHORT_MAX +# ifdef USHRT_MAX +# define USHORT_MAX USHRT_MAX +# else +# define USHORT_MAX ((unsigned short) ~(unsigned short)0) +# endif +#endif + +#if !defined (STREQ) +# define STREQ(a, b) ((a)[0] == (b)[0] && strcmp ((a), (b)) == 0) +#endif /* !STREQ */ + +#if defined (HAVE_LOCALE_CHARSET) +extern const char *locale_charset __P((void)); +#else +extern char *get_locale_var __P((char *)); +#endif + +static int u32init = 0; +static int utf8locale = 0; +#if defined (HAVE_ICONV) +static iconv_t localconv; +#endif + +#ifndef HAVE_LOCALE_CHARSET +static char * +stub_charset () +{ + char *locale, *s, *t; + + locale = get_locale_var ("LC_CTYPE"); + if (locale == 0 || *locale == 0) + return "ASCII"; + s = strrchr (locale, '.'); + if (s) + { + t = strchr (s, '@'); + if (t) + *t = 0; + return ++s; + } + else if (STREQ (locale, "UTF-8")) + return "UTF-8"; + else + return "ASCII"; +} +#endif + +/* u32toascii ? */ +int +u32tochar (wc, s) + wchar_t wc; + char *s; +{ + unsigned long x; + int l; + + x = wc; + l = (x <= UCHAR_MAX) ? 1 : ((x <= USHORT_MAX) ? 
2 : 4); + + if (x <= UCHAR_MAX) + s[0] = x & 0xFF; + else if (x <= USHORT_MAX) /* assume unsigned short = 16 bits */ + { + s[0] = (x >> 8) & 0xFF; + s[1] = x & 0xFF; + } + else + { + s[0] = (x >> 24) & 0xFF; + s[1] = (x >> 16) & 0xFF; + s[2] = (x >> 8) & 0xFF; + s[3] = x & 0xFF; + } + s[l] = '\0'; + return l; +} + +int +u32toutf8 (wc, s) + wchar_t wc; + char *s; +{ + int l; + + l = (wc < 0x0080) ? 1 : ((wc < 0x0800) ? 2 : 3); + + if (wc < 0x0080) + s[0] = (unsigned char)wc; + else if (wc < 0x0800) + { + s[0] = (wc >> 6) | 0xc0; + s[1] = (wc & 0x3f) | 0x80; + } + else + { + s[0] = (wc >> 12) | 0xe0; + s[1] = ((wc >> 6) & 0x3f) | 0x80; + s[2] = (wc & 0x3f) | 0x80; + } + s[l] = '\0'; + return l; +} + +/* convert a single unicode-32 character into a multibyte string and put the + result in S, which must be large enough (at least MB_LEN_MAX bytes) */ +int +u32cconv (c, s) + unsigned long c; + char *s; +{ + wchar_t wc; + int n; +#if HAVE_ICONV + const char *charset; + char obuf[25], *optr; + size_t obytesleft; + const char *iptr; + size_t sn; +#endif + + wc = c; + +#if __STDC_ISO_10646__ + if (sizeof (wchar_t) == 4) + { + n = wctomb (s, wc); + return n; + } +#endif + +#if HAVE_NL_LANGINFO + codeset = nl_langinfo (CODESET); + if (STREQ (codeset, "UTF-8")) + { + n = u32toutf8 (wc, s); + return n; + } +#endif + +#if HAVE_ICONV + /* this is mostly from coreutils-8.5/lib/unicodeio.c */ + if (u32init == 0) + { +# if HAVE_LOCALE_CHARSET + charset = locale_charset (); /* XXX - fix later */ +# else + charset = stub_charset (); +# endif + if (STREQ (charset, "UTF-8")) + utf8locale = 1; + else + { + localconv = iconv_open (charset, "UTF-8"); + if (localconv == (iconv_t)-1) + localconv = iconv_open (charset, "ASCII"); + } + u32init = 1; + } + + if (utf8locale) + { + n = u32toutf8 (wc, s); + return n; + } + + if (localconv == (iconv_t)-1) + { + n = u32tochar (wc, s); + return n; + } + + n = u32toutf8 (wc, s); + + optr = obuf; + obytesleft = sizeof (obuf); + iptr = s; + sn = n; + + 
iconv (localconv, NULL, NULL, NULL, NULL); + + if (iconv (localconv, (ICONV_CONST char **)&iptr, &sn, &optr, &obytesleft) == (size_t)-1) + return n; /* You get utf-8 if iconv fails */ + + *optr = '\0'; + + /* number of chars to be copied is optr - obuf if we want to do bounds + checking */ + strcpy (s, obuf); + return (optr - obuf); +#endif + + n = u32tochar (wc, s); /* fallback */ + return n; +} + +#endif /* HANDLE_MULTIBYTE */ diff --git a/lib/sh/wcswidth.c b/lib/sh/wcswidth.c new file mode 100644 index 0000000..1a30d9f --- /dev/null +++ b/lib/sh/wcswidth.c @@ -0,0 +1,46 @@ +/* wcswidth.c - compute display width of wide character string */ + +/* Copyright (C) 2010 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#include <config.h> + +#if defined (HANDLE_MULTIBYTE) && !defined (HAVE_WCSWIDTH) + +#include <stdc.h> +#include <wchar.h> +#include <bashansi.h> + +int +wcswidth(pwcs, n) + const wchar_t *pwcs; + size_t n; +{ + wchar_t wc; + int len, l; + + len = 0; + while (n-- > 0 && (wc = *pwcs++) != L'\0') + { + if ((l = wcwidth(wc)) < 0) + return (-1); + len += l; + } + return (len); +} +#endif |