diff options
author | Jakub Zawadzki <darkjames-ws@darkjames.pl> | 2014-01-07 22:17:32 +0000 |
---|---|---|
committer | Jakub Zawadzki <darkjames-ws@darkjames.pl> | 2014-01-07 22:17:32 +0000 |
commit | abda30e9e6d8fd9aa28edc4677796e61a9c88997 (patch) | |
tree | ab90b4994f29142a1554c36b6160033b3d499405 | |
parent | d1dcee936b2a0ed257c526889b664e2b314d3eb0 (diff) | |
download | wireshark-abda30e9e6d8fd9aa28edc4677796e61a9c88997.tar.gz wireshark-abda30e9e6d8fd9aa28edc4677796e61a9c88997.tar.bz2 wireshark-abda30e9e6d8fd9aa28edc4677796e61a9c88997.zip |
Fix bug #9618: Invalid utf8 causes JSON dissector assertion failure "g_utf8_validate"
Validate JSON UTF-8 characters, replace with '?' when invalid.
svn path=/trunk/; revision=54633
-rw-r--r-- | epan/dissectors/packet-json.c | 41 | ||||
-rw-r--r-- | wsutil/CMakeLists.txt | 2 | ||||
-rw-r--r-- | wsutil/Makefile.am | 2 | ||||
-rw-r--r-- | wsutil/Makefile.common | 6 | ||||
-rw-r--r-- | wsutil/Makefile.nmake | 1 | ||||
-rw-r--r-- | wsutil/unicode-utils.c | 21 | ||||
-rw-r--r-- | wsutil/unicode-utils.h | 18 |
7 files changed, 71 insertions, 20 deletions
diff --git a/epan/dissectors/packet-json.c b/epan/dissectors/packet-json.c index dc339b5e61..3c9f09193d 100644 --- a/epan/dissectors/packet-json.c +++ b/epan/dissectors/packet-json.c @@ -249,6 +249,30 @@ static void after_array(void *tvbparse_data, const void *wanted_data _U_, tvbpar wmem_stack_pop(data->stack); } +static int +json_tvb_memcpy_utf8(char *buf, tvbuff_t *tvb, int offset, int offset_max) +{ + int len = ws_utf8_char_len((guint8) *buf); + + /* XXX, before moving to core API check if it's off-by-one safe. + * For JSON analyzer it's not a problem + * (string always terminated by ", which is not valid UTF-8 continuation character) */ + if (len == -1 || ((guint) (offset + len)) >= (guint) offset_max) { + *buf = '?'; + return 1; + } + + /* assume it's valid UTF-8 */ + tvb_memcpy(tvb, buf + 1, offset + 1, len - 1); + + if (!g_utf8_validate(buf, len, NULL)) { + *buf = '?'; + return 1; + } + + return len; +} + static char *json_string_unescape(tvbparse_elem_t *tok) { char *str = (char *)wmem_alloc(wmem_packet_scope(), tok->len - 1); @@ -266,7 +290,6 @@ static char *json_string_unescape(tvbparse_elem_t *tok) case '\"': case '\\': case '/': - default: str[j++] = ch; break; @@ -361,10 +384,22 @@ static char *json_string_unescape(tvbparse_elem_t *tok) str[j++] = '?'; break; } + + default: + /* not valid by JSON grammar (also tvbparse rules should not allow it) */ + DISSECTOR_ASSERT_NOT_REACHED(); + break; } - } else - str[j++] = ch; + } else { + int utf_len; + + str[j] = ch; + /* XXX if it's not valid UTF-8 character, add some expert info? (it violates JSON grammar) */ + utf_len = json_tvb_memcpy_utf8(&str[j], tok->tvb, i, tok->len); + j += utf_len; + i += (utf_len - 1); + } } str[j] = '\0'; diff --git a/wsutil/CMakeLists.txt b/wsutil/CMakeLists.txt index b14c236238..c4251ea0bb 100644 --- a/wsutil/CMakeLists.txt +++ b/wsutil/CMakeLists.txt @@ -30,7 +30,6 @@ IF(WIN32) inet_ntop.c inet_pton.c strptime.c - unicode-utils.c wsgetopt.c ) ENDIF(WIN32) @@ -69,6 +68,7 @@ set(WSUTIL_FILES tempfile.c type_util.c u3.c + unicode-utils.c ${WSUTIL_PLATFORM_FILES} ) diff --git a/wsutil/Makefile.am b/wsutil/Makefile.am index 6ead86a92f..0b1256c7e8 100644 --- a/wsutil/Makefile.am +++ b/wsutil/Makefile.am @@ -99,8 +99,6 @@ EXTRA_DIST = \ Makefile.nmake \ file_util.c \ file_util.h \ - unicode-utils.c \ - unicode-utils.h \ wsgcrypt.h CLEANFILES = \ diff --git a/wsutil/Makefile.common b/wsutil/Makefile.common index 96b5a7cfa3..0efc26fb1a 100644 --- a/wsutil/Makefile.common +++ b/wsutil/Makefile.common @@ -61,7 +61,8 @@ LIBWSUTIL_SRC = \ report_err.c \ tempfile.c \ type_util.c \ - u3.c + u3.c \ + unicode-utils.c # Header files that are not generated from other files LIBWSUTIL_INCLUDES = \ @@ -100,4 +101,5 @@ LIBWSUTIL_INCLUDES = \ report_err.h \ tempfile.h \ type_util.h \ - u3.h + u3.h \ + unicode-utils.h diff --git a/wsutil/Makefile.nmake b/wsutil/Makefile.nmake index 673c6e4817..4da5224075 100644 --- a/wsutil/Makefile.nmake +++ b/wsutil/Makefile.nmake @@ -28,7 +28,6 @@ OBJECTS = file_util.obj \ inet_pton.obj \ $(LIBWSUTIL_SRC:.c=.obj) \ strptime.obj \ - unicode-utils.obj \ wsgetopt.obj # For use when making libwsutil.dll diff --git a/wsutil/unicode-utils.c b/wsutil/unicode-utils.c index 8935e46f38..21cc489df7 100644 --- a/wsutil/unicode-utils.c +++ b/wsutil/unicode-utils.c @@ -22,12 +22,23 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. */ -#ifndef _WIN32 -#error "This is only for Windows" -#endif - #include "unicode-utils.h" +int +ws_utf8_char_len(guint8 ch) +{ + if (ch >= 0xfe) return -1; + if (ch >= 0xfc) return 6; + if (ch >= 0xf8) return 5; + if (ch >= 0xf0) return 4; + if (ch >= 0xe0) return 3; + if (ch >= 0xc0) return 2; + else return 1; +} + + +#ifdef _WIN32 + #include <shellapi.h> /** @file @@ -157,3 +168,5 @@ arg_list_utf_16to8(int argc, char *argv[]) { } } /* XXX else bail because something is horribly, horribly wrong? */ } + +#endif diff --git a/wsutil/unicode-utils.h b/wsutil/unicode-utils.h index a9d5318f6b..7b29d8460a 100644 --- a/wsutil/unicode-utils.h +++ b/wsutil/unicode-utils.h @@ -25,21 +25,25 @@ #ifndef __UNICODEUTIL_H__ #define __UNICODEUTIL_H__ -#include "ws_symbol_export.h" - -#ifdef _WIN32 - #include "config.h" +#include "ws_symbol_export.h" + #include <glib.h> -#include <windows.h> -#include <tchar.h> -#include <wchar.h> /** * @file Unicode convenience routines. */ +WS_DLL_PUBLIC +int ws_utf8_char_len(guint8 ch); + +#ifdef _WIN32 + +#include <windows.h> +#include <tchar.h> +#include <wchar.h> + /** Given a UTF-8 string, convert it to UTF-16. This is meant to be used * to convert between GTK+ 2.x (UTF-8) to Windows (UTF-16). * |