//===-- sanitizer_common_interceptors_scanf.inc -----------------*- C++ -*-===// // // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// // // Scanf implementation for use in *Sanitizer interceptors. // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html // with a few common GNU extensions. // //===----------------------------------------------------------------------===// #include struct ScanfDirective { int argIdx; // argument index, or -1 of not specified ("%n$") int fieldWidth; bool suppressed; // suppress assignment ("*") bool allocate; // allocate space ("m") char lengthModifier[2]; char convSpecifier; bool maybeGnuMalloc; }; static const char *parse_number(const char *p, int *out) { *out = internal_atoll(p); while (*p >= '0' && *p <= '9') ++p; return p; } static bool char_is_one_of(char c, const char *s) { return !!internal_strchr(s, c); } // Parse scanf format string. If a valid directive in encountered, it is // returned in dir. This function returns the pointer to the first // unprocessed character, or 0 in case of error. // In case of the end-of-string, a pointer to the closing \0 is returned. static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, ScanfDirective *dir) { internal_memset(dir, 0, sizeof(*dir)); dir->argIdx = -1; while (*p) { if (*p != '%') { ++p; continue; } ++p; // %% if (*p == '%') { ++p; continue; } if (*p == '\0') { return 0; } // %n$ if (*p >= '0' && *p <= '9') { int number; const char *q = parse_number(p, &number); if (*q == '$') { dir->argIdx = number; p = q + 1; } // Otherwise, do not change p. This will be re-parsed later as the field // width. } // * if (*p == '*') { dir->suppressed = true; ++p; } // Field width. if (*p >= '0' && *p <= '9') { p = parse_number(p, &dir->fieldWidth); if (dir->fieldWidth <= 0) return 0; } // m if (*p == 'm') { dir->allocate = true; ++p; } // Length modifier. if (char_is_one_of(*p, "jztLq")) { dir->lengthModifier[0] = *p; ++p; } else if (*p == 'h') { dir->lengthModifier[0] = 'h'; ++p; if (*p == 'h') { dir->lengthModifier[1] = 'h'; ++p; } } else if (*p == 'l') { dir->lengthModifier[0] = 'l'; ++p; if (*p == 'l') { dir->lengthModifier[1] = 'l'; ++p; } } // Conversion specifier. dir->convSpecifier = *p++; // Consume %[...] expression. if (dir->convSpecifier == '[') { if (*p == '^') ++p; if (*p == ']') ++p; while (*p && *p != ']') ++p; if (*p == 0) return 0; // unexpected end of string // Consume the closing ']'. ++p; } // This is unfortunately ambiguous between old GNU extension // of %as, %aS and %a[...] and newer POSIX %a followed by // letters s, S or [. if (allowGnuMalloc && dir->convSpecifier == 'a' && !dir->lengthModifier[0]) { if (*p == 's' || *p == 'S') { dir->maybeGnuMalloc = true; ++p; } else if (*p == '[') { // Watch for %a[h-j%d], if % appears in the // [...] range, then we need to give up, we don't know // if scanf will parse it as POSIX %a [h-j %d ] or // GNU allocation of string with range dh-j plus %. const char *q = p + 1; if (*q == '^') ++q; if (*q == ']') ++q; while (*q && *q != ']' && *q != '%') ++q; if (*q == 0 || *q == '%') return 0; p = q + 1; // Consume the closing ']'. dir->maybeGnuMalloc = true; } } break; } return p; } // Returns true if the character is an integer conversion specifier. static bool scanf_is_integer_conv(char c) { return char_is_one_of(c, "diouxXn"); } // Returns true if the character is an floating point conversion specifier. static bool scanf_is_float_conv(char c) { return char_is_one_of(c, "aAeEfFgG"); } // Returns string output character size for string-like conversions, // or 0 if the conversion is invalid. static int scanf_get_char_size(ScanfDirective *dir) { if (char_is_one_of(dir->convSpecifier, "CS")) { // wchar_t return 0; } if (char_is_one_of(dir->convSpecifier, "cs[")) { if (dir->lengthModifier[0] == 'l') // wchar_t return 0; else if (dir->lengthModifier[0] == 0) return sizeof(char); else return 0; } return 0; } enum ScanfStoreSize { // Store size not known in advance; can be calculated as strlen() of the // destination buffer. SSS_STRLEN = -1, // Invalid conversion specifier. SSS_INVALID = 0 }; // Returns the store size of a scanf directive (if >0), or a value of // ScanfStoreSize. static int scanf_get_store_size(ScanfDirective *dir) { if (dir->allocate) { if (!char_is_one_of(dir->convSpecifier, "cCsS[")) return SSS_INVALID; return sizeof(char *); } if (dir->maybeGnuMalloc) { if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) return SSS_INVALID; // This is ambiguous, so check the smaller size of char * (if it is // a GNU extension of %as, %aS or %a[...]) and float (if it is // POSIX %a followed by s, S or [ letters). return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); } if (scanf_is_integer_conv(dir->convSpecifier)) { switch (dir->lengthModifier[0]) { case 'h': return dir->lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); case 'l': return dir->lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); case 'L': return sizeof(long long); case 'j': return sizeof(INTMAX_T); case 'z': return sizeof(SIZE_T); case 't': return sizeof(PTRDIFF_T); case 0: return sizeof(int); default: return SSS_INVALID; } } if (scanf_is_float_conv(dir->convSpecifier)) { switch (dir->lengthModifier[0]) { case 'L': case 'q': return sizeof(long double); case 'l': return dir->lengthModifier[1] == 'l' ? sizeof(long double) : sizeof(double); case 0: return sizeof(float); default: return SSS_INVALID; } } if (char_is_one_of(dir->convSpecifier, "sS[")) { unsigned charSize = scanf_get_char_size(dir); if (charSize == 0) return SSS_INVALID; if (dir->fieldWidth == 0) return SSS_STRLEN; return (dir->fieldWidth + 1) * charSize; } if (char_is_one_of(dir->convSpecifier, "cC")) { unsigned charSize = scanf_get_char_size(dir); if (charSize == 0) return SSS_INVALID; if (dir->fieldWidth == 0) return charSize; return dir->fieldWidth * charSize; } if (dir->convSpecifier == 'p') { if (dir->lengthModifier[1] != 0) return SSS_INVALID; return sizeof(void *); } return SSS_INVALID; } // Common part of *scanf interceptors. // Process format string and va_list, and report all store ranges. // Stops when "consuming" n_inputs input items. static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, const char *format, va_list aq) { CHECK_GT(n_inputs, 0); const char *p = format; while (*p) { ScanfDirective dir; p = scanf_parse_next(p, allowGnuMalloc, &dir); if (!p) break; if (dir.convSpecifier == 0) { // This can only happen at the end of the format string. CHECK_EQ(*p, 0); break; } // Here the directive is valid. Do what it says. if (dir.argIdx != -1) { // Unsupported. break; } if (dir.suppressed) continue; int size = scanf_get_store_size(&dir); if (size == SSS_INVALID) break; void *argp = va_arg(aq, void *); if (dir.convSpecifier != 'n') --n_inputs; if (n_inputs < 0) break; if (size == SSS_STRLEN) { size = internal_strlen((const char *)argp) + 1; } COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); } }