diff options
Diffstat (limited to 'gcc-4.8/libgo/go/bufio')
-rw-r--r-- | gcc-4.8/libgo/go/bufio/bufio.go | 5 | ||||
-rw-r--r-- | gcc-4.8/libgo/go/bufio/bufio_test.go | 49 | ||||
-rw-r--r-- | gcc-4.8/libgo/go/bufio/example_test.go | 74 | ||||
-rw-r--r-- | gcc-4.8/libgo/go/bufio/export_test.go | 27 | ||||
-rw-r--r-- | gcc-4.8/libgo/go/bufio/scan.go | 346 | ||||
-rw-r--r-- | gcc-4.8/libgo/go/bufio/scan_test.go | 406 |
6 files changed, 901 insertions, 6 deletions
diff --git a/gcc-4.8/libgo/go/bufio/bufio.go b/gcc-4.8/libgo/go/bufio/bufio.go index 33779014c..df3501f2c 100644 --- a/gcc-4.8/libgo/go/bufio/bufio.go +++ b/gcc-4.8/libgo/go/bufio/bufio.go @@ -274,11 +274,10 @@ func (b *Reader) ReadSlice(delim byte) (line []byte, err error) { return b.buf, ErrBufferFull } } - panic("not reached") } // ReadLine is a low-level line-reading primitive. Most callers should use -// ReadBytes('\n') or ReadString('\n') instead. +// ReadBytes('\n') or ReadString('\n') instead or use a Scanner. // // ReadLine tries to return a single line, not including the end-of-line bytes. // If the line was too long for the buffer then isPrefix is set and the @@ -331,6 +330,7 @@ func (b *Reader) ReadLine() (line []byte, isPrefix bool, err error) { // it returns the data read before the error and the error itself (often io.EOF). // ReadBytes returns err != nil if and only if the returned data does not end in // delim. +// For simple uses, a Scanner may be more convenient. func (b *Reader) ReadBytes(delim byte) (line []byte, err error) { // Use ReadSlice to look for array, // accumulating full buffers. @@ -378,6 +378,7 @@ func (b *Reader) ReadBytes(delim byte) (line []byte, err error) { // it returns the data read before the error and the error itself (often io.EOF). // ReadString returns err != nil if and only if the returned data does not end in // delim. +// For simple uses, a Scanner may be more convenient. func (b *Reader) ReadString(delim byte) (line string, err error) { bytes, err := b.ReadBytes(delim) return string(bytes), err diff --git a/gcc-4.8/libgo/go/bufio/bufio_test.go b/gcc-4.8/libgo/go/bufio/bufio_test.go index 418690aa4..79ed0f178 100644 --- a/gcc-4.8/libgo/go/bufio/bufio_test.go +++ b/gcc-4.8/libgo/go/bufio/bufio_test.go @@ -7,6 +7,7 @@ package bufio_test import ( . "bufio" "bytes" + "errors" "fmt" "io" "io/ioutil" @@ -434,9 +435,12 @@ func TestWriteErrors(t *testing.T) { t.Errorf("Write hello to %v: %v", w, e) continue } - e = buf.Flush() - if e != w.expect { - t.Errorf("Flush %v: got %v, wanted %v", w, e, w.expect) + // Two flushes, to verify the error is sticky. + for i := 0; i < 2; i++ { + e = buf.Flush() + if e != w.expect { + t.Errorf("Flush %d/2 %v: got %v, wanted %v", i+1, w, e, w.expect) + } } } } @@ -953,7 +957,7 @@ func TestNegativeRead(t *testing.T) { t.Fatal("read did not panic") case error: if !strings.Contains(err.Error(), "reader returned negative count from Read") { - t.Fatal("wrong panic: %v", err) + t.Fatalf("wrong panic: %v", err) } default: t.Fatalf("unexpected panic value: %T(%v)", err, err) @@ -962,6 +966,43 @@ func TestNegativeRead(t *testing.T) { b.Read(make([]byte, 100)) } +var errFake = errors.New("fake error") + +type errorThenGoodReader struct { + didErr bool + nread int +} + +func (r *errorThenGoodReader) Read(p []byte) (int, error) { + r.nread++ + if !r.didErr { + r.didErr = true + return 0, errFake + } + return len(p), nil +} + +func TestReaderClearError(t *testing.T) { + r := &errorThenGoodReader{} + b := NewReader(r) + buf := make([]byte, 1) + if _, err := b.Read(nil); err != nil { + t.Fatalf("1st nil Read = %v; want nil", err) + } + if _, err := b.Read(buf); err != errFake { + t.Fatalf("1st Read = %v; want errFake", err) + } + if _, err := b.Read(nil); err != nil { + t.Fatalf("2nd nil Read = %v; want nil", err) + } + if _, err := b.Read(buf); err != nil { + t.Fatalf("3rd Read with buffer = %v; want nil", err) + } + if r.nread != 2 { + t.Errorf("num reads = %d; want 2", r.nread) + } +} + // An onlyReader only implements io.Reader, no matter what other methods the underlying implementation may have. type onlyReader struct { r io.Reader diff --git a/gcc-4.8/libgo/go/bufio/example_test.go b/gcc-4.8/libgo/go/bufio/example_test.go new file mode 100644 index 000000000..08a39441e --- /dev/null +++ b/gcc-4.8/libgo/go/bufio/example_test.go @@ -0,0 +1,74 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio_test + +import ( + "bufio" + "fmt" + "os" + "strconv" + "strings" +) + +// The simplest use of a Scanner, to read standard input as a set of lines. +func ExampleScanner_lines() { + scanner := bufio.NewScanner(os.Stdin) + for scanner.Scan() { + fmt.Println(scanner.Text()) // Println will add back the final '\n' + } + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "reading standard input:", err) + } +} + +// Use a Scanner to implement a simple word-count utility by scanning the +// input as a sequence of space-delimited tokens. +func ExampleScanner_words() { + // An artificial input source. + const input = "Now is the winter of our discontent,\nMade glorious summer by this sun of York.\n" + scanner := bufio.NewScanner(strings.NewReader(input)) + // Set the split function for the scanning operation. + scanner.Split(bufio.ScanWords) + // Count the words. + count := 0 + for scanner.Scan() { + count++ + } + if err := scanner.Err(); err != nil { + fmt.Fprintln(os.Stderr, "reading input:", err) + } + fmt.Printf("%d\n", count) + // Output: 15 +} + +// Use a Scanner with a custom split function (built by wrapping ScanWords) to validate +// 32-bit decimal input. +func ExampleScanner_custom() { + // An artificial input source. + const input = "1234 5678 1234567901234567890" + scanner := bufio.NewScanner(strings.NewReader(input)) + // Create a custom split function by wrapping the existing ScanWords function. + split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + advance, token, err = bufio.ScanWords(data, atEOF) + if err == nil && token != nil { + _, err = strconv.ParseInt(string(token), 10, 32) + } + return + } + // Set the split function for the scanning operation. + scanner.Split(split) + // Validate the input + for scanner.Scan() { + fmt.Printf("%s\n", scanner.Text()) + } + + if err := scanner.Err(); err != nil { + fmt.Printf("Invalid input: %s", err) + } + // Output: + // 1234 + // 5678 + // Invalid input: strconv.ParseInt: parsing "1234567901234567890": value out of range +} diff --git a/gcc-4.8/libgo/go/bufio/export_test.go b/gcc-4.8/libgo/go/bufio/export_test.go new file mode 100644 index 000000000..3d3bb27d8 --- /dev/null +++ b/gcc-4.8/libgo/go/bufio/export_test.go @@ -0,0 +1,27 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio + +// Exported for testing only. +import ( + "unicode/utf8" +) + +var IsSpace = isSpace + +func (s *Scanner) MaxTokenSize(n int) { + if n < utf8.UTFMax || n > 1e9 { + panic("bad max token size") + } + if n < len(s.buf) { + s.buf = make([]byte, n) + } + s.maxTokenSize = n +} + +// ErrOrEOF is like Err, but returns EOF. Used to test a corner case. +func (s *Scanner) ErrOrEOF() error { + return s.err +} diff --git a/gcc-4.8/libgo/go/bufio/scan.go b/gcc-4.8/libgo/go/bufio/scan.go new file mode 100644 index 000000000..2e1a2e999 --- /dev/null +++ b/gcc-4.8/libgo/go/bufio/scan.go @@ -0,0 +1,346 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio + +import ( + "bytes" + "errors" + "io" + "unicode/utf8" +) + +// Scanner provides a convenient interface for reading data such as +// a file of newline-delimited lines of text. Successive calls to +// the Scan method will step through the 'tokens' of a file, skipping +// the bytes between the tokens. The specification of a token is +// defined by a split function of type SplitFunc; the default split +// function breaks the input into lines with line termination stripped. Split +// functions are defined in this package for scanning a file into +// lines, bytes, UTF-8-encoded runes, and space-delimited words. The +// client may instead provide a custom split function. +// +// Scanning stops unrecoverably at EOF, the first I/O error, or a token too +// large to fit in the buffer. When a scan stops, the reader may have +// advanced arbitrarily far past the last token. Programs that need more +// control over error handling or large tokens, or must run sequential scans +// on a reader, should use bufio.Reader instead. +// +type Scanner struct { + r io.Reader // The reader provided by the client. + split SplitFunc // The function to split the tokens. + maxTokenSize int // Maximum size of a token; modified by tests. + token []byte // Last token returned by split. + buf []byte // Buffer used as argument to split. + start int // First non-processed byte in buf. + end int // End of data in buf. + err error // Sticky error. +} + +// SplitFunc is the signature of the split function used to tokenize the +// input. The arguments are an initial substring of the remaining unprocessed +// data and a flag, atEOF, that reports whether the Reader has no more data +// to give. The return values are the number of bytes to advance the input +// and the next token to return to the user, plus an error, if any. If the +// data does not yet hold a complete token, for instance if it has no newline +// while scanning lines, SplitFunc can return (0, nil) to signal the Scanner +// to read more data into the slice and try again with a longer slice +// starting at the same point in the input. +// +// If the returned error is non-nil, scanning stops and the error +// is returned to the client. +// +// The function is never called with an empty data slice unless atEOF +// is true. If atEOF is true, however, data may be non-empty and, +// as always, holds unprocessed text. +type SplitFunc func(data []byte, atEOF bool) (advance int, token []byte, err error) + +// Errors returned by Scanner. +var ( + ErrTooLong = errors.New("bufio.Scanner: token too long") + ErrNegativeAdvance = errors.New("bufio.Scanner: SplitFunc returns negative advance count") + ErrAdvanceTooFar = errors.New("bufio.Scanner: SplitFunc returns advance count beyond input") +) + +const ( + // Maximum size used to buffer a token. The actual maximum token size + // may be smaller as the buffer may need to include, for instance, a newline. + MaxScanTokenSize = 64 * 1024 +) + +// NewScanner returns a new Scanner to read from r. +// The split function defaults to ScanLines. +func NewScanner(r io.Reader) *Scanner { + return &Scanner{ + r: r, + split: ScanLines, + maxTokenSize: MaxScanTokenSize, + buf: make([]byte, 4096), // Plausible starting size; needn't be large. + } +} + +// Err returns the first non-EOF error that was encountered by the Scanner. +func (s *Scanner) Err() error { + if s.err == io.EOF { + return nil + } + return s.err +} + +// Bytes returns the most recent token generated by a call to Scan. +// The underlying array may point to data that will be overwritten +// by a subsequent call to Scan. It does no allocation. +func (s *Scanner) Bytes() []byte { + return s.token +} + +// Text returns the most recent token generated by a call to Scan +// as a newly allocated string holding its bytes. +func (s *Scanner) Text() string { + return string(s.token) +} + +// Scan advances the Scanner to the next token, which will then be +// available through the Bytes or Text method. It returns false when the +// scan stops, either by reaching the end of the input or an error. +// After Scan returns false, the Err method will return any error that +// occurred during scanning, except that if it was io.EOF, Err +// will return nil. +func (s *Scanner) Scan() bool { + // Loop until we have a token. + for { + // See if we can get a token with what we already have. + if s.end > s.start { + advance, token, err := s.split(s.buf[s.start:s.end], s.err != nil) + if err != nil { + s.setErr(err) + return false + } + if !s.advance(advance) { + return false + } + s.token = token + if token != nil { + return true + } + } + // We cannot generate a token with what we are holding. + // If we've already hit EOF or an I/O error, we are done. + if s.err != nil { + // Shut it down. + s.start = 0 + s.end = 0 + return false + } + // Must read more data. + // First, shift data to beginning of buffer if there's lots of empty space + // or space is neded. + if s.start > 0 && (s.end == len(s.buf) || s.start > len(s.buf)/2) { + copy(s.buf, s.buf[s.start:s.end]) + s.end -= s.start + s.start = 0 + } + // Is the buffer full? If so, resize. + if s.end == len(s.buf) { + if len(s.buf) >= s.maxTokenSize { + s.setErr(ErrTooLong) + return false + } + newSize := len(s.buf) * 2 + if newSize > s.maxTokenSize { + newSize = s.maxTokenSize + } + newBuf := make([]byte, newSize) + copy(newBuf, s.buf[s.start:s.end]) + s.buf = newBuf + s.end -= s.start + s.start = 0 + continue + } + // Finally we can read some input. Make sure we don't get stuck with + // a misbehaving Reader. Officially we don't need to do this, but let's + // be extra careful: Scanner is for safe, simple jobs. + for loop := 0; ; { + n, err := s.r.Read(s.buf[s.end:len(s.buf)]) + s.end += n + if err != nil { + s.setErr(err) + break + } + if n > 0 { + break + } + loop++ + if loop > 100 { + s.setErr(io.ErrNoProgress) + break + } + } + } +} + +// advance consumes n bytes of the buffer. It reports whether the advance was legal. +func (s *Scanner) advance(n int) bool { + if n < 0 { + s.setErr(ErrNegativeAdvance) + return false + } + if n > s.end-s.start { + s.setErr(ErrAdvanceTooFar) + return false + } + s.start += n + return true +} + +// setErr records the first error encountered. +func (s *Scanner) setErr(err error) { + if s.err == nil || s.err == io.EOF { + s.err = err + } +} + +// Split sets the split function for the Scanner. If called, it must be +// called before Scan. The default split function is ScanLines. +func (s *Scanner) Split(split SplitFunc) { + s.split = split +} + +// Split functions + +// ScanBytes is a split function for a Scanner that returns each byte as a token. +func ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + return 1, data[0:1], nil +} + +var errorRune = []byte(string(utf8.RuneError)) + +// ScanRunes is a split function for a Scanner that returns each +// UTF-8-encoded rune as a token. The sequence of runes returned is +// equivalent to that from a range loop over the input as a string, which +// means that erroneous UTF-8 encodings translate to U+FFFD = "\xef\xbf\xbd". +// Because of the Scan interface, this makes it impossible for the client to +// distinguish correctly encoded replacement runes from encoding errors. +func ScanRunes(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + + // Fast path 1: ASCII. + if data[0] < utf8.RuneSelf { + return 1, data[0:1], nil + } + + // Fast path 2: Correct UTF-8 decode without error. + _, width := utf8.DecodeRune(data) + if width > 1 { + // It's a valid encoding. Width cannot be one for a correctly encoded + // non-ASCII rune. + return width, data[0:width], nil + } + + // We know it's an error: we have width==1 and implicitly r==utf8.RuneError. + // Is the error because there wasn't a full rune to be decoded? + // FullRune distinguishes correctly between erroneous and incomplete encodings. + if !atEOF && !utf8.FullRune(data) { + // Incomplete; get more bytes. + return 0, nil, nil + } + + // We have a real UTF-8 encoding error. Return a properly encoded error rune + // but advance only one byte. This matches the behavior of a range loop over + // an incorrectly encoded string. + return 1, errorRune, nil +} + +// dropCR drops a terminal \r from the data. +func dropCR(data []byte) []byte { + if len(data) > 0 && data[len(data)-1] == '\r' { + return data[0 : len(data)-1] + } + return data +} + +// ScanLines is a split function for a Scanner that returns each line of +// text, stripped of any trailing end-of-line marker. The returned line may +// be empty. The end-of-line marker is one optional carriage return followed +// by one mandatory newline. In regular expression notation, it is `\r?\n`. +// The last non-empty line of input will be returned even if it has no +// newline. +func ScanLines(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF && len(data) == 0 { + return 0, nil, nil + } + if i := bytes.IndexByte(data, '\n'); i >= 0 { + // We have a full newline-terminated line. + return i + 1, dropCR(data[0:i]), nil + } + // If we're at EOF, we have a final, non-terminated line. Return it. + if atEOF { + return len(data), dropCR(data), nil + } + // Request more data. + return 0, nil, nil +} + +// isSpace returns whether the character is a Unicode white space character. +// We avoid dependency on the unicode package, but check validity of the implementation +// in the tests. +func isSpace(r rune) bool { + if r <= '\u00FF' { + // Obvious ASCII ones: \t through \r plus space. Plus two Latin-1 oddballs. + switch r { + case ' ', '\t', '\n', '\v', '\f', '\r': + return true + case '\u0085', '\u00A0': + return true + } + return false + } + // High-valued ones. + if '\u2000' <= r && r <= '\u200a' { + return true + } + switch r { + case '\u1680', '\u180e', '\u2028', '\u2029', '\u202f', '\u205f', '\u3000': + return true + } + return false +} + +// ScanWords is a split function for a Scanner that returns each +// space-separated word of text, with surrounding spaces deleted. It will +// never return an empty string. The definition of space is set by +// unicode.IsSpace. +func ScanWords(data []byte, atEOF bool) (advance int, token []byte, err error) { + // Skip leading spaces. + start := 0 + for width := 0; start < len(data); start += width { + var r rune + r, width = utf8.DecodeRune(data[start:]) + if !isSpace(r) { + break + } + } + if atEOF && len(data) == 0 { + return 0, nil, nil + } + // Scan until space, marking end of word. + for width, i := 0, start; i < len(data); i += width { + var r rune + r, width = utf8.DecodeRune(data[i:]) + if isSpace(r) { + return i + width, data[start:i], nil + } + } + // If we're at EOF, we have a final, non-empty, non-terminated word. Return it. + if atEOF && len(data) > start { + return len(data), data[start:], nil + } + // Request more data. + return 0, nil, nil +} diff --git a/gcc-4.8/libgo/go/bufio/scan_test.go b/gcc-4.8/libgo/go/bufio/scan_test.go new file mode 100644 index 000000000..c1483b268 --- /dev/null +++ b/gcc-4.8/libgo/go/bufio/scan_test.go @@ -0,0 +1,406 @@ +// Copyright 2013 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package bufio_test + +import ( + . "bufio" + "bytes" + "errors" + "io" + "strings" + "testing" + "unicode" + "unicode/utf8" +) + +// Test white space table matches the Unicode definition. +func TestSpace(t *testing.T) { + for r := rune(0); r <= utf8.MaxRune; r++ { + if IsSpace(r) != unicode.IsSpace(r) { + t.Fatalf("white space property disagrees: %#U should be %t", r, unicode.IsSpace(r)) + } + } +} + +var scanTests = []string{ + "", + "a", + "¼", + "☹", + "\x81", // UTF-8 error + "\uFFFD", // correctly encoded RuneError + "abcdefgh", + "abc def\n\t\tgh ", + "abc¼☹\x81\uFFFD日本語\x82abc", +} + +func TestScanByte(t *testing.T) { + for n, test := range scanTests { + buf := bytes.NewBufferString(test) + s := NewScanner(buf) + s.Split(ScanBytes) + var i int + for i = 0; s.Scan(); i++ { + if b := s.Bytes(); len(b) != 1 || b[0] != test[i] { + t.Errorf("#%d: %d: expected %q got %q", n, i, test, b) + } + } + if i != len(test) { + t.Errorf("#%d: termination expected at %d; got %d", n, len(test), i) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +// Test that the rune splitter returns same sequence of runes (not bytes) as for range string. +func TestScanRune(t *testing.T) { + for n, test := range scanTests { + buf := bytes.NewBufferString(test) + s := NewScanner(buf) + s.Split(ScanRunes) + var i, runeCount int + var expect rune + // Use a string range loop to validate the sequence of runes. + for i, expect = range string(test) { + if !s.Scan() { + break + } + runeCount++ + got, _ := utf8.DecodeRune(s.Bytes()) + if got != expect { + t.Errorf("#%d: %d: expected %q got %q", n, i, expect, got) + } + } + if s.Scan() { + t.Errorf("#%d: scan ran too long, got %q", n, s.Text()) + } + testRuneCount := utf8.RuneCountInString(test) + if runeCount != testRuneCount { + t.Errorf("#%d: termination expected at %d; got %d", n, testRuneCount, runeCount) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +var wordScanTests = []string{ + "", + " ", + "\n", + "a", + " a ", + "abc def", + " abc def ", + " abc\tdef\nghi\rjkl\fmno\vpqr\u0085stu\u00a0\n", +} + +// Test that the word splitter returns the same data as strings.Fields. +func TestScanWords(t *testing.T) { + for n, test := range wordScanTests { + buf := bytes.NewBufferString(test) + s := NewScanner(buf) + s.Split(ScanWords) + words := strings.Fields(test) + var wordCount int + for wordCount = 0; wordCount < len(words); wordCount++ { + if !s.Scan() { + break + } + got := s.Text() + if got != words[wordCount] { + t.Errorf("#%d: %d: expected %q got %q", n, wordCount, words[wordCount], got) + } + } + if s.Scan() { + t.Errorf("#%d: scan ran too long, got %q", n, s.Text()) + } + if wordCount != len(words) { + t.Errorf("#%d: termination expected at %d; got %d", n, len(words), wordCount) + } + err := s.Err() + if err != nil { + t.Errorf("#%d: %v", n, err) + } + } +} + +// slowReader is a reader that returns only a few bytes at a time, to test the incremental +// reads in Scanner.Scan. +type slowReader struct { + max int + buf *bytes.Buffer +} + +func (sr *slowReader) Read(p []byte) (n int, err error) { + if len(p) > sr.max { + p = p[0:sr.max] + } + return sr.buf.Read(p) +} + +// genLine writes to buf a predictable but non-trivial line of text of length +// n, including the terminal newline and an occasional carriage return. +// If addNewline is false, the \r and \n are not emitted. +func genLine(buf *bytes.Buffer, lineNum, n int, addNewline bool) { + buf.Reset() + doCR := lineNum%5 == 0 + if doCR { + n-- + } + for i := 0; i < n-1; i++ { // Stop early for \n. + c := 'a' + byte(lineNum+i) + if c == '\n' || c == '\r' { // Don't confuse us. + c = 'N' + } + buf.WriteByte(c) + } + if addNewline { + if doCR { + buf.WriteByte('\r') + } + buf.WriteByte('\n') + } + return +} + +// Test the line splitter, including some carriage returns but no long lines. +func TestScanLongLines(t *testing.T) { + const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. + tmp := new(bytes.Buffer) + buf := new(bytes.Buffer) + lineNum := 0 + j := 0 + for i := 0; i < 2*smallMaxTokenSize; i++ { + genLine(tmp, lineNum, j, true) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + buf.Write(tmp.Bytes()) + lineNum++ + } + s := NewScanner(&slowReader{1, buf}) + s.Split(ScanLines) + s.MaxTokenSize(smallMaxTokenSize) + j = 0 + for lineNum := 0; s.Scan(); lineNum++ { + genLine(tmp, lineNum, j, false) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + line := tmp.String() // We use the string-valued token here, for variety. + if s.Text() != line { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Text(), line) + } + } + err := s.Err() + if err != nil { + t.Fatal(err) + } +} + +// Test that the line splitter errors out on a long line. +func TestScanLineTooLong(t *testing.T) { + const smallMaxTokenSize = 256 // Much smaller for more efficient testing. + // Build a buffer of lots of line lengths up to but not exceeding smallMaxTokenSize. + tmp := new(bytes.Buffer) + buf := new(bytes.Buffer) + lineNum := 0 + j := 0 + for i := 0; i < 2*smallMaxTokenSize; i++ { + genLine(tmp, lineNum, j, true) + j++ + buf.Write(tmp.Bytes()) + lineNum++ + } + s := NewScanner(&slowReader{3, buf}) + s.Split(ScanLines) + s.MaxTokenSize(smallMaxTokenSize) + j = 0 + for lineNum := 0; s.Scan(); lineNum++ { + genLine(tmp, lineNum, j, false) + if j < smallMaxTokenSize { + j++ + } else { + j-- + } + line := tmp.Bytes() + if !bytes.Equal(s.Bytes(), line) { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line) + } + } + err := s.Err() + if err != ErrTooLong { + t.Fatalf("expected ErrTooLong; got %s", err) + } +} + +// Test that the line splitter handles a final line without a newline. +func testNoNewline(text string, lines []string, t *testing.T) { + buf := bytes.NewBufferString(text) + s := NewScanner(&slowReader{7, buf}) + s.Split(ScanLines) + for lineNum := 0; s.Scan(); lineNum++ { + line := lines[lineNum] + if s.Text() != line { + t.Errorf("%d: bad line: %d %d\n%.100q\n%.100q\n", lineNum, len(s.Bytes()), len(line), s.Bytes(), line) + } + } + err := s.Err() + if err != nil { + t.Fatal(err) + } +} + +var noNewlineLines = []string{ + "abcdefghijklmn\nopqrstuvwxyz", +} + +// Test that the line splitter handles a final line without a newline. +func TestScanLineNoNewline(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final line with a carriage return but nonewline. +func TestScanLineReturnButNoNewline(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\r" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final empty line. +func TestScanLineEmptyFinalLine(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\n\n" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + "", + } + testNoNewline(text, lines, t) +} + +// Test that the line splitter handles a final empty line with a carriage return but no newline. +func TestScanLineEmptyFinalLineWithCR(t *testing.T) { + const text = "abcdefghijklmn\nopqrstuvwxyz\n\r" + lines := []string{ + "abcdefghijklmn", + "opqrstuvwxyz", + "", + } + testNoNewline(text, lines, t) +} + +var testError = errors.New("testError") + +// Test the correct error is returned when the split function errors out. +func TestSplitError(t *testing.T) { + // Create a split function that delivers a little data, then a predictable error. + numSplits := 0 + const okCount = 7 + errorSplit := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + if atEOF { + panic("didn't get enough data") + } + if numSplits >= okCount { + return 0, nil, testError + } + numSplits++ + return 1, data[0:1], nil + } + // Read the data. + const text = "abcdefghijklmnopqrstuvwxyz" + buf := bytes.NewBufferString(text) + s := NewScanner(&slowReader{1, buf}) + s.Split(errorSplit) + var i int + for i = 0; s.Scan(); i++ { + if len(s.Bytes()) != 1 || text[i] != s.Bytes()[0] { + t.Errorf("#%d: expected %q got %q", i, text[i], s.Bytes()[0]) + } + } + // Check correct termination location and error. + if i != okCount { + t.Errorf("unexpected termination; expected %d tokens got %d", okCount, i) + } + err := s.Err() + if err != testError { + t.Fatalf("expected %q got %v", testError, err) + } +} + +// Test that an EOF is overridden by a user-generated scan error. +func TestErrAtEOF(t *testing.T) { + s := NewScanner(strings.NewReader("1 2 33")) + // This spitter will fail on last entry, after s.err==EOF. + split := func(data []byte, atEOF bool) (advance int, token []byte, err error) { + advance, token, err = ScanWords(data, atEOF) + if len(token) > 1 { + if s.ErrOrEOF() != io.EOF { + t.Fatal("not testing EOF") + } + err = testError + } + return + } + s.Split(split) + for s.Scan() { + } + if s.Err() != testError { + t.Fatal("wrong error:", s.Err()) + } +} + +// Test for issue 5268. +type alwaysError struct{} + +func (alwaysError) Read(p []byte) (int, error) { + return 0, io.ErrUnexpectedEOF +} + +func TestNonEOFWithEmptyRead(t *testing.T) { + scanner := NewScanner(alwaysError{}) + for scanner.Scan() { + t.Fatal("read should fail") + } + err := scanner.Err() + if err != io.ErrUnexpectedEOF { + t.Errorf("unexpected error: %v", err) + } +} + +// Test that Scan finishes if we have endless empty reads. +type endlessZeros struct{} + +func (endlessZeros) Read(p []byte) (int, error) { + return 0, nil +} + +func TestBadReader(t *testing.T) { + scanner := NewScanner(endlessZeros{}) + for scanner.Scan() { + t.Fatal("read should fail") + } + err := scanner.Err() + if err != io.ErrNoProgress { + t.Errorf("unexpected error: %v", err) + } +} |