diff options
author | yuta.256 <yuta.256@b7c3aa3b-274f-0410-ae0b-edc9d07c929d> | 2008-07-13 15:57:03 +0000 |
---|---|---|
committer | yuta.256 <yuta.256@b7c3aa3b-274f-0410-ae0b-edc9d07c929d> | 2008-07-13 15:57:03 +0000 |
commit | 101c19ff41d3cebfb3702ce980d02a09f81e2e0b (patch) | |
tree | a198be1471690ac944c9ab9aa2273273492a73c5 | |
parent | 7347305730fb9cc87bee02744c06fbc2089b1680 (diff) | |
download | platform_external_libdivsufsort-101c19ff41d3cebfb3702ce980d02a09f81e2e0b.tar.gz platform_external_libdivsufsort-101c19ff41d3cebfb3702ce980d02a09f81e2e0b.tar.bz2 platform_external_libdivsufsort-101c19ff41d3cebfb3702ce980d02a09f81e2e0b.zip |
Rewrote examples.
-rw-r--r-- | examples/mksary.c | 173 | ||||
-rw-r--r-- | examples/sasearch.c | 446 | ||||
-rw-r--r-- | examples/suftest.c | 137 | ||||
-rw-r--r-- | examples/unbwt.c | 166 |
4 files changed, 430 insertions, 492 deletions
diff --git a/examples/mksary.c b/examples/mksary.c index cdb6d35..b48177c 100644 --- a/examples/mksary.c +++ b/examples/mksary.c @@ -24,105 +24,166 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H # include "config.h" #endif -#include <divsufsort.h> #include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif #if HAVE_STDLIB_H # include <stdlib.h> #endif -#include <time.h> -#if HAVE_SYS_STAT_H -# include <sys/stat.h> +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> #endif +#if HAVE_STRINGS_H +# include <strings.h> +#endif +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <time.h> +#include <divsufsort.h> +#include "lfs.h" + +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "mksary, a simple suffix array builder, version %s.\n", + divsufsort_version()); + fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname); + exit(status); +} int main(int argc, const char *argv[]) { - FILE *ifp, *ofp; + FILE *fp, *ofp; + const char *fname, *ofname; sauchar_t *T; saidx_t *SA; - saidx_t n; + LFS_OFF_T n; clock_t start, finish; -#if HAVE_SYS_STAT_H - struct stat s; -#endif + saint_t needclose = 3; - /* Check argument. */ - if(argc != 3) { - fprintf(stderr, - "mksary, a simple suffix array builder, version %s.\n" - , divsufsort_version()); - fprintf(stderr, - "usage: %s srcFILE dstSA\n\n" - , argv[0]); - exit(EXIT_FAILURE); - } - - /* Get a file's status information. */ -#if HAVE_SYS_STAT_H - if(stat(argv[1], &s) != 0) { - fprintf(stderr, "%s: Cannot stat file `%s': ", argv[0], argv[1]); - perror(NULL); - exit(EXIT_FAILURE); - } - n = s.st_size; -#endif + /* Check arguments. */ + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 3) { print_help(argv[0], EXIT_FAILURE); } /* Open a file for reading. */ - if((ifp = fopen(argv[1], "rb")) == NULL) { - fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[1]); - perror(NULL); - exit(EXIT_FAILURE); + if(strcmp(argv[1], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&fp, fname = argv[1], "rb") != 0) { +#else + if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdin), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + fp = stdin; + fname = "stdin"; + needclose ^= 1; } /* Open a file for writing. */ - if((ofp = fopen(argv[2], "wb")) == NULL) { - fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]); + if(strcmp(argv[2], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) { +#else + if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdout), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + ofp = stdout; + ofname = "stdout"; + needclose ^= 2; + } + + /* Get the file size. */ + if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { + n = LFS_FTELL(fp); + rewind(fp); + if(n < 0) { + fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + if(0x7fffffff <= n) { + fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname); + exit(EXIT_FAILURE); + } + } else { + fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname); perror(NULL); exit(EXIT_FAILURE); } -#if !HAVE_SYS_STAT_H - fseek(ifp, 0, SEEK_END); - n = ftell(ifp); - rewind(ifp); -#endif - - /* Allocate 5n bytes of memory. */ - if(((T = malloc(n * sizeof(sauchar_t))) == NULL) || - ((SA = malloc(n * sizeof(saidx_t))) == NULL)) { + /* Allocate 5blocksize bytes of memory. */ + T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); + SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); + if((T == NULL) || (SA == NULL)) { fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); exit(EXIT_FAILURE); } /* Read n bytes of data. */ - if(fread(T, sizeof(sauchar_t), n, ifp) != n) { + if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { fprintf(stderr, "%s: %s `%s': ", argv[0], - (ferror(ifp) || !feof(ifp)) ? "Cannot read from" : "Unexpected EOF in", - argv[1]); + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + fname); perror(NULL); exit(EXIT_FAILURE); } - fclose(ifp); + if(needclose & 1) { fclose(fp); } /* Construct the suffix array. */ - fprintf(stderr, "%s: %d bytes ... ", argv[1], (int)n); + fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n); start = clock(); - divsufsort(T, SA, n); + if(divsufsort(T, SA, (saidx_t)n) != 0) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } finish = clock(); - fprintf(stderr, "%.4f sec\n", - (double)(finish - start) / (double)CLOCKS_PER_SEC); + fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC); /* Write the suffix array. */ - if(fwrite(SA, sizeof(saidx_t), n, ofp) != n) { - fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], argv[2]); + if(fwrite(SA, sizeof(saidx_t), (size_t)n, ofp) != (size_t)n) { + fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); perror(NULL); exit(EXIT_FAILURE); } - fclose(ofp); + if(needclose & 2) { fclose(ofp); } /* Deallocate memory. */ free(SA); diff --git a/examples/sasearch.c b/examples/sasearch.c index 026aff9..c4ecdc1 100644 --- a/examples/sasearch.c +++ b/examples/sasearch.c @@ -24,382 +24,142 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H # include "config.h" #endif -#include <divsufsort.h> #include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif #if HAVE_STDLIB_H # include <stdlib.h> #endif -#include <string.h> -#if HAVE_STRING_H -# if !STDC_HEADERS && HAVE_MEMORY_H -# include <memory.h> -# endif -# include <string.h> +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> #endif #if HAVE_STRINGS_H # include <strings.h> #endif -#if HAVE_SYS_STAT_H -# include <sys/stat.h> +#if HAVE_SYS_TYPES_H +# include <sys/types.h> #endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <divsufsort.h> +#include "lfs.h" -#define SA_SORT_LEXICOGRAPHICALORDER (1 << 0) -#define SA_PRINT_OFFSET (1 << 1) -#define SA_PRINT_FILENAME (1 << 2) -#define SA_HEX_MODE (1 << 3) - -typedef struct _searchoption_t searchoption_t; -struct _searchoption_t { - const char *fname; - saidx_t maxcount; - saidx_t blen, alen; - unsigned int flags; - void(*func)(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - const saidx_t *SA, saidx_t SAsize, - saidx_t left, saidx_t size, searchoption_t *opt); -}; - -static -void -_print_suffix(const sauchar_t *T, saidx_t Tsize, saidx_t Psize, saidx_t pos, - const searchoption_t *option) { - saidx_t i; - saidx_t a, b, c, d; - - a = (option->blen < pos) ? pos - option->blen : 0; - b = pos; - c = pos + Psize; - d = ((c + option->alen) < Tsize) ? c + option->alen : Tsize; - - if(option->flags & SA_PRINT_FILENAME) { printf("%s:", option->fname); } - if(option->flags & SA_PRINT_OFFSET) { printf("%d:", a); } - - if(option->flags & SA_HEX_MODE) { - for(i = a; i < (d - 1); ++i) { - printf("%02x ", T[i]); - } - printf("%02x\n", T[d - 1]); - } else { - for(i = a; i < d; ++i) { - switch(T[i]) { - case '\n': printf("[\\n]"); break; - case '\r': printf("[\\r]"); break; - default: printf("%c", T[i]); - } - } - printf("\n"); - } -} - -static -void -_onlyprint_count(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - const saidx_t *SA, saidx_t SAsize, - saidx_t left, saidx_t size, searchoption_t *option) { - if(0 < size) { - if(option->flags & SA_PRINT_FILENAME) { printf("%s:", option->fname); } - printf("%d\n", size); - } -} - -static -int -_idx_cmp(const void *p1, const void *p2) { - saidx_t i1 = *((saidx_t *)p1), i2 = *((saidx_t *)p2); - if(i1 < i2) { return -1; } - if(i1 > i2) { return 1; } - return 0; -/* return i1 - i2; */ -} - -static -void -_print_suffixes(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - const saidx_t *SA, saidx_t SAsize, - saidx_t left, saidx_t size, searchoption_t *option) { - saidx_t *ary; - saidx_t i; - - if(option->flags & SA_SORT_LEXICOGRAPHICALORDER) { - for(i = 0; i < size; ++i) { - _print_suffix(T, Tsize, Psize, SA[left + i], option); - } - } else { - ary = malloc(size * sizeof(saidx_t)); - memcpy(ary, SA + left, size * sizeof(saidx_t)); - qsort(ary, size, sizeof(saidx_t), _idx_cmp); - for(i = 0; i < size; ++i) { - _print_suffix(T, Tsize, Psize, ary[i], option); - } - free(ary); - } -} - -static -void -_search_file(const sauchar_t *T, saidx_t Tsize, - const sauchar_t *P, saidx_t Psize, - const saidx_t *SA, saidx_t SAsize, - searchoption_t *option) { - saidx_t size, left; - size = sa_search(T, Tsize, P, Psize, SA, SAsize, &left); - if(0 <= option->maxcount) { - if(option->maxcount == 0) { return; } - if(option->maxcount < size) { size = option->maxcount; } - } - option->func(T, Tsize, P, Psize, SA, SAsize, left, size, option); -} - -static -void -_print_version(const char *pname) { - fprintf(stderr, -"%s, a SA-based full-text search tool, version %s\n\n", - pname, divsufsort_version()); -} - -static -void -_print_usage(const char *pname, int status) { - _print_version(pname); - fprintf(stderr, -"usage: %s [OPTION]... PATTERN FILE SAFILE\n" -"\nOutput control:\n" -" -m NUM stop after NUM matches\n" -" -b print the byte offset\n" -" -H print the filename\n" -" -c only print a count of matches\n" -" -S sort in lexicographical order\n" -"\nContext control:\n" -" -B NUM print NUM characters of leading context\n" -" -A NUM print NUM characters of trailing context\n" -" -C NUM print NUM characters of output context\n" -"\nMiscellaneous:\n" -" -h print this message\n" -" -v display version number\n" -"\n", pname); - exit(status); -} - -static -void -_print_tryhelp(const char *pname, int status) { - fprintf(stderr, "Try `%s --help' for more information.\n", pname); - exit(status); -} - static void -_print_version_and_license(const char *pname, int status) { - _print_version(pname); +print_help(const char *progname, int status) { fprintf(stderr, - " Copyright (c) 2003-2007 Yuta Mori All Rights Reserved.\n" - "\n" - " Permission is hereby granted, free of charge, to any person\n" - " obtaining a copy of this software and associated documentation\n" - " files (the \"Software\"), to deal in the Software without\n" - " restriction, including without limitation the rights to use,\n" - " copy, modify, merge, publish, distribute, sublicense, and/or sell\n" - " copies of the Software, and to permit persons to whom the\n" - " Software is furnished to do so, subject to the following\n" - " conditions:\n" - "\n" - " The above copyright notice and this permission notice shall be\n" - " included in all copies or substantial portions of the Software.\n" - "\n" - " THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n" - " EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES\n" - " OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n" - " NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n" - " HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,\n" - " WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n" - " FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR\n" - " OTHER DEALINGS IN THE SOFTWARE.\n"); + "sasearch, a simple SA-based full-text search tool, version %s\n", + divsufsort_version()); + fprintf(stderr, "usage: %s PATTERN FILE SAFILE\n\n", progname); exit(status); } - int main(int argc, const char *argv[]) { - int i; - searchoption_t option; - sauchar_t *P; - saidx_t Psize; - - if(argc <= 1) { _print_usage(argv[0], EXIT_SUCCESS); } - - option.maxcount = -1; - option.fname = NULL; - option.flags = 0; - option.func = _print_suffixes; - option.alen = option.blen = 10; - - for(i = 1; i < argc; ++i) { - if(argv[i][0] != '-') { break; } - - if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) { - _print_usage(argv[0], EXIT_SUCCESS); - } - if((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) { - _print_version_and_license(argv[0], EXIT_SUCCESS); - } - - if(strcmp(argv[i],"-m")==0) { - if((i + 1) == argc) { - fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]); - _print_tryhelp(argv[0], EXIT_FAILURE); - } - option.maxcount = atoi(argv[++i]); - } else if(strcmp(argv[i],"-b")==0) { - option.flags |= SA_PRINT_OFFSET; - } else if(strcmp(argv[i],"-H")==0) { - option.flags |= SA_PRINT_FILENAME; - } else if(strcmp(argv[i],"-c")==0) { - option.func = _onlyprint_count; - } else if(strcmp(argv[i],"-S")==0) { - option.flags |= SA_SORT_LEXICOGRAPHICALORDER; - } else if(strcmp(argv[i],"-B")==0) { - if((i + 1) == argc) { - fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]); - _print_tryhelp(argv[0], EXIT_FAILURE); - } - option.blen = atoi(argv[++i]); - } else if(strcmp(argv[i],"-A")==0) { - if((i + 1) == argc) { - fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]); - _print_tryhelp(argv[0], EXIT_FAILURE); - } - option.alen = atoi(argv[++i]); - } else if(strcmp(argv[i],"-C")==0) { - if((i + 1) == argc) { - fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]); - _print_tryhelp(argv[0], EXIT_FAILURE); - } - option.alen = option.blen = atoi(argv[++i]); - - } else if(strcmp(argv[i],"--hex")==0) { - option.flags |= SA_HEX_MODE; - - } else { - fprintf(stderr,"%s: invalid option -- %s\n", argv[0], argv[i]); - _print_tryhelp(argv[0], EXIT_FAILURE); - } - } - - if(i == argc) { return 0; } - - P = (sauchar_t *)argv[i]; - Psize = (saidx_t)strlen(argv[i]); - - if(option.flags & SA_HEX_MODE) { - sauchar_t *newP = malloc(Psize / 2 * sizeof(sauchar_t)); - saidx_t j, k; - unsigned char c, t; - if(newP == NULL) { - fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); - exit(EXIT_FAILURE); - } - for(j = 0, k = 0, c = 0; j < Psize; ++j) { - if((('0' <= P[j]) && (P[j] <= '9')) || (('a' <= P[j]) && (P[j] <= 'f'))) { - t = (('0' <= P[j]) && (P[j] <= '9')) ? P[j] - '0' : (P[j] - 'a' + 10); - c = (c << 4) | t; - if(k & 1) { newP[k / 2] = c; } - k += 1; - } - } - Psize = k / 2; - P = newP; - } - - for(i += 1; (i + 1) < argc; i += 2) { - FILE *fp; - sauchar_t *T; - saidx_t *SA; - saidx_t size; - -#if HAVE_SYS_STAT_H - struct stat s; - if(stat(argv[i], &s) != 0) { - fprintf(stderr, "%s: Cannot stat file `%s': ", argv[0], argv[i]); - perror(NULL); - exit(EXIT_FAILURE); - } - size = s.st_size; + FILE *fp; + const char *P; + sauchar_t *T; + saidx_t *SA; + LFS_OFF_T n; + size_t Psize; + saidx_t i, size, left; + + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 4) { print_help(argv[0], EXIT_FAILURE); } + + P = argv[1]; + Psize = strlen(P); + + /* Open a file for reading. */ +#if HAVE_FOPEN_S + if(fopen_s(&fp, argv[2], "rb") != 0) { +#else + if((fp = LFS_FOPEN(argv[2], "rb")) == NULL) { #endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]); + perror(NULL); + exit(EXIT_FAILURE); + } - option.fname = argv[i]; - /* Open a file for reading. */ - if((fp = fopen(argv[i], "rb")) == NULL) { - fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[i]); - perror(NULL); - exit(EXIT_FAILURE); - } -#if !HAVE_SYS_STAT_H - if(fseek(fp, 0, SEEK_END) != 0) { - fprintf(stderr, "%s: Cannot fseek on `%s': ", argv[0], argv[i]); - perror(NULL); - exit(EXIT_FAILURE); - } - if((size = ftell(fp)) == -1) { - fprintf(stderr, "%s: Cannot ftell on `%s': ", argv[0], argv[i]); - perror(NULL); - exit(EXIT_FAILURE); - } + /* Get the file size. */ + if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { + n = LFS_FTELL(fp); rewind(fp); -#endif - - /* Allocate n+4(n+1) bytes of memory. */ - if(((T = malloc(size * sizeof(sauchar_t))) == NULL) || - ((SA = malloc((size + 1) * sizeof(saidx_t))) == NULL)) { - fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); - exit(EXIT_FAILURE); - } - - /* Read n * sizeof(sauchar_t) bytes of data. */ - if(fread(T, sizeof(sauchar_t), size, fp) != size) { - fprintf(stderr, "%s: %s `%s': ", - argv[0], - (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", - argv[i]); + if(n < 0) { + fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], argv[2]); perror(NULL); exit(EXIT_FAILURE); } - fclose(fp); + } else { + fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], argv[2]); + perror(NULL); + exit(EXIT_FAILURE); + } - /* Open a SA file for reading. */ - if((fp = fopen(argv[i + 1], "rb")) == NULL) { - fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[i + 1]); - perror(NULL); - exit(EXIT_FAILURE); - } - /* Read (n + 1) * sizeof(saidx_t) bytes of data. */ - SA[0] = size; - if(fread(SA + 1, sizeof(saidx_t), size, fp) != size) { - fprintf(stderr, "%s: %s `%s': ", - argv[0], - (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", - argv[i + 1]); - perror(NULL); - exit(EXIT_FAILURE); - } - fclose(fp); + /* Allocate 5n bytes of memory. */ + T = malloc((size_t)n * sizeof(sauchar_t)); + SA = malloc((size_t)n * sizeof(saidx_t)); + if((T == NULL) || (SA == NULL)) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } - _search_file(T, size, P, Psize, SA, size + 1, &option); + /* Read n bytes of data. */ + if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + argv[2]); + perror(NULL); + exit(EXIT_FAILURE); + } + fclose(fp); - free(T); - free(SA); + /* Open the SA file for reading. */ +#if HAVE_FOPEN_S + if(fopen_s(&fp, argv[3], "rb") != 0) { +#else + if((fp = LFS_FOPEN(argv[3], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[3]); + perror(NULL); + exit(EXIT_FAILURE); } - if(option.flags & SA_HEX_MODE) { - free(P); + /* Read n * sizeof(saidx_t) bytes of data. */ + if(fread(SA, sizeof(saidx_t), (size_t)n, fp) != (size_t)n) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + argv[3]); + perror(NULL); + exit(EXIT_FAILURE); + } + fclose(fp); + + /* Search and print */ + size = sa_search(T, (saidx_t)n, + (const sauchar_t *)P, (saidx_t)Psize, + SA, (saidx_t)n, &left); + for(i = 0; i < size; ++i) { + fprintf(stdout, "%" PRIdSAIDX_T "\n", SA[left + i]); } + /* Deallocate memory. */ + free(SA); + free(T); + return 0; } diff --git a/examples/suftest.c b/examples/suftest.c index 90b7c83..71892ac 100644 --- a/examples/suftest.c +++ b/examples/suftest.c @@ -24,85 +24,116 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H # include "config.h" #endif -#include <divsufsort.h> #include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif #if HAVE_STDLIB_H # include <stdlib.h> #endif -#if HAVE_STRING_H -# if !STDC_HEADERS && HAVE_MEMORY_H -# include <memory.h> -# endif -# include <string.h> +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> #endif #if HAVE_STRINGS_H # include <strings.h> #endif -#include <time.h> -#if HAVE_SYS_STAT_H -# include <sys/stat.h> +#if HAVE_SYS_TYPES_H +# include <sys/types.h> #endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif +#include <time.h> +#include <divsufsort.h> +#include "lfs.h" +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "suftest, a suffixsort tester, version %s.\n", + divsufsort_version()); + fprintf(stderr, "usage: %s FILE\n\n", progname); + exit(status); +} + int main(int argc, const char *argv[]) { FILE *fp; + const char *fname; sauchar_t *T; saidx_t *SA; - saidx_t n; + LFS_OFF_T n; clock_t start, finish; -#if HAVE_SYS_STAT_H - struct stat s; -#endif + saint_t needclose = 1; - /* Check argument. */ - if((argc != 2) || + /* Check arguments. */ + if((argc == 1) || (strcmp(argv[1], "-h") == 0) || - (strcmp(argv[1], "--help") == 0)) { - fprintf(stderr, - "suftest, a suffixsort tester, version %s.\n" - , divsufsort_version()); - fprintf(stderr, - "usage: %s FILE\n\n" - , argv[0]); - exit(EXIT_FAILURE); - } + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 2) { print_help(argv[0], EXIT_FAILURE); } - /* Get a file's status information. */ -#if HAVE_SYS_STAT_H - if(stat(argv[1], &s) != 0) { - fprintf(stderr, "%s: Cannot stat file `%s': ", argv[0], argv[1]); - perror(NULL); - exit(EXIT_FAILURE); - } - n = s.st_size; + /* Open a file for reading. */ + if(strcmp(argv[1], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&fp, fname = argv[1], "rb") != 0) { +#else + if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdin), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } #endif + fp = stdin; + fname = "stdin"; + needclose = 0; + } - /* Open a file for reading. */ - if((fp = fopen(argv[1], "rb")) == NULL) { - fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[1]); + /* Get the file size. */ + if(LFS_FSEEK(fp, 0, SEEK_END) == 0) { + n = LFS_FTELL(fp); + rewind(fp); + if(n < 0) { + fprintf(stderr, "%s: Cannot ftell `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + if(0x7fffffff <= n) { + fprintf(stderr, "%s: Input file `%s' is too big.\n", argv[0], fname); + exit(EXIT_FAILURE); + } + } else { + fprintf(stderr, "%s: Cannot fseek `%s': ", argv[0], fname); perror(NULL); exit(EXIT_FAILURE); } -#if !HAVE_SYS_STAT_H - fseek(fp, 0, SEEK_END); - n = ftell(fp); - rewind(fp); -#endif - /* Allocate 5n bytes of memory. */ - if(((T = malloc(n * sizeof(sauchar_t))) == NULL) || - ((SA = malloc(n * sizeof(saidx_t))) == NULL)) { + T = (sauchar_t *)malloc((size_t)n * sizeof(sauchar_t)); + SA = (saidx_t *)malloc((size_t)n * sizeof(saidx_t)); + if((T == NULL) || (SA == NULL)) { fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); exit(EXIT_FAILURE); } /* Read n bytes of data. */ - if(fread(T, sizeof(sauchar_t), n, fp) != n) { + if(fread(T, sizeof(sauchar_t), (size_t)n, fp) != (size_t)n) { fprintf(stderr, "%s: %s `%s': ", argv[0], (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", @@ -110,20 +141,20 @@ main(int argc, const char *argv[]) { perror(NULL); exit(EXIT_FAILURE); } - fclose(fp); + if(needclose & 1) { fclose(fp); } /* Construct the suffix array. */ - fprintf(stderr, "%s: %d bytes ... ", argv[1], (int)n); + fprintf(stderr, "%s: %" PRIdOFF_T " bytes ... ", fname, n); start = clock(); - divsufsort(T, SA, n); + if(divsufsort(T, SA, (saidx_t)n) != 0) { + fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); + exit(EXIT_FAILURE); + } finish = clock(); - fprintf(stderr, "%.4f sec\n", - (double)(finish - start) / (double)CLOCKS_PER_SEC); + fprintf(stderr, "%.4f sec\n", (double)(finish - start) / (double)CLOCKS_PER_SEC); /* Check the suffix array. */ - if(sufcheck(T, SA, n, 1) != 0) { - exit(EXIT_FAILURE); - } + if(sufcheck(T, SA, (saidx_t)n, 1) != 0) { exit(EXIT_FAILURE); } /* Deallocate memory. */ free(SA); diff --git a/examples/unbwt.c b/examples/unbwt.c index a0aae45..c0f19e9 100644 --- a/examples/unbwt.c +++ b/examples/unbwt.c @@ -24,98 +24,184 @@ * OTHER DEALINGS IN THE SOFTWARE. */ -#ifdef HAVE_CONFIG_H +#if HAVE_CONFIG_H # include "config.h" #endif -#include <divsufsort.h> #include <stdio.h> +#if HAVE_STRING_H +# include <string.h> +#endif #if HAVE_STDLIB_H # include <stdlib.h> #endif -#if HAVE_STRING_H -# if !STDC_HEADERS && HAVE_MEMORY_H -# include <memory.h> -# endif -# include <string.h> +#if HAVE_MEMORY_H +# include <memory.h> +#endif +#if HAVE_STDDEF_H +# include <stddef.h> #endif #if HAVE_STRINGS_H # include <strings.h> #endif +#if HAVE_SYS_TYPES_H +# include <sys/types.h> +#endif +#if HAVE_IO_H && HAVE_FCNTL_H +# include <io.h> +# include <fcntl.h> +#endif #include <time.h> +#include <divsufsort.h> +#include "lfs.h" +static +size_t +read_int(FILE *fp, saidx_t *n) { + unsigned char c[4]; + size_t m = fread(c, sizeof(unsigned char), 4, fp); + if(m == 4) { + *n = (c[0] << 0) | (c[1] << 8) | + (c[2] << 16) | (c[3] << 24); + } + return m; +} + +static +void +print_help(const char *progname, int status) { + fprintf(stderr, + "unbwt, an inverse burrows-wheeler transform program, version %s.\n", + divsufsort_version()); + fprintf(stderr, "usage: %s INFILE OUTFILE\n\n", progname); + exit(status); +} + int main(int argc, const char *argv[]) { + FILE *fp, *ofp; + const char *fname, *ofname; sauchar_t *T; - saidx_t *A, m, n, blocksize, idx; - saint_t err; + saidx_t *A; + LFS_OFF_T n; + size_t m; + saidx_t pidx; clock_t start, finish; + saint_t err, blocksize, needclose = 3; + + /* Check arguments. */ + if((argc == 1) || + (strcmp(argv[1], "-h") == 0) || + (strcmp(argv[1], "--help") == 0)) { print_help(argv[0], EXIT_SUCCESS); } + if(argc != 3) { print_help(argv[0], EXIT_FAILURE); } + + /* Open a file for reading. */ + if(strcmp(argv[1], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&fp, fname = argv[1], "rb") != 0) { +#else + if((fp = LFS_FOPEN(fname = argv[1], "rb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], fname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdin), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + fp = stdin; + fname = "stdin"; + needclose ^= 1; + } - /* Check argument. */ - if(argc != 1) { - fprintf(stderr, - "unbwt, an inverse burrows-wheeler transform program, version %s.\n" - , divsufsort_version()); - fprintf(stderr, - "usage: %s < STDIN > STDOUT\n\n" - , argv[0]); - return 0; + /* Open a file for writing. */ + if(strcmp(argv[2], "-") != 0) { +#if HAVE_FOPEN_S + if(fopen_s(&ofp, ofname = argv[2], "wb") != 0) { +#else + if((ofp = LFS_FOPEN(ofname = argv[2], "wb")) == NULL) { +#endif + fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], ofname); + perror(NULL); + exit(EXIT_FAILURE); + } + } else { +#if HAVE__SETMODE && HAVE__FILENO + if(_setmode(_fileno(stdout), _O_BINARY) == -1) { + fprintf(stderr, "%s: Cannot set mode: ", argv[0]); + perror(NULL); + exit(EXIT_FAILURE); + } +#endif + ofp = stdout; + ofname = "stdout"; + needclose ^= 2; } - /* Read the blocksize from stdin. */ - if(fread(&blocksize, sizeof(saidx_t), 1, stdin) != 1) { - fprintf(stderr, "%s: %s `stdin': ", - argv[0], - (ferror(stdin) || !feof(stdin)) ? - "Cannot read from" : "Unexpected EOF in"); + /* Read the blocksize. */ + if(read_int(fp, &blocksize) != 4) { + fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); perror(NULL); exit(EXIT_FAILURE); } /* Allocate 5blocksize bytes of memory. */ - if(((T = malloc(blocksize * sizeof(sauchar_t))) == NULL) || - ((A = malloc(blocksize * sizeof(saidx_t))) == NULL)) { + T = (sauchar_t *)malloc(blocksize * sizeof(sauchar_t)); + A = (saidx_t *)malloc(blocksize * sizeof(saidx_t)); + if((T == NULL) || (A == NULL)) { fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]); exit(EXIT_FAILURE); } - fprintf(stderr, "UnBWT (blocksize %d) ... ", (int)blocksize); + fprintf(stderr, "UnBWT (blocksize %" PRIdSAINT_T ") ... ", blocksize); start = clock(); - for(n = 0; fread(&idx,sizeof(saidx_t),1,stdin)!=0; n += m) { + for(n = 0; (m = read_int(fp, &pidx)) != 0; n += m) { /* Read blocksize bytes of data. */ - if((m = fread(T, sizeof(sauchar_t), blocksize, stdin)) == 0) { - fprintf(stderr, "%s: Unexpected EOF in `stdin': ", argv[0]); + if((m != 4) || ((m = fread(T, sizeof(sauchar_t), blocksize, fp)) == 0)) { + fprintf(stderr, "%s: %s `%s': ", + argv[0], + (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in", + fname); perror(NULL); exit(EXIT_FAILURE); } /* Inverse Burrows-Wheeler Transform. */ - if((err = inverse_bw_transform(T, T, A, m, idx)) != 0) { - fprintf(stderr, "%s (inverse_bw_transform): %s.\n", + if((err = inverse_bw_transform(T, T, A, m, pidx)) != 0) { + fprintf(stderr, "%s (reverseBWT): %s.\n", argv[0], - (err == -1) ? "Invalid arguments" : "Cannot allocate memory"); + (err == -1) ? "Invalid data" : "Cannot allocate memory"); exit(EXIT_FAILURE); } /* Write m bytes of data. */ - if(fwrite(T, sizeof(sauchar_t), m, stdout) != m) { - fprintf(stderr, "%s: Cannot write to `stdout': ", argv[0]); + if(fwrite(T, sizeof(sauchar_t), m, ofp) != m) { + fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], ofname); perror(NULL); exit(EXIT_FAILURE); } } - if(ferror(stdin)) { - fprintf(stderr, "%s: Cannot read from `stdin': ", argv[0]); + if(ferror(fp)) { + fprintf(stderr, "%s: Cannot read from `%s': ", argv[0], fname); perror(NULL); exit(EXIT_FAILURE); } finish = clock(); - fprintf(stderr, "%d bytes: %.4f sec\n", - (int)n, (double)(finish - start) / (double)CLOCKS_PER_SEC); + fprintf(stderr, "%" PRIdOFF_T " bytes: %.4f sec\n", + n, (double)(finish - start) / (double)CLOCKS_PER_SEC); + + /* Close files */ + if(needclose & 1) { fclose(fp); } + if(needclose & 2) { fclose(ofp); } /* Deallocate memory. */ - free(T); free(A); + free(T); return 0; } |