aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryuta.256 <yuta.256@b7c3aa3b-274f-0410-ae0b-edc9d07c929d>2008-06-17 20:25:18 +0000
committeryuta.256 <yuta.256@b7c3aa3b-274f-0410-ae0b-edc9d07c929d>2008-06-17 20:25:18 +0000
commit18d7a90ad01a043f61d8af8b997fdeac10772cef (patch)
treed893b595fb41b967ef3f69e07979f359bd566d8c
downloadplatform_external_libdivsufsort-18d7a90ad01a043f61d8af8b997fdeac10772cef.tar.gz
platform_external_libdivsufsort-18d7a90ad01a043f61d8af8b997fdeac10772cef.tar.bz2
platform_external_libdivsufsort-18d7a90ad01a043f61d8af8b997fdeac10772cef.zip
Initial import.
-rw-r--r--AUTHORS3
-rw-r--r--COPYING22
-rw-r--r--ChangeLog175
-rw-r--r--ChangeLog.old67
-rw-r--r--INSTALL51
-rw-r--r--Makefile.am5
-rw-r--r--NEWS36
-rw-r--r--README155
-rw-r--r--configure.ac72
-rw-r--r--examples/Makefile.am6
-rw-r--r--examples/bwt.c152
-rw-r--r--examples/mksary.c132
-rw-r--r--examples/sasearch.c402
-rw-r--r--examples/suftest.c133
-rw-r--r--examples/unbwt.c121
-rw-r--r--include/Makefile.am4
-rw-r--r--include/divsufsort.h.in132
-rw-r--r--include/divsufsort_private.h.in162
-rw-r--r--lib/Makefile.am9
-rw-r--r--lib/divsufsort.c362
-rw-r--r--lib/libdivsufsort.sym8
-rw-r--r--lib/substringsort.c619
-rw-r--r--lib/trsort.c684
-rw-r--r--lib/utils.c378
24 files changed, 3890 insertions, 0 deletions
diff --git a/AUTHORS b/AUTHORS
new file mode 100644
index 0000000..b750b88
--- /dev/null
+++ b/AUTHORS
@@ -0,0 +1,3 @@
+-- AUTHORS for libdivsufsort
+
+Yuta Mori <yiv01157@nifty.com>
diff --git a/COPYING b/COPYING
new file mode 100644
index 0000000..3bad2dc
--- /dev/null
+++ b/COPYING
@@ -0,0 +1,22 @@
+Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
diff --git a/ChangeLog b/ChangeLog
new file mode 100644
index 0000000..1b89304
--- /dev/null
+++ b/ChangeLog
@@ -0,0 +1,175 @@
+2008-02-23 Yuta Mori <yiv01157@nifty.com>
+
+ * lib/substringsort.c (_merge_backward): Bug fix.
+ * lib/trsort.c (_tr_introsort): Bug fix.
+
+2007-09-02 Yuta Mori <yiv01157@nifty.com>
+
+ * lib/trsort.c (_ls_introsort): Important bug fix.
+
+2007-07-15 Yuta Mori <yiv01157@nifty.com>
+
+ A few bug fixes.
+
+ * lib/divsufsort.c (divbwt): Bug fix.
+ * lib/trsort.c (_tr_introsort): Bug fix.
+ * lib/utils.c (sa_search, sa_simplesearch): New functions.
+ * lib/libdivsufsort.sym: Update.
+ * include/divsufsort.h.in: Update.
+ * examples/sasearch.c: New file.
+ * examples/Makefile.am: Update.
+ * configure.ac: Update.
+ * NEWS: Update.
+ * README: Update.
+
+2007-04-14 Yuta Mori <yiv01157@nifty.com>
+
+ Change license to the MIT/X11 license.
+ Update all files for 1.2.0.
+
+ * lib/libdivsufsort.sym: New file for libtool.
+
+2007-04-07 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.7.
+
+2007-04-07 Yuta Mori <yiv01157@nifty.com>
+
+ Replace drsort with tandem repeat sorting algorithm and Larsson-Sadakane sorting algorithm.
+
+ * lib/trsort.c: New file.
+ * lib/drsort.c: Delete.
+ * lib/divsufsort.c: Update.
+ * lib/Makefile.am: Update.
+ * lib/divsufsort_private.h.in (LS_INSERTIONSORT_THRESHOLD, TR_INSERTIONSORT_THRESHOLD): New constants.
+ (DR_INSERTIONSORT_THRESHOLD): Delete.
+ (STACK_PUSH3, STACK_POP3): New macros.
+
+2007-03-31 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.6.
+
+2007-03-31 Yuta Mori <yiv01157@nifty.com>
+
+ Replace _ss_merge with new merge algorithms.
+
+ * lib/substringsort.c (_ss_merge): Delete.
+ * lib/substringsort.c (_block_swap, _merge_forward, _merge_backward, _merge): New functions.
+ (substringsort): Update.
+ * lib/divsufsort.c (_sort_typeBstar, divsufsort, divbwt): Update.
+ * include/divsufsort_private.h.in (LOCALMERGE_BUFFERSIZE): New constant.
+ (SS_MERGESORT_QUEUESIZE): Delete.
+
+2007-03-24 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.5.
+
+2007-03-23 Yuta Mori <yiv01157@nifty.com>
+
+ Replace breadth-first introsort with new multikey introsort.
+
+ * lib/substringsort.c (_compare): Update.
+ (_substring_partition): Update.
+ (_multikey_introsort): New function.
+ (_introsort, _bfintrosort): Delete.
+ (substringsort): Update.
+ * lib/divsufsort.c (_sort_typeBstar): Update.
+
+2007-03-21 Yuta Mori <yiv01157@nifty.com>
+
+ * lib/substringsort.c (_introsort): Convert introsort to a non-recursive algorithm.
+ (substringsort): Update.
+ * lib/divsufsort.c (_sort_typeBstar): Update.
+
+2007-03-21 Yuta Mori <yiv01157@nifty.com>
+
+ * include/divsufsort_private.h.in (STACK_SIZE): Rename from SS_STACK_SIZE.
+ (SS_BLOCKSIZE): Rename from SS_MKQSORT_THRESHOLD.
+ (SS_MKQSORT_DMAX, SS_DSWAP, SS_STACK_PUSH, SS_STACK_POP): Delete.
+ (STACK_PUSH, STACK_POP): New macros.
+ (substringsort): Update prototype.
+
+2007-03-17 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.4.
+
+2007-03-17 Yuta Mori <yiv01157@nifty.com>
+
+ * substringsort.c (_fixdown, _heapsort, _lg): New function.
+ (_introsort): Rename from _quicksort. Change to use new partitioning algorithm.
+ (_bfintrosort): Rename from _bfquicksort.
+
+2007-03-10 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.3.
+
+2007-03-10 Yuta Mori <yiv01157@nifty.com>
+
+ Replace depth-first multikey quicksort with new breadth-first ternary quicksort.
+
+ * substringsort.c (_ss_compare_lcp, _ss_tqsort, _ss_mkqsort): Remove.
+ (_median3): Rename from _ss_median and rewrite.
+ (_pivot): Rename from _ss_pivot and rewrite.
+ (_median5, _substring_partition, _quicksort, _bfquicksort): New function.
+
+2007-03-03 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.2.
+
+2007-03-03 Yuta Mori <yiv01157@nifty.com>
+
+ * substringsort.c (_compare): Rename from _ss_compare and rewrite.
+ (_insertionsort): Rename from _ss_insertionsort and rewrite.
+
+2007-02-24 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.1.
+
+2007-02-24 Yuta Mori <yiv01157@nifty.com>
+
+ * lib/substringsort.c (_ss_getc): Remove.
+
+2007-02-17 Yuta Mori <yiv01157@nifty.com>
+
+ Update files for 1.1.0.
+
+2007-02-17 Yuta Mori <yiv01157@nifty.com>
+
+ * utils.c (bwtcheck): Remove.
+
+2007-02-11 Yuta Mori <yiv01157@nifty.com>
+
+ * lib/divsufsort.c,
+ include/divsufsort.h.in,
+ include/divsufsort_private.h.in:
+ Change to use a new improved two-stage sort algorithm (version 070210).
+
+2007-01-28 Yuta Mori <yiv01157@nifty.com>
+
+ * lib/divsufsort.c (_sort): Fix a bug that using wrong index.
+
+2007-01-28 Yuta Mori <yiv01157@nifty.com>
+
+ * examples/bwt.c: Rename from examples/bwt2.c.
+ * examples/unbwt.c: Rename from examples/unbwt2.c.
+ * examples/bwt1.c: Delete.
+ * examples/unbwt1.c: Delete.
+
+2007-01-28 Yuta Mori <yiv01157@nifty.com>
+
+ * lib/divsufsort.c, include/divsufsort_private.h.in:
+ Change to use new improved two-stage sort algorithm (version 070128).
+
+2007-01-24 Yuta Mori <yiv01157@nifty.com>
+
+ Remove use of libtool.
+
+ * include/divsufsort_private.h.in: Rename from include/divsufsort_private.h.
+
+2007-01-24 Yuta Mori <yiv01157@nifty.com>
+
+ Initial import.
+
+;; Local Variables:
+;; coding: utf-8
+;; End:
diff --git a/ChangeLog.old b/ChangeLog.old
new file mode 100644
index 0000000..df8de8e
--- /dev/null
+++ b/ChangeLog.old
@@ -0,0 +1,67 @@
+Version 1.0.2 (2006-01-01):
+
+ * Release 1.0.2
+
+Version 1.0.2b (2005-12-11):
+
+ * lib/divsufsort.c (_construct_typeBstar): Completely rewrite.
+
+Version 1.0.2a (2005-12-04):
+
+ * lib/substringsort.c: Completely rewrite.
+ * lib/drsort.c: Completely rewrite.
+ * lib/divsufsort.c (_sort_typeBstar): Fix some bugs.
+
+Version 1.0.1 (2005-11-08):
+
+ * Release 1.0.1
+
+Version 1.0.1a (2005-11-06):
+
+ * configure.ac: Add AM_ENABLE_STATIC, AM_DISABLE_SHARED and AC_LIBTOOL_WIN32_DLL
+
+ * Makefile.am (EXTRA_DIST): Add ChangeLog.old.
+
+ * lib/divsufsort.c (_construct_typeBstar): Fix.
+
+ * AUTHORS: New file.
+ * ChangeLog: New file.
+ * ChangeLog.old: New file.
+ * INSTALL: New file.
+ * NEWS: New file.
+
+Version 1.0.0 (2005-10-31)
+ * Introduced autoconf and automake.
+ * Added new example programs.
+
+Version 0.2.1 (2005-08-27)
+ * divsufsort.c: Kao's algorithm was replaced with Improved Two-Stage algorithm.
+ * divsufsort.c: Reduced memory usage.
+ * substringsort.c: Added mergesort for sorting large groups of suffixes.
+
+Version 0.1.6 (2005-06-10)
+ * divsufsort.h: Renamed from libdivsufsort.h. (again...)
+ * divsufsort.c: Renamed from libdivsufsort.c. (again...)
+ * divsufsort.c: Reduced memory usage.
+ * substringsort.c, substringsort.h, drsort.c, drsort.h: Modify.
+ * mksary_mmap/makefile, mksary_mmap/mksary.c,
+ mksary_mmap/mmap.c, mksary_mmap/mmap.h: Removed.
+
+Version 0.1.5 (2005-04-07)
+ * libdivsufsort.c: ranksort and doublingsort were replaced with drsort.
+ * def.h, drsort.c, drsort.h: New file.
+ * doublingsort.c, doublingsort.h, ranksort.c, ranksort.h: Removed.
+
+Version 0.1.4 (2005-03-27)
+ * mksary/mksary.c, mksary_mmap/mksary.c, suftest.c: Added error handling.
+
+Version 0.1.3 (2005-01-28)
+ * mksary/makefile, mksary/mksary.c, mksary_mmap/makefile,
+ mksary_mmap/mksary.c, mksary_mmap/mmap.c, mksary_mmap/mmap.h: New file.
+ * libdivsufsort.c: Modify.
+
+Version 0.1.2 (2005-01-01)
+ * suftest.c: New file.
+ * libdivsufsort.c: Renamed from divsufsort.c.
+ * libdivsufsort.h: Renamed from divsufsort.h.
+
diff --git a/INSTALL b/INSTALL
new file mode 100644
index 0000000..03153c7
--- /dev/null
+++ b/INSTALL
@@ -0,0 +1,51 @@
+-- INSTALL for libdivsufsort
+
+Compilation and Installation:
+-----------------------------
+
+ The `configure' shell script attempts to guess correct values for
+various system-dependent variables used during compilation. It uses
+those values to create a `Makefile' in each directory in the
+libdivsufsort source tree. Finally, it creates a shell script
+`config.status' that you can run in the future to recreate the
+current configuration, and a file `config.log' containing compiler
+output (useful mainly for debugging `configure').
+
+ The file `configure.ac' is used to create `configure' by a program
+called `autoconf'. You only need `configure.ac' if you want to change
+it or regenerate `configure' using a newer version of `autoconf'.
+
+The simplest way to compile this package is:
+
+ 1. `cd' to the directory containing the package's source code and type
+ `./configure' to configure the package for your system. If you're
+ using `csh' on an old version of System V, you might need to type
+ `sh ./configure' instead to prevent `csh' from trying to execute
+ `configure' itself.
+
+ Running `configure' takes awhile. While running, it prints some
+ messages telling which features it is checking for.
+
+ 2. Type `make' to compile the package.
+
+ 3. Type `make install' to install the library and header files.
+
+ 4. You can remove the program binaries and object files from the
+ source code directory by typing `make clean'. To also remove the
+ files that `configure' created (so you can compile the package for
+ a different kind of computer), type `make distclean'.
+
+
+Compilers and Options:
+----------------------
+
+ Some systems require unusual options for compilation or linking
+that the `configure' script does not know about. You can give
+`configure' initial values for configuration parameters by setting
+variables in the command line or in the environment. Here
+is an example:
+
+ ./configure CC=c89 CFLAGS=-O2 LIBS=-lposix
+
+Run `./configure --help' for details on some of the pertinent
+environment variables.
diff --git a/Makefile.am b/Makefile.am
new file mode 100644
index 0000000..99af5e2
--- /dev/null
+++ b/Makefile.am
@@ -0,0 +1,5 @@
+# Makefile.am for libdivsufsort
+
+SUBDIRS = include lib examples
+
+EXTRA_DIST = ChangeLog.old
diff --git a/NEWS b/NEWS
new file mode 100644
index 0000000..ea79c45
--- /dev/null
+++ b/NEWS
@@ -0,0 +1,36 @@
+-- NEWS for libdivsufsort
+
+Changes in release 1.2.3
+
+ - Bug fixes.
+
+Changes in release 1.2.2
+
+ - An important bug fix.
+
+Changes in release 1.2.1
+
+ - A few bug fixes.
+ - New APIs: sa_search, sa_simplesearch.
+
+Changes in release 1.2.0
+
+ - Changed license to the MIT/X11 license, see COPYING.
+ - Improved the performance of the suffix array construction.
+ - Change to use a new improved two-stage sorting algorithm.
+ - Replace substringsort with a new sorting algorithm that uses
+ multikey introspective sorting algorithm and in-place/recursive
+ merging algorithm.
+ - Replace drsort with a new sorting algorithm that uses
+ multikey introspective sorting algorithm, Maniscalco's tandem
+ repeat sorting algorithm and Larsson-Sadakane sorting algorithm.
+ - New API: divbwt.
+
+Changes in release 1.0.2
+
+ - The performance of sorting has been improved.
+ - Fix some bugs.
+
+Changes in release 1.0.1
+
+ - The performance of sorting has been improved a little bit.
diff --git a/README b/README
new file mode 100644
index 0000000..183e0dc
--- /dev/null
+++ b/README
@@ -0,0 +1,155 @@
+libdivsufsort - A lightweight suffix-sorting library.
+-----------------------------------------------------
+
+Introduction:
+-------------
+
+libdivsufsort is an open-source library that provides a high
+performance and lightweight suffix-sorting and BWT-construction
+algorithm. For input size n, this algorithm runs in O(n log n)
+worst-case (and average) time using only 5n bytes of memory.
+
+The latest version of libdivsufsort is available at:
+ http://homepage3.nifty.com/wpage/software/libdivsufsort.html
+
+
+License:
+--------
+
+libdivsufsort is released under the MIT/X11 license. See the file
+COPYING for more details.
+
+
+APIs:
+-----
+
+ * Data types
+ typedef int32_t saint_t;
+ typedef int32_t saidx_t;
+ typedef uint8_t sauchar_t;
+
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n] The output array or suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ saint_t
+ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ saidx_t
+ divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
+
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ saint_t
+ bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
+ saidx_t n, saidx_t *idx);
+
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ saint_t
+ inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
+ saidx_t *A, saidx_t n, saidx_t idx);
+
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ saint_t
+ sufcheck(const sauchar_t *T, const saidx_t *SA,
+ saidx_t n, saint_t verbose);
+
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ saidx_t
+ sa_search(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ saidx_t *idx);
+
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ saidx_t
+ sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+ const saidx_t *SA, saidx_t SAsize,
+ saint_t c, saidx_t *idx);
+
+ * Returns the version string of libdivsufsort.
+ * @return the version string.
+ const char *
+ divsufsort_version(void);
+
+
+Benchmark results:
+------------------
+
+http://homepage3.nifty.com/wpage/benchmark/index.html
+
+
+Algorithm:
+----------
+
+libdivsufsort uses the following algorithms for suffix sorting.
+ - The improved version of Itho-Tanaka two-stage sorting algorithm. [2][6]
+ - A substring sorting/encoding technique. [1][3]
+ - Maniscalco's tandem repeat sorting algorithm. [5]
+ - Larsson-Sadakane sorting algorithm. [4]
+
+
+References:
+-----------
+
+ 1. Stefan Burkhardt and Juha K"arkk"ainen. Fast lightweight suffix
+ array construction and checking. Proceedings of the 14th Annual
+ Symposium on Combinatorial Pattern Matching, LNCS 2676,
+ Springer, pp. 55-69, 2003.
+
+ 2. Hideo Itoh and Hozumi Tanaka, An Efficient Method for in Memory
+ Construction of Suffix Arrays, Proceedings of the IEEE String
+ Processing and Information Retrieval Symposium, pp. 81-88, 1999.
+
+ 3. Pang Ko and Srinivas Aluru, Space-efficient linear time
+ construction of suffix arrays, Proceedings of the 14th Annual
+ Symposium on Combinatorial Pattern Matching, pp. 200-210, 2003.
+
+ 4. Jesper Larsson and Kunihiko Sadakane, Faster suffix sorting.
+ Technical report LU-CS-TR:99-214, Department of Computer
+ Science, Lund University, Sweden, 1999.
+
+ 5. Michael Maniscalco, MSufSort.
+ http://www.michael-maniscalco.com/msufsort.htm
+
+ 6. Yuta Mori, Short description of improved two-stage suffix sorting
+ algorithm, 2005.
+ http://homepage3.nifty.com/wpage/software/itssort.txt
diff --git a/configure.ac b/configure.ac
new file mode 100644
index 0000000..ed090e5
--- /dev/null
+++ b/configure.ac
@@ -0,0 +1,72 @@
+# configure.ac for libdivsufsort
+
+AC_PREREQ(2.59)
+
+AC_INIT([libdivsufsort], [1.2.3], [yiv01157@nifty.com])
+AC_CONFIG_SRCDIR([include/divsufsort.h.in])
+AC_CONFIG_HEADER([include/config.h])
+AC_CONFIG_AUX_DIR([config])
+AM_INIT_AUTOMAKE([-Wall -Werror foreign 1.9 no-define dist-bzip2])
+
+# Library code modified: REVISION++
+# Interfaces changed/added/removed: CURRENT++ REVISION=0
+# Interfaces added: AGE++
+# Interfaces removed: AGE=0
+AC_SUBST(LT_CURRENT, 3)
+AC_SUBST(LT_REVISION, 2)
+AC_SUBST(LT_AGE, 1)
+
+# Checks for programs.
+AC_PROG_CC
+AC_PROG_INSTALL
+AC_PROG_MAKE_SET
+
+# Check for host type.
+AC_CANONICAL_HOST
+
+# Checks for compiler output filename suffixes.
+AC_OBJEXT
+AC_EXEEXT
+
+# Check for build configuration.
+AM_DISABLE_STATIC
+AC_LIBTOOL_WIN32_DLL
+AC_PROG_LIBTOOL
+case "$host_os" in
+ cygwin*)
+ AC_ARG_ENABLE(cygwin, AC_HELP_STRING([--disable-cygwin], [Don't use the CygWin libraries]))
+ if test "$enable_cygwin" != "no"; then
+ A_CFLAGS=''
+ else
+ A_CFLAGS='-mno-cygwin'
+ fi
+ LT_NOUNDEF='-no-undefined'
+ ;;
+ *) LT_NOUNDEF='' A_CFLAGS='' ;;
+esac
+AC_SUBST([LT_NOUNDEF])
+AC_SUBST([A_CFLAGS])
+
+# Checks for libraries.
+
+# Checks for header files.
+AC_HEADER_STDC
+AC_CHECK_HEADERS([inttypes.h memory.h stddef.h stdint.h stdlib.h string.h strings.h])
+AC_CHECK_HEADER([inttypes.h], [AC_SUBST([_HAVE_INTTYPES_H_], [1])], [AC_SUBST([_HAVE_INTTYPES_H_], [0])])
+AC_CHECK_HEADER([stdint.h], [AC_SUBST([_HAVE_STDINT_H_], [1])], [AC_SUBST([_HAVE_STDINT_H_], [0])])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_C_CONST
+AC_C_INLINE
+
+# Checks for library functions.
+AC_FUNC_MALLOC
+AC_FUNC_STAT
+
+AC_CONFIG_FILES([Makefile
+ include/Makefile
+ include/divsufsort.h
+ include/divsufsort_private.h
+ lib/Makefile
+ examples/Makefile])
+AC_OUTPUT
diff --git a/examples/Makefile.am b/examples/Makefile.am
new file mode 100644
index 0000000..f098500
--- /dev/null
+++ b/examples/Makefile.am
@@ -0,0 +1,6 @@
+# Makefile.am for libdivsufsort
+
+noinst_PROGRAMS = suftest mksary sasearch bwt unbwt
+LDADD = $(top_builddir)/lib/libdivsufsort.la
+
+AM_CFLAGS = @A_CFLAGS@
diff --git a/examples/bwt.c b/examples/bwt.c
new file mode 100644
index 0000000..41591f8
--- /dev/null
+++ b/examples/bwt.c
@@ -0,0 +1,152 @@
+/*
+ * bwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <divsufsort.h>
+#include <stdio.h>
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#include <time.h>
+
+
+static
+saidx_t
+_str2size(const char *str) {
+ saidx_t s[3];
+ saidx_t t;
+ int i, c;
+ for(i = 0, t = s[0] = s[1] = s[2] = 0; (c = str[i]) != '\0'; ++i) {
+ if(('0' <= c) && (c <= '9')) {
+ t = (t * 10) + (c - '0');
+ } else {
+ switch(c) {
+ case 'm':
+ case 'M':
+ s[0] += t << 20;
+ break;
+ case 'k':
+ case 'K':
+ s[1] += t << 10;
+ break;
+ case 'b':
+ case 'B':
+ s[2] += t;
+ break;
+ }
+ t = 0;
+ }
+ }
+ return s[0] + s[1] + s[2];
+}
+
+int
+main(int argc, const char *argv[]) {
+ sauchar_t *T;
+ saidx_t *SA;
+ saidx_t m, n, blocksize, idx;
+ clock_t start,finish;
+
+ /* Check argument. */
+ if(((argc != 1) && (argc != 2)) ||
+ ((argc == 2) && (strcmp(argv[1], "-h") == 0)) ||
+ ((argc == 2) && (strcmp(argv[1], "--help") == 0))) {
+ fprintf(stderr,
+ "bwt, a burrows-wheeler transform program, version %s.\n"
+ , divsufsort_version());
+ fprintf(stderr,
+ "usage: %s [BLOCKSIZE] < STDIN > STDOUT\n\n"
+ , argv[0]);
+ exit(EXIT_FAILURE);
+ }
+ blocksize = (argc == 2) ? _str2size(argv[1]) : 0;
+ if(blocksize <= 0) {
+ fseek(stdin, 0, SEEK_END);
+ blocksize = ftell(stdin);
+ if(blocksize < 0) { blocksize = BUFSIZ; }
+ rewind(stdin);
+ }
+
+ /* Allocate blocksize+4*(blocksize+1) bytes of memory. */
+ if(((T = malloc(blocksize * sizeof(sauchar_t))) == NULL) ||
+ ((SA = malloc((blocksize + 1) * sizeof(saidx_t))) == NULL)) {
+ fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Write the blocksize. */
+ if(fwrite(&blocksize, sizeof(saidx_t), 1, stdout) != 1) {
+ fprintf(stderr, "%s: Cannot write to `stdout': ", argv[0]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+
+ fprintf(stderr, " BWT (blocksize %d) ... ", (int)blocksize);
+ start=clock();
+ for(n = 0; 0 < (m = fread(T, sizeof(sauchar_t), blocksize, stdin)); n += m) {
+ /* Burrows-Wheeler Transform. */
+ idx = divbwt(T, T, SA, m);
+ if(idx < 0) {
+ fprintf(stderr, "%s (bw_transform): %s.\n",
+ argv[0],
+ (idx == -1) ? "Invalid arguments" : "Cannot allocate memory");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Write the bwted data. */
+ if((fwrite(&idx, sizeof(saidx_t), 1, stdout) != 1) ||
+ (fwrite(T, sizeof(sauchar_t), m, stdout) != m)) {
+ fprintf(stderr, "%s: Cannot write to `stdout': ", argv[0]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ }
+ if(ferror(stdin)) {
+ fprintf(stderr, "%s: Cannot read from `stdin': ", argv[0]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ finish = clock();
+ fprintf(stderr, "%d bytes: %.4f sec\n",
+ (int)n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+ /* Deallocate memory. */
+ free(T);
+ free(SA);
+
+ return 0;
+}
diff --git a/examples/mksary.c b/examples/mksary.c
new file mode 100644
index 0000000..a548582
--- /dev/null
+++ b/examples/mksary.c
@@ -0,0 +1,132 @@
+/*
+ * mksary.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <divsufsort.h>
+#include <stdio.h>
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#include <time.h>
+#if HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+
+
+int
+main(int argc, const char *argv[]) {
+ FILE *ifp, *ofp;
+ sauchar_t *T;
+ saidx_t *SA;
+ saidx_t n;
+ clock_t start, finish;
+#if HAVE_SYS_STAT_H
+ struct stat s;
+#endif
+
+ /* Check argument. */
+ if(argc != 3) {
+ fprintf(stderr,
+ "mksary, a simple suffix array builder, version %s.\n"
+ , divsufsort_version());
+ fprintf(stderr,
+ "usage: %s srcFILE dstSA\n\n"
+ , argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Get a file's status information. */
+#if HAVE_SYS_STAT_H
+ if(stat(argv[1], &s) != 0) {
+ fprintf(stderr, "%s: Cannot stat file `%s': ", argv[0], argv[1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ n = s.st_size;
+#endif
+
+ /* Open a file for reading. */
+ if((ifp = fopen(argv[1], "rb")) == NULL) {
+ fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Open a file for writing. */
+ if((ofp = fopen(argv[2], "wb")) == NULL) {
+ fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[2]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+
+#if !HAVE_SYS_STAT_H
+ fseek(ifp, 0, SEEK_END);
+ n = ftell(ifp);
+ rewind(ifp);
+#endif
+
+ /* Allocate n+4(n+1) bytes of memory. */
+ if(((T = malloc(n * sizeof(sauchar_t))) == NULL) ||
+ ((SA = malloc((n + 1) * sizeof(saidx_t))) == NULL)) {
+ fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Read n bytes of data. */
+ if(fread(T, sizeof(sauchar_t), n, ifp) != n) {
+ fprintf(stderr, "%s: %s `%s': ",
+ argv[0],
+ (ferror(ifp) || !feof(ifp)) ? "Cannot read from" : "Unexpected EOF in",
+ argv[1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ fclose(ifp);
+
+ /* Construct the suffix array. */
+ fprintf(stderr, "%s: %d bytes ... ", argv[1], (int)n);
+ start = clock();
+ divsufsort(T, SA, n);
+ finish = clock();
+ fprintf(stderr, "%.4f sec\n",
+ (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+ /* Write the suffix array. */
+ if(fwrite(SA, sizeof(saidx_t), n + 1, ofp) != (n + 1)) {
+ fprintf(stderr, "%s: Cannot write to `%s': ", argv[0], argv[2]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ fclose(ofp);
+
+ /* Deallocate memory. */
+ free(SA);
+ free(T);
+
+ return 0;
+}
diff --git a/examples/sasearch.c b/examples/sasearch.c
new file mode 100644
index 0000000..63a3932
--- /dev/null
+++ b/examples/sasearch.c
@@ -0,0 +1,402 @@
+/*
+ * sasearch.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <divsufsort.h>
+#include <stdio.h>
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#include <string.h>
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+
+
+#define SA_SORT_LEXICOGRAPHICALORDER (1 << 0)
+#define SA_PRINT_OFFSET (1 << 1)
+#define SA_PRINT_FILENAME (1 << 2)
+#define SA_HEX_MODE (1 << 3)
+
+typedef struct _searchoption_t searchoption_t;
+struct _searchoption_t {
+ const char *fname;
+ saidx_t maxcount;
+ saidx_t blen, alen;
+ unsigned int flags;
+ void(*func)(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ saidx_t left, saidx_t size, searchoption_t *opt);
+};
+
+static
+void
+_print_suffix(const sauchar_t *T, saidx_t Tsize, saidx_t Psize, saidx_t pos,
+ const searchoption_t *option) {
+ saidx_t i;
+ saidx_t a, b, c, d;
+
+ a = (option->blen < pos) ? pos - option->blen : 0;
+ b = pos;
+ c = pos + Psize;
+ d = ((c + option->alen) < Tsize) ? c + option->alen : Tsize;
+
+ if(option->flags & SA_PRINT_FILENAME) { printf("%s:", option->fname); }
+ if(option->flags & SA_PRINT_OFFSET) { printf("%d:", a); }
+
+ if(option->flags & SA_HEX_MODE) {
+ for(i = a; i < (d - 1); ++i) {
+ printf("%02x ", T[i]);
+ }
+ printf("%02x\n", T[d - 1]);
+ } else {
+ for(i = a; i < d; ++i) {
+ switch(T[i]) {
+ case '\n': printf("[\\n]"); break;
+ case '\r': printf("[\\r]"); break;
+ default: printf("%c", T[i]);
+ }
+ }
+ printf("\n");
+ }
+}
+
+static
+void
+_onlyprint_count(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ saidx_t left, saidx_t size, searchoption_t *option) {
+ if(0 < size) {
+ if(option->flags & SA_PRINT_FILENAME) { printf("%s:", option->fname); }
+ printf("%d\n", size);
+ }
+}
+
+static
+int
+_idx_cmp(const void *p1, const void *p2) {
+ saidx_t i1 = *((saidx_t *)p1), i2 = *((saidx_t *)p2);
+ if(i1 < i2) { return -1; }
+ if(i1 > i2) { return 1; }
+ return 0;
+/* return i1 - i2; */
+}
+
+static
+void
+_print_suffixes(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ saidx_t left, saidx_t size, searchoption_t *option) {
+ saidx_t *ary;
+ saidx_t i;
+
+ if(option->flags & SA_SORT_LEXICOGRAPHICALORDER) {
+ for(i = 0; i < size; ++i) {
+ _print_suffix(T, Tsize, Psize, SA[left + i], option);
+ }
+ } else {
+ ary = malloc(size * sizeof(saidx_t));
+ memcpy(ary, SA + left, size * sizeof(saidx_t));
+ qsort(ary, size, sizeof(saidx_t), _idx_cmp);
+ for(i = 0; i < size; ++i) {
+ _print_suffix(T, Tsize, Psize, ary[i], option);
+ }
+ free(ary);
+ }
+}
+
+static
+void
+_search_file(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ searchoption_t *option) {
+ saidx_t size, left;
+ size = sa_search(T, Tsize, P, Psize, SA, SAsize, &left);
+ if(0 <= option->maxcount) {
+ if(option->maxcount == 0) { return; }
+ if(option->maxcount < size) { size = option->maxcount; }
+ }
+ option->func(T, Tsize, P, Psize, SA, SAsize, left, size, option);
+}
+
+static
+void
+_print_version(const char *pname) {
+ fprintf(stderr,
+"%s, a SA-based full-text search tool, version %s\n\n",
+ pname, divsufsort_version());
+}
+
+static
+void
+_print_usage(const char *pname, int status) {
+ _print_version(pname);
+ fprintf(stderr,
+"usage: %s [OPTION]... PATTERN FILE SAFILE\n"
+"\nOutput control:\n"
+" -m NUM stop after NUM matches\n"
+" -b print the byte offset\n"
+" -H print the filename\n"
+" -c only print a count of matches\n"
+" -S sort in lexicographical order\n"
+"\nContext control:\n"
+" -B NUM print NUM characters of leading context\n"
+" -A NUM print NUM characters of trailing context\n"
+" -C NUM print NUM characters of output context\n"
+"\nMiscellaneous:\n"
+" -h print this message\n"
+" -v display version number\n"
+"\n", pname);
+ exit(status);
+}
+
+static
+void
+_print_tryhelp(const char *pname, int status) {
+ fprintf(stderr, "Try `%s --help' for more information.\n", pname);
+ exit(status);
+}
+
+static
+void
+_print_version_and_license(const char *pname, int status) {
+ _print_version(pname);
+ fprintf(stderr,
+ " Copyright (c) 2003-2007 Yuta Mori All Rights Reserved.\n"
+ "\n"
+ " Permission is hereby granted, free of charge, to any person\n"
+ " obtaining a copy of this software and associated documentation\n"
+ " files (the \"Software\"), to deal in the Software without\n"
+ " restriction, including without limitation the rights to use,\n"
+ " copy, modify, merge, publish, distribute, sublicense, and/or sell\n"
+ " copies of the Software, and to permit persons to whom the\n"
+ " Software is furnished to do so, subject to the following\n"
+ " conditions:\n"
+ "\n"
+ " The above copyright notice and this permission notice shall be\n"
+ " included in all copies or substantial portions of the Software.\n"
+ "\n"
+ " THE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND,\n"
+ " EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES\n"
+ " OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND\n"
+ " NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT\n"
+ " HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,\n"
+ " WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING\n"
+ " FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR\n"
+ " OTHER DEALINGS IN THE SOFTWARE.\n");
+ exit(status);
+}
+
+
+int
+main(int argc, const char *argv[]) {
+ int i;
+ searchoption_t option;
+
+ if(argc <= 1) { _print_usage(argv[0], EXIT_SUCCESS); }
+
+ option.maxcount = -1;
+ option.fname = NULL;
+ option.flags = 0;
+ option.func = _print_suffixes;
+ option.alen = option.blen = 10;
+
+ for(i = 1; i < argc; ++i) {
+ if(argv[i][0] != '-') { break; }
+
+ if((strcmp(argv[i], "-h") == 0) || (strcmp(argv[i], "--help") == 0)) {
+ _print_usage(argv[0], EXIT_SUCCESS);
+ }
+ if((strcmp(argv[i], "-v") == 0) || (strcmp(argv[i], "--version") == 0)) {
+ _print_version_and_license(argv[0], EXIT_SUCCESS);
+ }
+
+ if(strcmp(argv[i],"-m")==0) {
+ if((i + 1) == argc) {
+ fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]);
+ _print_tryhelp(argv[0], EXIT_FAILURE);
+ }
+ option.maxcount = atoi(argv[++i]);
+ } else if(strcmp(argv[i],"-b")==0) {
+ option.flags |= SA_PRINT_OFFSET;
+ } else if(strcmp(argv[i],"-H")==0) {
+ option.flags |= SA_PRINT_FILENAME;
+ } else if(strcmp(argv[i],"-c")==0) {
+ option.func = _onlyprint_count;
+ } else if(strcmp(argv[i],"-S")==0) {
+ option.flags |= SA_SORT_LEXICOGRAPHICALORDER;
+ } else if(strcmp(argv[i],"-B")==0) {
+ if((i + 1) == argc) {
+ fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]);
+ _print_tryhelp(argv[0], EXIT_FAILURE);
+ }
+ option.blen = atoi(argv[++i]);
+ } else if(strcmp(argv[i],"-A")==0) {
+ if((i + 1) == argc) {
+ fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]);
+ _print_tryhelp(argv[0], EXIT_FAILURE);
+ }
+ option.alen = atoi(argv[++i]);
+ } else if(strcmp(argv[i],"-C")==0) {
+ if((i + 1) == argc) {
+ fprintf(stderr,"%s: option requires an argument -- %s\n", argv[0], argv[i]);
+ _print_tryhelp(argv[0], EXIT_FAILURE);
+ }
+ option.alen = option.blen = atoi(argv[++i]);
+
+ } else if(strcmp(argv[i],"--hex")==0) {
+ option.flags |= SA_HEX_MODE;
+
+ } else {
+ fprintf(stderr,"%s: invalid option -- %s\n", argv[0], argv[i]);
+ _print_tryhelp(argv[0], EXIT_FAILURE);
+ }
+ }
+
+ if(i == argc) { return 0; }
+
+ sauchar_t *P = (sauchar_t *)argv[i];
+ saidx_t Psize = (saidx_t)strlen(argv[i]);
+
+ if(option.flags & SA_HEX_MODE) {
+ sauchar_t *newP = malloc(Psize / 2 * sizeof(sauchar_t));
+ saidx_t j, k;
+ unsigned char c, t;
+ if(newP == NULL) {
+ fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+ for(j = 0, k = 0, c = 0; j < Psize; ++j) {
+ if((('0' <= P[j]) && (P[j] <= '9')) || (('a' <= P[j]) && (P[j] <= 'f'))) {
+ t = (('0' <= P[j]) && (P[j] <= '9')) ? P[j] - '0' : (P[j] - 'a' + 10);
+ c = (c << 4) | t;
+ if(k & 1) { newP[k / 2] = c; }
+ k += 1;
+ }
+ }
+ Psize = k / 2;
+ P = newP;
+ }
+
+ for(i += 1; (i + 1) < argc; i += 2) {
+ FILE *fp;
+ sauchar_t *T;
+ saidx_t *SA;
+ saidx_t size;
+
+#if HAVE_SYS_STAT_H
+ struct stat s;
+ if(stat(argv[i], &s) != 0) {
+ fprintf(stderr, "%s: Cannot stat file `%s': ", argv[0], argv[i]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ size = s.st_size;
+#endif
+
+ option.fname = argv[i];
+ /* Open a file for reading. */
+ if((fp = fopen(argv[i], "rb")) == NULL) {
+ fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[i]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+#if !HAVE_SYS_STAT_H
+ if(fseek(fp, 0, SEEK_END) != 0) {
+ fprintf(stderr, "%s: Cannot fseek on `%s': ", argv[0], argv[i]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ if((size = ftell(fp)) == -1) {
+ fprintf(stderr, "%s: Cannot ftell on `%s': ", argv[0], argv[i]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ rewind(fp);
+#endif
+
+ /* Allocate n+4(n+1) bytes of memory. */
+ if(((T = malloc(size * sizeof(sauchar_t))) == NULL) ||
+ ((SA = malloc((size + 1) * sizeof(saidx_t))) == NULL)) {
+ fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Read n * sizeof(sauchar_t) bytes of data. */
+ if(fread(T, sizeof(sauchar_t), size, fp) != size) {
+ fprintf(stderr, "%s: %s `%s': ",
+ argv[0],
+ (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+ argv[i]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ fclose(fp);
+
+ /* Open a SA file for reading. */
+ if((fp = fopen(argv[i + 1], "rb")) == NULL) {
+ fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[i + 1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ /* Read (n + 1) * sizeof(saidx_t) bytes of data. */
+ if(fread(SA, sizeof(saidx_t), size + 1, fp) != (size + 1)) {
+ fprintf(stderr, "%s: %s `%s': ",
+ argv[0],
+ (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+ argv[i + 1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ fclose(fp);
+
+ _search_file(T, size, P, Psize, SA, size + 1, &option);
+
+ free(T);
+ free(SA);
+ }
+
+ if(option.flags & SA_HEX_MODE) {
+ free(P);
+ }
+
+ return 0;
+}
diff --git a/examples/suftest.c b/examples/suftest.c
new file mode 100644
index 0000000..9d9e2d4
--- /dev/null
+++ b/examples/suftest.c
@@ -0,0 +1,133 @@
+/*
+ * suftest.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <divsufsort.h>
+#include <stdio.h>
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#include <time.h>
+#if HAVE_SYS_STAT_H
+# include <sys/stat.h>
+#endif
+
+
+int
+main(int argc, const char *argv[]) {
+ FILE *fp;
+ sauchar_t *T;
+ saidx_t *SA;
+ saidx_t n;
+ clock_t start, finish;
+#if HAVE_SYS_STAT_H
+ struct stat s;
+#endif
+
+ /* Check argument. */
+ if((argc != 2) ||
+ (strcmp(argv[1], "-h") == 0) ||
+ (strcmp(argv[1], "--help") == 0)) {
+ fprintf(stderr,
+ "suftest, a suffixsort tester, version %s.\n"
+ , divsufsort_version());
+ fprintf(stderr,
+ "usage: %s FILE\n\n"
+ , argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Get a file's status information. */
+#if HAVE_SYS_STAT_H
+ if(stat(argv[1], &s) != 0) {
+ fprintf(stderr, "%s: Cannot stat file `%s': ", argv[0], argv[1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ n = s.st_size;
+#endif
+
+ /* Open a file for reading. */
+ if((fp = fopen(argv[1], "rb")) == NULL) {
+ fprintf(stderr, "%s: Cannot open file `%s': ", argv[0], argv[1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+
+#if !HAVE_SYS_STAT_H
+ fseek(fp, 0, SEEK_END);
+ n = ftell(fp);
+ rewind(fp);
+#endif
+
+ /* Allocate n+4(n+1) bytes of memory. */
+ if(((T = malloc(n * sizeof(sauchar_t))) == NULL) ||
+ ((SA = malloc((n + 1) * sizeof(saidx_t))) == NULL)) {
+ fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Read n bytes of data. */
+ if(fread(T, sizeof(sauchar_t), n, fp) != n) {
+ fprintf(stderr, "%s: %s `%s': ",
+ argv[0],
+ (ferror(fp) || !feof(fp)) ? "Cannot read from" : "Unexpected EOF in",
+ argv[1]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ fclose(fp);
+
+ /* Construct the suffix array. */
+ fprintf(stderr, "%s: %d bytes ... ", argv[1], (int)n);
+ start = clock();
+ divsufsort(T, SA, n);
+ finish = clock();
+ fprintf(stderr, "%.4f sec\n",
+ (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+ /* Check the suffix array. */
+ if(sufcheck(T, SA, n, 3) != 0) {
+ exit(EXIT_FAILURE);
+ }
+
+ /* Deallocate memory. */
+ free(SA);
+ free(T);
+
+ return 0;
+}
diff --git a/examples/unbwt.c b/examples/unbwt.c
new file mode 100644
index 0000000..e989053
--- /dev/null
+++ b/examples/unbwt.c
@@ -0,0 +1,121 @@
+/*
+ * unbwt.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <divsufsort.h>
+#include <stdio.h>
+#if HAVE_STDLIB_H
+# include <stdlib.h>
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#include <time.h>
+
+
+int
+main(int argc, const char *argv[]) {
+ sauchar_t *T;
+ saidx_t *A, m, n, blocksize, idx;
+ saint_t err;
+ clock_t start, finish;
+
+ /* Check argument. */
+ if(argc != 1) {
+ fprintf(stderr,
+ "unbwt, an inverse burrows-wheeler transform program, version %s.\n"
+ , divsufsort_version());
+ fprintf(stderr,
+ "usage: %s < STDIN > STDOUT\n\n"
+ , argv[0]);
+ return 0;
+ }
+
+ /* Read the blocksize from stdin. */
+ if(fread(&blocksize, sizeof(saidx_t), 1, stdin) != 1) {
+ fprintf(stderr, "%s: %s `stdin': ",
+ argv[0],
+ (ferror(stdin) || !feof(stdin)) ?
+ "Cannot read from" : "Unexpected EOF in");
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Allocate blocksize+4(blocksize+1) bytes of memory. */
+ if(((T = malloc(blocksize * sizeof(sauchar_t))) == NULL) ||
+ ((A = malloc((blocksize + 1) * sizeof(saidx_t))) == NULL)) {
+ fprintf(stderr, "%s: Cannot allocate memory.\n", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ fprintf(stderr, "UnBWT (blocksize %d) ... ", (int)blocksize);
+ start = clock();
+ for(n = 0; fread(&idx,sizeof(saidx_t),1,stdin)!=0; n += m) {
+ /* Read blocksize bytes of data. */
+ if((m = fread(T, sizeof(sauchar_t), blocksize, stdin)) == 0) {
+ fprintf(stderr, "%s: Unexpected EOF in `stdin': ", argv[0]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+
+ /* Inverse Burrows-Wheeler Transform. */
+ if((err = inverse_bw_transform(T, T, A, m, idx)) != 0) {
+ fprintf(stderr, "%s (inverse_bw_transform): %s.\n",
+ argv[0],
+ (err == -1) ? "Invalid arguments" : "Cannot allocate memory");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Write m bytes of data. */
+ if(fwrite(T, sizeof(sauchar_t), m, stdout) != m) {
+ fprintf(stderr, "%s: Cannot write to `stdout': ", argv[0]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ }
+ if(ferror(stdin)) {
+ fprintf(stderr, "%s: Cannot read from `stdin': ", argv[0]);
+ perror(NULL);
+ exit(EXIT_FAILURE);
+ }
+ finish = clock();
+ fprintf(stderr, "%d bytes: %.4f sec\n",
+ (int)n, (double)(finish - start) / (double)CLOCKS_PER_SEC);
+
+ /* Deallocate memory. */
+ free(T);
+ free(A);
+
+ return 0;
+}
diff --git a/include/Makefile.am b/include/Makefile.am
new file mode 100644
index 0000000..3bd3b45
--- /dev/null
+++ b/include/Makefile.am
@@ -0,0 +1,4 @@
+# Makefile.am for libdivsufsort
+
+nodist_include_HEADERS = divsufsort.h
+EXTRA_DIST = divsufsort_private.h.in divsufsort.h.in
diff --git a/include/divsufsort.h.in b/include/divsufsort.h.in
new file mode 100644
index 0000000..742880c
--- /dev/null
+++ b/include/divsufsort.h.in
@@ -0,0 +1,132 @@
+/*
+ * divsufsort.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_H_
+#define _DIVSUFSORT_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#if @_HAVE_INTTYPES_H_@ /* _HAVE_INTTYPES_H_ */
+# include <inttypes.h>
+#else
+# if @_HAVE_STDINT_H_@ /* _HAVE_STDINT_H_ */
+# include <stdint.h>
+# else
+
+#ifndef _UINT8_T
+#define _UINT8_T
+typedef unsigned char uint8_t;
+#endif /* _UINT8_T */
+#ifndef _INT32_T
+#define _INT32_T
+typedef int int32_t;
+#endif /* _INT32_T */
+
+# endif
+#endif
+
+/*- Datatypes -*/
+typedef int32_t saint_t;
+typedef int32_t saidx_t;
+typedef uint8_t sauchar_t;
+
+
+/*- Prototypes -*/
+
+/**
+ * Constructs the suffix array of a given string.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n] The output array of suffixes.
+ * @param n The length of the given string.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
+saint_t
+divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
+
+/**
+ * Constructs the burrows-wheeler transformed string of a given string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @return The primary index if no error occurred, -1 or -2 otherwise.
+ */
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n);
+
+/**
+ * Returns the version of the divsufsort library.
+ * @return The version number string.
+ */
+const char *
+divsufsort_version(void);
+
+
+/* Burrows-Wheeler transform. */
+saint_t
+bw_transform(const sauchar_t *T, sauchar_t *U,
+ saidx_t *SA /* can NULL */,
+ saidx_t n, saidx_t *idx);
+
+/* Inverse Burrows-Wheeler transform. */
+saint_t
+inverse_bw_transform(const sauchar_t *T, sauchar_t *U,
+ saidx_t *A /* can NULL */,
+ saidx_t n, saidx_t idx);
+
+/**
+ * Checks the correctness of a given suffix array.
+ * @param T[0..n-1] The input string.
+ * @param SA[0..n] The input suffix array.
+ * @param n The length of the given string.
+ * @param verbose The verbose mode.
+ * @return 0 if no error occurred.
+ */
+saint_t
+sufcheck(const sauchar_t *T, const saidx_t *SA, saidx_t n, saint_t verbose);
+
+
+/* Search for the pattern P in the string T. */
+saidx_t
+sa_search(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ saidx_t *left);
+
+/* Search for the character c in the string T. */
+saidx_t
+sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+ const saidx_t *SA, saidx_t SAsize,
+ saint_t c, saidx_t *left);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_H_ */
diff --git a/include/divsufsort_private.h.in b/include/divsufsort_private.h.in
new file mode 100644
index 0000000..f0d5e02
--- /dev/null
+++ b/include/divsufsort_private.h.in
@@ -0,0 +1,162 @@
+/*
+ * divsufsort_private.h for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef _DIVSUFSORT_PRIVATE_H_
+#define _DIVSUFSORT_PRIVATE_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif /* __cplusplus */
+
+#ifdef HAVE_CONFIG_H
+# include "config.h"
+#endif
+#include <assert.h>
+#include <stdio.h>
+#if STDC_HEADERS
+# include <stdlib.h>
+# include <stddef.h>
+#else
+# if HAVE_STDLIB_H
+# include <stdlib.h>
+# endif
+#endif
+#if HAVE_STRING_H
+# if !STDC_HEADERS && HAVE_MEMORY_H
+# include <memory.h>
+# endif
+# include <string.h>
+#endif
+#if HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#if HAVE_INTTYPES_H
+# include <inttypes.h>
+#else
+# if HAVE_STDINT_H
+# include <stdint.h>
+# endif
+#endif
+#include "divsufsort.h"
+
+
+/*- Constants -*/
+#if !defined(UINT8_MAX)
+# define UINT8_MAX (255)
+#endif /* UINT8_MAX */
+#if defined(ALPHABET_SIZE) && (ALPHABET_SIZE < 1)
+# undef ALPHABET_SIZE
+#endif
+#if !defined(ALPHABET_SIZE)
+# define ALPHABET_SIZE (UINT8_MAX + 1)
+#endif
+#if defined(STACK_SIZE) && (STACK_SIZE < 32)
+# undef STACK_SIZE
+#endif
+#if !defined(STACK_SIZE)
+# define STACK_SIZE (64)
+#endif
+#if defined(LOCALMERGE_BUFFERSIZE) && (LOCALMERGE_BUFFERSIZE < 32)
+# undef LOCALMERGE_BUFFERSIZE
+#endif
+#if !defined(LOCALMERGE_BUFFERSIZE)
+# define LOCALMERGE_BUFFERSIZE (256)
+#endif
+/* for divsufsort.c */
+#define BUCKET_A_SIZE (ALPHABET_SIZE)
+#define BUCKET_B_SIZE (ALPHABET_SIZE * ALPHABET_SIZE)
+/* for sssort.c */
+#define SS_INSERTIONSORT_THRESHOLD (8)
+#define SS_BLOCKSIZE (1024)
+/* for trsort.c */
+#define LS_INSERTIONSORT_THRESHOLD (8)
+#define TR_INSERTIONSORT_THRESHOLD (8)
+
+
+/*- Macros -*/
+#ifndef SWAP
+# define SWAP(a,b) do { t = (a); (a) = (b); (b) = t; } while(0)
+#endif /* SWAP */
+#ifndef MIN
+# define MIN(a,b) (((a) < (b)) ? (a) : (b))
+#endif /* MIN */
+#ifndef MAX
+# define MAX(a,b) (((a) > (b)) ? (a) : (b))
+#endif /* MAX */
+#define STACK_PUSH3(_a, _b, _c)\
+ do {\
+ assert(ssize < STACK_SIZE);\
+ stack[ssize].a = (_a), stack[ssize].b = (_b),\
+ stack[ssize++].c = (_c);\
+ } while(0)
+#define STACK_PUSH(_a, _b, _c, _d)\
+ do {\
+ assert(ssize < STACK_SIZE);\
+ stack[ssize].a = (_a), stack[ssize].b = (_b),\
+ stack[ssize].c = (_c), stack[ssize++].d = (_d);\
+ } while(0)
+#define STACK_POP3(_a, _b, _c)\
+ do {\
+ assert(0 <= ssize);\
+ if(ssize == 0) { return; }\
+ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+ (_c) = stack[ssize].c;\
+ } while(0)
+#define STACK_POP(_a, _b, _c, _d)\
+ do {\
+ assert(0 <= ssize);\
+ if(ssize == 0) { return; }\
+ (_a) = stack[--ssize].a, (_b) = stack[ssize].b,\
+ (_c) = stack[ssize].c, (_d) = stack[ssize].d;\
+ } while(0)
+/* for divsufsort.c */
+#define BUCKET_A(c0) bucket_A[(c0)]
+#if ALPHABET_SIZE == 256
+#define BUCKET_B(c0, c1) (bucket_B[((c1) << 8) | (c0)])
+#define BUCKET_BSTAR(c0, c1) (bucket_B[((c0) << 8) | (c1)])
+#else
+#define BUCKET_B(c0, c1) (bucket_B[(c1) * ALPHABET_SIZE + (c0)])
+#define BUCKET_BSTAR(c0, c1) (bucket_B[(c0) * ALPHABET_SIZE + (c1)])
+#endif
+
+
+/*- Private Prototypes -*/
+/* substringsort.c */
+void
+substringsort(const sauchar_t *Td, const saidx_t *PA,
+ saidx_t *first, saidx_t *last,
+ saidx_t *buf, saidx_t bufsize,
+ saidx_t depth, saint_t lastsuffix);
+/* trsort.c */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth);
+
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif /* __cplusplus */
+
+#endif /* _DIVSUFSORT_PRIVATE_H_ */
diff --git a/lib/Makefile.am b/lib/Makefile.am
new file mode 100644
index 0000000..cf34c43
--- /dev/null
+++ b/lib/Makefile.am
@@ -0,0 +1,9 @@
+# Makefile.am for libdivsufsort
+
+lib_LTLIBRARIES = libdivsufsort.la
+libdivsufsort_la_SOURCES = divsufsort.c substringsort.c trsort.c utils.c
+libdivsufsort_la_LDFLAGS = -version-info @LT_CURRENT@:@LT_REVISION@:@LT_AGE@ @LT_NOUNDEF@ -export-symbols libdivsufsort.sym
+
+AM_CFLAGS = @A_CFLAGS@
+
+EXTRA_DIST = libdivsufsort.sym
diff --git a/lib/divsufsort.c b/lib/divsufsort.c
new file mode 100644
index 0000000..8615246
--- /dev/null
+++ b/lib/divsufsort.c
@@ -0,0 +1,362 @@
+/*
+ * divsufsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+/* Sorts suffixes of type B*. */
+static
+saidx_t
+_sort_typeBstar(const sauchar_t *T, saidx_t *SA,
+ saidx_t *bucket_A, saidx_t *bucket_B,
+ saidx_t n) {
+ saidx_t *PAb, *ISAb, *buf;
+ saidx_t i, j, k, t, m, bufsize;
+ saint_t c0, c1;
+
+ /* Initialize bucket arrays. */
+ for(i = 0; i < BUCKET_A_SIZE; ++i) { bucket_A[i] = 0; }
+ for(i = 0; i < BUCKET_B_SIZE; ++i) { bucket_B[i] = 0; }
+
+ /* Count the number of occurrences of the first one or two characters of each
+ type A, B and B* suffix. Moreover, store the beginning position of all
+ type B* suffixes into the array SA. */
+ for(i = n - 1, m = n; 0 <= i;) {
+ /* type A suffix. */
+ do { ++BUCKET_A(T[i]); } while((0 <= --i) && (T[i] >= T[i + 1]));
+ if(0 <= i) {
+ /* type B* suffix. */
+ ++BUCKET_BSTAR(T[i], T[i + 1]);
+ SA[--m] = i;
+ /* type B suffix. */
+ for(--i; (0 <= i) && (T[i] <= T[i + 1]); --i) {
+ ++BUCKET_B(T[i], T[i + 1]);
+ }
+ }
+ }
+ m = n - m;
+/*
+note:
+ A type B* suffix is lexicographically smaller than a type B suffix that
+ begins with the same first two characters.
+*/
+
+ /* Calculate the index of start/end point of each bucket. */
+ for(c0 = 0, i = 0, j = 0; c0 < ALPHABET_SIZE; ++c0) {
+ t = i + BUCKET_A(c0);
+ BUCKET_A(c0) = i + j; /* start point */
+ i = t + BUCKET_B(c0, c0);
+ for(c1 = c0 + 1; c1 < ALPHABET_SIZE; ++c1) {
+ j += BUCKET_BSTAR(c0, c1);
+ BUCKET_BSTAR(c0, c1) = j; /* end point */
+ i += BUCKET_B(c0, c1);
+ }
+ }
+
+ if(0 < m) {
+ /* Sort the type B* suffixes by their first two characters. */
+ PAb = SA + n - m; ISAb = SA + m;
+ PAb[m] = n - 2; /* for sentinel. */
+ for(i = m - 2; 0 <= i; --i) {
+ t = PAb[i], c0 = T[t], c1 = T[t + 1];
+ SA[--BUCKET_BSTAR(c0, c1)] = i;
+ }
+ t = PAb[m - 1], c0 = T[t], c1 = T[t + 1];
+ SA[--BUCKET_BSTAR(c0, c1)] = m - 1;
+
+ /* Sort the type B* substrings using sssort. */
+ buf = SA + m, bufsize = n - (2 * m);
+ if(bufsize <= LOCALMERGE_BUFFERSIZE) {
+ if((buf = malloc(LOCALMERGE_BUFFERSIZE * sizeof(saidx_t))) == NULL) {
+ return -1;
+ }
+ bufsize = LOCALMERGE_BUFFERSIZE;
+ }
+ for(c0 = ALPHABET_SIZE - 1, j = m; 0 < j; --c0) {
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; j = i, --c1) {
+ i = BUCKET_BSTAR(c0, c1);
+ if(1 < (j - i)) {
+ substringsort(T, PAb, SA + i, SA + j,
+ buf, bufsize, 2, *(SA + i) == (m - 1));
+ }
+ }
+ }
+ if(bufsize == LOCALMERGE_BUFFERSIZE) { free(buf); }
+
+ /* Compute ranks of type B* substrings. */
+ for(i = m - 1; 0 <= i; --i) {
+ if(0 <= SA[i]) {
+ j = i;
+ do { ISAb[SA[i]] = i; } while((0 <= --i) && (0 <= SA[i]));
+ SA[i + 1] = i - j;
+ if(i <= 0) { break; }
+ }
+ j = i;
+ do { ISAb[SA[i] = ~SA[i]] = j; } while(SA[--i] < 0);
+ ISAb[SA[i]] = j;
+ }
+
+ /* Construct the inverse suffix array of type B* suffixes using trsort. */
+ trsort(ISAb, SA, m, 1);
+
+ /* Set the sorted order of tyoe B* suffixes. */
+ for(i = n - 1, j = m; 0 <= i;) {
+ for(--i; (0 <= i) && (T[i] >= T[i + 1]); --i) { }
+ if(0 <= i) {
+ SA[ISAb[--j]] = i;
+ for(--i; (0 <= i) && (T[i] <= T[i + 1]); --i) { }
+ }
+ }
+
+ /* Calculate the index of start/end point of each bucket. */
+ for(c0 = ALPHABET_SIZE - 1, i = n, k = m - 1; 0 <= c0; --c0) {
+ for(c1 = ALPHABET_SIZE - 1; c0 < c1; --c1) {
+ t = i - BUCKET_B(c0, c1);
+ BUCKET_B(c0, c1) = i + 1; /* end point */
+
+ /* Move all type B* suffixes to the correct position. */
+ for(i = t, j = BUCKET_BSTAR(c0, c1);
+ j <= k;
+ --i, --k) { SA[i] = SA[k]; }
+ }
+ t = i - BUCKET_B(c0, c0);
+ BUCKET_B(c0, c0) = i + 1; /* end point */
+ if(c0 < (ALPHABET_SIZE - 1)) {
+ BUCKET_BSTAR(c0, c0 + 1) = t + 1; /* start point */
+ }
+ i = BUCKET_A(c0);
+ }
+ }
+
+ return m;
+}
+
+/* Constructs the suffix array by using the sorted order of type B* suffixes. */
+static
+void
+_construct_SA(const sauchar_t *T, saidx_t *SA,
+ saidx_t *bucket_A, saidx_t *bucket_B,
+ saidx_t n, saidx_t m) {
+ saidx_t *i, *j, *t;
+ saidx_t s;
+ saint_t c0, c1, c2;
+
+ /** An implementation version of MSufSort3's second stage. **/
+
+ if(0 < m) {
+ /* Construct the sorted order of type B suffixes by using
+ the sorted order of type B* suffixes. */
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+ /* Scan the suffix array from right to left. */
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+ j = SA + BUCKET_A(c1 + 1), t = NULL, c2 = -1;
+ i <= j;
+ --j) {
+ if(0 <= (s = *j)) {
+ if((0 <= --s) && ((c0 = T[s]) <= c1)) {
+ *j = ~(s + 1);
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+ if(c2 == c0) { *--t = s; }
+ else {
+ if(0 <= c2) { BUCKET_B(c2, c1) = t - SA; }
+ *(t = SA + BUCKET_B(c2 = c0, c1) - 1) = s;
+ }
+ }
+ } else {
+ *j = ~s;
+ }
+ }
+ }
+ }
+
+ /* Construct the suffix array by using
+ the sorted order of type B suffixes. */
+ SA[0] = n;
+ *(t = SA + BUCKET_A(c2 = T[n - 1]) + 1) = n - 1;
+ /* Scan the suffix array from left to right. */
+ for(i = SA + 1, j = SA + n; i <= j; ++i) {
+ if(0 <= (s = *i)) {
+ if((0 <= --s) && ((c0 = T[s]) >= T[s + 1])) {
+ if((0 < s) && (T[s - 1] < c0)) { s = ~s; }
+ if(c0 == c2) { *++t = s; }
+ else {
+ BUCKET_A(c2) = t - SA;
+ *(t = SA + BUCKET_A(c2 = c0) + 1) = s;
+ }
+ }
+ } else {
+ *i = ~s;
+ }
+ }
+}
+
+/* Constructs the burrows-wheeler transformed string directly
+ by using the sorted order of type B* suffixes. */
+static
+saidx_t
+_construct_BWT(const sauchar_t *T, saidx_t *SA,
+ saidx_t *bucket_A, saidx_t *bucket_B,
+ saidx_t n, saidx_t m) {
+ saidx_t *i, *j, *t, *orig;
+ saidx_t s;
+ saint_t c0, c1, c2;
+
+ /** An implementation version of MSufSort3's semidirect BWT. **/
+
+ if(0 < m) {
+ /* Construct the sorted order of type B suffixes by using
+ the sorted order of type B* suffixes. */
+ for(c1 = ALPHABET_SIZE - 2; 0 <= c1; --c1) {
+ /* Scan the suffix array from right to left. */
+ for(i = SA + BUCKET_BSTAR(c1, c1 + 1),
+ j = SA + BUCKET_A(c1 + 1), t = NULL, c2 = -1;
+ i <= j;
+ --j) {
+ if(0 <= (s = *j)) {
+ if((0 <= --s) && ((c0 = T[s]) <= c1)) {
+ *j = ~((saidx_t)c0);
+ if((0 < s) && (T[s - 1] > c0)) { s = ~s; }
+ if(c0 == c2) { *--t = s; }
+ else {
+ if(0 <= c2) { BUCKET_B(c2, c1) = t - SA; }
+ *(t = SA + BUCKET_B(c2 = c0, c1) - 1) = s;
+ }
+ }
+ } else {
+ *j = ~s;
+ }
+ }
+ }
+ }
+
+ /* Construct the BWTed string by using
+ the sorted order of type B suffixes. */
+ c0 = T[s = n - 1];
+ SA[0] = c0;
+ if(T[s - 1] < c0) { s = ~((saidx_t)T[s - 1]); }
+ *(t = SA + BUCKET_A(c2 = c0) + 1) = s;
+ /* Scan the suffix array from left to right. */
+ for(i = SA + 1, j = SA + n, orig = SA - 1; i <= j; ++i) {
+ if(0 <= (s = *i)) {
+ if((0 <= --s) && ((c0 = T[s]) >= T[s + 1])) {
+ *i = c0;
+ if((0 < s) && (T[s - 1] < c0)) { s = ~((saidx_t)T[s - 1]); }
+ if(c0 == c2) { *++t = s; }
+ else {
+ BUCKET_A(c2) = t - SA;
+ *(t = SA + BUCKET_A(c2 = c0) + 1) = s;
+ }
+ } else if(s < 0) { orig = i; }
+ } else {
+ *i = ~s;
+ }
+ }
+
+ return orig - SA;
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+saint_t
+divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
+ saidx_t *bucket_A, *bucket_B;
+ saidx_t m;
+ saint_t err = 0;
+
+ /* Check arguments. */
+ if((T == NULL) || (SA == NULL) || (n < 0)) { return -1; }
+ else if(n == 0) { SA[0] = 0; return 0; }
+ else if(n == 1) { SA[0] = 1; SA[1] = 0; return 0; }
+
+ bucket_A = malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+ bucket_B = malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+ /* Suffixsort. */
+ if((bucket_A != NULL) && (bucket_B != NULL)) {
+ m = _sort_typeBstar(T, SA, bucket_A, bucket_B, n);
+ if(0 <= m) {
+ _construct_SA(T, SA, bucket_A, bucket_B, n, m);
+ } else {
+ err = -3;
+ }
+ } else {
+ err = -2;
+ }
+
+ free(bucket_B);
+ free(bucket_A);
+
+ return err;
+}
+
+saidx_t
+divbwt(const sauchar_t *T, sauchar_t *U, saidx_t *A, saidx_t n) {
+ saidx_t *B;
+ saidx_t *bucket_A, *bucket_B;
+ saidx_t m, pidx = -1, i;
+ saint_t err = 0;
+
+ /* Check arguments. */
+ if((T == NULL) || (U == NULL) || (n < 0)) { return -1; }
+ else if(n <= 1) { if(n == 1) { U[0] = T[0]; } return n; }
+
+ if((B = A) == NULL) { B = malloc((n + 1) * sizeof(saidx_t)); }
+ bucket_A = malloc(BUCKET_A_SIZE * sizeof(saidx_t));
+ bucket_B = malloc(BUCKET_B_SIZE * sizeof(saidx_t));
+
+ /* Burrows-Wheeler Transform. */
+ if((B != NULL) && (bucket_A != NULL) && (bucket_B != NULL)) {
+ m = _sort_typeBstar(T, B, bucket_A, bucket_B, n);
+ if(0 <= m) {
+ pidx = _construct_BWT(T, B, bucket_A, bucket_B, n, m);
+
+ /* Copy to output string. */
+ for(i = 0; i < pidx; ++i) { U[i] = B[i]; }
+ for(; i < n; ++i) { U[i] = B[i + 1]; }
+ } else {
+ err = -3;
+ }
+ } else {
+ err = -2;
+ }
+
+ free(bucket_B);
+ free(bucket_A);
+ if(A == NULL) { free(B); }
+
+ return pidx;
+}
+
+
+const char *
+divsufsort_version(void) {
+ return PACKAGE_VERSION;
+}
diff --git a/lib/libdivsufsort.sym b/lib/libdivsufsort.sym
new file mode 100644
index 0000000..2dcda95
--- /dev/null
+++ b/lib/libdivsufsort.sym
@@ -0,0 +1,8 @@
+divsufsort
+divbwt
+divsufsort_version
+bw_transform
+inverse_bw_transform
+sufcheck
+sa_search
+sa_simplesearch
diff --git a/lib/substringsort.c b/lib/substringsort.c
new file mode 100644
index 0000000..e628f94
--- /dev/null
+++ b/lib/substringsort.c
@@ -0,0 +1,619 @@
+/*
+ * substringsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+/* Compares two suffixes. */
+static inline
+saint_t
+_compare(const sauchar_t *T,
+ const saidx_t *p1, const saidx_t *p2,
+ saidx_t depth) {
+ const sauchar_t *U1, *U2, *U1n, *U2n;
+
+ for(U1 = T + depth + *p1,
+ U2 = T + depth + *p2,
+ U1n = T + *(p1 + 1) + 2,
+ U2n = T + *(p2 + 1) + 2;
+ (U1 < U1n) && (U2 < U2n) && (*U1 == *U2);
+ ++U1, ++U2) {
+ }
+
+ return U1 < U1n ?
+ (U2 < U2n ? *U1 - *U2 : 1) :
+ (U2 < U2n ? -1 : 0);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Insertionsort for small size groups */
+static
+void
+_insertionsort(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *last, saidx_t depth) {
+ saidx_t *i, *j;
+ saidx_t t;
+ saint_t r;
+
+ for(i = last - 2; first <= i; --i) {
+ for(t = *i, j = i + 1; 0 < (r = _compare(T, PA + t, PA + *j, depth));) {
+ do { *(j - 1) = *j; } while((++j < last) && (*j < 0));
+ if(last <= j) { break; }
+ }
+ if(r == 0) { *j = ~*j; }
+ *(j - 1) = t;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static inline
+void
+_fixdown(const sauchar_t *Td, const saidx_t *PA,
+ saidx_t *SA, saidx_t i, saidx_t size) {
+ saidx_t j, k;
+ saidx_t v;
+ saint_t c, d, e;
+
+ for(v = SA[i], c = Td[PA[v]]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+ d = Td[PA[SA[k = j++]]];
+ if(d < (e = Td[PA[SA[j]]])) { k = j; d = e; }
+ if(d <= c) { break; }
+ }
+ SA[i] = v;
+}
+
+/* Simple top-down heapsort. */
+static
+void
+_heapsort(const sauchar_t *Td, const saidx_t *PA, saidx_t *SA, saidx_t size) {
+ saidx_t i, m;
+ saidx_t t;
+
+ m = size;
+ if((size % 2) == 0) {
+ m--;
+ if(Td[PA[SA[m / 2]]] < Td[PA[SA[m]]]) { SWAP(SA[m], SA[m / 2]); }
+ }
+
+ for(i = m / 2 - 1; 0 <= i; --i) { _fixdown(Td, PA, SA, i, m); }
+
+ if((size % 2) == 0) {
+ SWAP(SA[0], SA[m]);
+ _fixdown(Td, PA, SA, 0, m);
+ }
+
+ for(i = m - 1; 0 < i; --i) {
+ t = SA[0];
+ SA[0] = SA[i];
+ _fixdown(Td, PA, SA, 0, i);
+ SA[i] = t;
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements. */
+static inline
+saidx_t *
+_median3(const sauchar_t *Td, const saidx_t *PA,
+ saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+ saidx_t *t;
+ if(Td[PA[*v1]] > Td[PA[*v2]]) { SWAP(v1, v2); }
+ if(Td[PA[*v2]] > Td[PA[*v3]]) {
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { return v1; }
+ else { return v3; }
+ }
+ return v2;
+}
+
+/* Returns the median of five elements. */
+static inline
+saidx_t *
+_median5(const sauchar_t *Td, const saidx_t *PA,
+ saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+ saidx_t *t;
+ if(Td[PA[*v2]] > Td[PA[*v3]]) { SWAP(v2, v3); }
+ if(Td[PA[*v4]] > Td[PA[*v5]]) { SWAP(v4, v5); }
+ if(Td[PA[*v2]] > Td[PA[*v4]]) { SWAP(v2, v4); SWAP(v3, v5); }
+ if(Td[PA[*v1]] > Td[PA[*v3]]) { SWAP(v1, v3); }
+ if(Td[PA[*v1]] > Td[PA[*v4]]) { SWAP(v1, v4); SWAP(v3, v5); }
+ if(Td[PA[*v3]] > Td[PA[*v4]]) { return v4; }
+ return v3;
+}
+
+/* Returns the pivot element. */
+static inline
+saidx_t *
+_pivot(const sauchar_t *Td, const saidx_t *PA, saidx_t *first, saidx_t *last) {
+ saidx_t *middle;
+ saidx_t t;
+
+ t = last - first;
+ middle = first + t / 2;
+
+ if(t <= 512) {
+ if(t <= 32) {
+ return _median3(Td, PA, first, middle, last - 1);
+ } else {
+ t >>= 2;
+ return _median5(Td, PA,
+ first, first + t, middle,
+ last - 1 - t, last - 1);
+ }
+ }
+ t >>= 3;
+ return _median3(Td, PA,
+ _median3(Td, PA, first, first + t, first + (t << 1)),
+ _median3(Td, PA, middle - t, middle, middle + t),
+ _median3(Td, PA, last - 1 - (t << 1), last - 1 - t, last - 1));
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static inline
+saidx_t
+_lg(saidx_t n) {
+static const int log2table[256]= {
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+ /* for 16 bits */
+ return ((n & 0xff00) != 0) ?
+ 8 + log2table[(n >> 8) & 0xff] :
+ log2table[n & 0xff];
+
+ /* for 32 bits */
+/*
+ return (n & 0xffff0000) ?
+ ((n & 0xff000000) ?
+ 24 + log2table[(n >> 24) & 0xff] :
+ 16 + log2table[(n >> 16) & 0xff]) :
+ ((n & 0x0000ff00) ?
+ 8 + log2table[(n >> 8) & 0xff] :
+ 0 + log2table[(n >> 0) & 0xff]);
+*/
+}
+
+/* Binary partition for substrings. */
+static inline
+saidx_t *
+_substring_partition(const saidx_t *PA,
+ saidx_t *first, saidx_t *last, saidx_t depth) {
+ saidx_t *a, *b;
+ saidx_t t;
+ for(a = first - 1, b = last;;) {
+ for(; (++a < b) && ((PA[*a] + depth) >= (PA[*a + 1] + 1));) { *a = ~*a; }
+ for(; (a < --b) && ((PA[*b] + depth) < (PA[*b + 1] + 1));) { }
+ if(b <= a) { break; }
+ t = ~*b;
+ *b = *a;
+ *a = t;
+ }
+ if(first < a) { *first = ~*first; }
+ return a;
+}
+
+/* Multikey introsort for medium size groups. */
+static
+void
+_multikey_introsort(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *last,
+ saidx_t depth) {
+ struct { saidx_t *a, *b, c, d; } stack[STACK_SIZE];
+ const sauchar_t *Td;
+ saidx_t *a, *b, *c, *d, *e, *f;
+ saidx_t s, t;
+ saidx_t ssize;
+ saidx_t limit;
+ saint_t v, x = 0;
+
+ for(ssize = 0, limit = _lg(last - first);;) {
+
+ if((last - first) <= SS_INSERTIONSORT_THRESHOLD) {
+ if(1 < (last - first)) { _insertionsort(T, PA, first, last, depth); }
+ STACK_POP(first, last, depth, limit);
+ continue;
+ }
+
+ Td = T + depth;
+ if(limit-- == 0) { _heapsort(Td, PA, first, last - first); }
+ if(limit < 0) {
+ for(a = first + 1, v = Td[PA[*first]]; a < last; ++a) {
+ if((x = Td[PA[*a]]) != v) {
+ if(1 < (a - first)) { break; }
+ v = x;
+ first = a;
+ }
+ }
+ if(Td[PA[*first] - 1] < v) {
+ first = _substring_partition(PA, first, a, depth);
+ }
+ if((a - first) <= (last - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH(a, last, depth, -1);
+ last = a, depth += 1, limit = _lg(a - first);
+ } else {
+ first = a, limit = -1;
+ }
+ } else {
+ if(1 < (last - a)) {
+ STACK_PUSH(first, a, depth + 1, _lg(a - first));
+ first = a, limit = -1;
+ } else {
+ last = a, depth += 1, limit = _lg(a - first);
+ }
+ }
+ continue;
+ }
+
+ /* choose pivot */
+ a = _pivot(Td, PA, first, last);
+ v = Td[PA[*a]];
+ SWAP(*first, *a);
+
+ /* partition */
+ for(b = first; (++b < last) && ((x = Td[PA[*b]]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = Td[PA[*b]]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = Td[PA[*c]]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = Td[PA[*b]]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = Td[PA[*c]]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+ a = first + (b - a), c = last - (d - c);
+ b = (v <= Td[PA[*a] - 1]) ? a : _substring_partition(PA, a, c, depth);
+
+ if((a - first) <= (last - c)) {
+ if((last - c) <= (c - b)) {
+ STACK_PUSH(b, c, depth + 1, _lg(c - b));
+ STACK_PUSH(c, last, depth, limit);
+ last = a;
+ } else if((a - first) <= (c - b)) {
+ STACK_PUSH(c, last, depth, limit);
+ STACK_PUSH(b, c, depth + 1, _lg(c - b));
+ last = a;
+ } else {
+ STACK_PUSH(c, last, depth, limit);
+ STACK_PUSH(first, a, depth, limit);
+ first = b, last = c, depth += 1, limit = _lg(c - b);
+ }
+ } else {
+ if((a - first) <= (c - b)) {
+ STACK_PUSH(b, c, depth + 1, _lg(c - b));
+ STACK_PUSH(first, a, depth, limit);
+ first = c;
+ } else if((last - c) <= (c - b)) {
+ STACK_PUSH(first, a, depth, limit);
+ STACK_PUSH(b, c, depth + 1, _lg(c - b));
+ first = c;
+ } else {
+ STACK_PUSH(first, a, depth, limit);
+ STACK_PUSH(c, last, depth, limit);
+ first = b, last = c, depth += 1, limit = _lg(c - b);
+ }
+ }
+ } else {
+ limit += 1;
+ if(Td[PA[*first] - 1] < v) {
+ first = _substring_partition(PA, first, last, depth);
+ limit = _lg(last - first);
+ }
+ depth += 1;
+ }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Block swapping */
+static inline
+void
+_block_swap(saidx_t *first1, saidx_t *first2, saidx_t size) {
+ saidx_t *a, *b;
+ saidx_t i, t;
+ for(i = size, a = first1, b = first2; 0 < i; --i, ++a, ++b) {
+ SWAP(*a, *b);
+ }
+}
+
+/* Merge-forward with internal buffer. */
+static
+void
+_merge_forward(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *buf, saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t depth) {
+ saidx_t *bufend;
+ saidx_t *i, *j, *k;
+ saidx_t t;
+ saint_t r;
+
+ bufend = buf + (middle - first) - 1;
+ _block_swap(buf, first, middle - first);
+
+ for(t = *first, i = first, j = buf, k = middle;;) {
+ r = _compare(T, PA + *j, PA + *k, depth);
+ if(r < 0) {
+ do {
+ *i++ = *j;
+ if(bufend <= j) { *j = t; return; }
+ *j++ = *i;
+ } while(*j < 0);
+ } else if(r > 0) {
+ do {
+ *i++ = *k, *k++ = *i;
+ if(last <= k) {
+ while(j < bufend) { *i++ = *j, *j++ = *i; }
+ *i = *j, *j = t;
+ return;
+ }
+ } while(*k < 0);
+ } else {
+ *k = ~*k;
+ do {
+ *i++ = *j;
+ if(bufend <= j) { *j = t; return; }
+ *j++ = *i;
+ } while(*j < 0);
+
+ do {
+ *i++ = *k, *k++ = *i;
+ if(last <= k) {
+ while(j < bufend) { *i++ = *j, *j++ = *i; }
+ *i = *j, *j = t;
+ return;
+ }
+ } while(*k < 0);
+ }
+ }
+}
+
+/* Merge-backward with internal buffer. */
+static
+void
+_merge_backward(const sauchar_t *T, const saidx_t *PA, saidx_t *buf,
+ saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t depth) {
+ const saidx_t *p1, *p2;
+ saidx_t *bufend;
+ saidx_t *i, *j, *k;
+ saidx_t t;
+ saint_t r;
+ saint_t x;
+
+ bufend = buf + (last - middle);
+ _block_swap(buf, middle, last - middle);
+
+ x = 0;
+ if(*(bufend - 1) < 0) { x |= 1; p1 = PA + ~*(bufend - 1); }
+ else { p1 = PA + *(bufend - 1); }
+ if(*(middle - 1) < 0) { x |= 2; p2 = PA + ~*(middle - 1); }
+ else { p2 = PA + *(middle - 1); }
+ for(t = *(last - 1), i = last - 1, j = bufend - 1, k = middle - 1;;) {
+
+ r = _compare(T, p1, p2, depth);
+ if(r > 0) {
+ if(x & 1) { do { *i-- = *j; *j-- = *i; } while(*j < 0); x ^= 1; }
+ *i-- = *j;
+ if(j <= buf) { *j = t; return; }
+ *j-- = *i;
+
+ if(*j < 0) { x |= 1; p1 = PA + ~*j; }
+ else { p1 = PA + *j; }
+ } else if(r < 0) {
+ if(x & 2) { do { *i-- = *k; *k-- = *i; } while(*k < 0); x ^= 2; }
+ *i-- = *k; *k-- = *i;
+ if(k < first) {
+ while(buf < j) { *i-- = *j; *j-- = *i; }
+ *i = *j, *j = t;
+ return;
+ }
+
+ if(*k < 0) { x |= 2; p2 = PA + ~*k; }
+ else { p2 = PA + *k; }
+ } else {
+ if(x & 1) { do { *i-- = *j; *j-- = *i; } while(*j < 0); x ^= 1; }
+ *i-- = ~*j;
+ if(j <= buf) { *j = t; return; }
+ *j-- = *i;
+
+ if(x & 2) { do { *i-- = *k; *k-- = *i; } while(*k < 0); x ^= 2; }
+ *i-- = *k; *k-- = *i;
+ if(k < first) {
+ while(buf < j) { *i-- = *j; *j-- = *i; }
+ *i = *j, *j = t;
+ return;
+ }
+
+ if(*j < 0) { x |= 1; p1 = PA + ~*j; }
+ else { p1 = PA + *j; }
+ if(*k < 0) { x |= 2; p2 = PA + ~*k; }
+ else { p2 = PA + *k; }
+ }
+ }
+}
+
+/* Faster merge (based on divide and conquer technique). */
+static
+void
+_merge(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *middle, saidx_t *last,
+ saidx_t *buf, saidx_t bufsize,
+ saidx_t depth) {
+#define GETIDX(a) ((0 <= (a)) ? (a) : (~(a)))
+#define MERGE_CHECK_EQUAL(a)\
+ do {\
+ if((0 <= *(a)) &&\
+ (_compare(T, PA + GETIDX(*((a) - 1)), PA + *(a), depth) == 0)) {\
+ *(a) = ~*(a);\
+ }\
+ } while(0)
+ struct { saidx_t *a, *b, *c; int d; } stack[STACK_SIZE];
+ saidx_t *i, *j;
+ saidx_t m, len, half;
+ saidx_t ssize;
+ int check, next;
+
+ for(check = 0, ssize = 0;;) {
+
+ if((last - middle) <= bufsize) {
+ if((first < middle) && (middle < last)) {
+ _merge_backward(T, PA, buf, first, middle, last, depth);
+ }
+ if(check & 1) { MERGE_CHECK_EQUAL(first); }
+ if(check & 2) { MERGE_CHECK_EQUAL(last); }
+ STACK_POP(first, middle, last, check);
+ continue;
+ }
+
+ if((middle - first) <= bufsize) {
+ if(first < middle) {
+ _merge_forward(T, PA, buf, first, middle, last, depth);
+ }
+ if(check & 1) { MERGE_CHECK_EQUAL(first); }
+ if(check & 2) { MERGE_CHECK_EQUAL(last); }
+ STACK_POP(first, middle, last, check);
+ continue;
+ }
+
+ for(m = 0, len = MIN(middle - first, last - middle), half = len >> 1;
+ 0 < len;
+ len = half, half >>= 1) {
+ if(_compare(T, PA + GETIDX(*(middle + m + half)),
+ PA + GETIDX(*(middle - m - half - 1)), depth) < 0) {
+ m += half + 1;
+ half -= (len & 1) ^ 1;
+ }
+ }
+
+ if(0 < m) {
+ _block_swap(middle - m, middle, m);
+ i = j = middle, next = 0;
+ if((middle + m) < last) {
+ if(*(middle + m) < 0) {
+ for(; *(i - 1) < 0; --i) { }
+ *(middle + m) = ~*(middle + m);
+ }
+ for(j = middle; *j < 0; ++j) { }
+ next = 1;
+ }
+ if((i - first) <= (last - j)) {
+ STACK_PUSH(j, middle + m, last, (check & 2) | (next & 1));
+ middle -= m, last = i, check = (check & 1);
+ } else {
+ if((i == middle) && (middle == j)) { next <<= 1; }
+ STACK_PUSH(first, middle - m, i, (check & 1) | (next & 2));
+ first = j, middle += m, check = (check & 2) | (next & 1);
+ }
+ } else {
+ if(check & 1) { MERGE_CHECK_EQUAL(first); }
+ MERGE_CHECK_EQUAL(middle);
+ if(check & 2) { MERGE_CHECK_EQUAL(last); }
+ STACK_POP(first, middle, last, check);
+ }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* Substring sort */
+void
+substringsort(const sauchar_t *T, const saidx_t *PA,
+ saidx_t *first, saidx_t *last,
+ saidx_t *buf, saidx_t bufsize,
+ saidx_t depth, saint_t lastsuffix) {
+ saidx_t *a, *b;
+ saidx_t *curbuf;
+ saidx_t i, j, k;
+ saidx_t curbufsize;
+
+ if(lastsuffix != 0) { ++first; }
+ for(a = first, i = 0; (a + SS_BLOCKSIZE) < last; a += SS_BLOCKSIZE, ++i) {
+ _multikey_introsort(T, PA, a, a + SS_BLOCKSIZE, depth);
+ curbuf = a + SS_BLOCKSIZE;
+ curbufsize = last - (a + SS_BLOCKSIZE);
+ if(curbufsize <= bufsize) { curbufsize = bufsize, curbuf = buf; }
+ for(b = a, k = SS_BLOCKSIZE, j = i; j & 1; b -= k, k <<= 1, j >>= 1) {
+ _merge(T, PA, b - k, b, b + k, curbuf, curbufsize, depth);
+ }
+ }
+ _multikey_introsort(T, PA, a, last, depth);
+ for(k = SS_BLOCKSIZE; i != 0; k <<= 1, i >>= 1) {
+ if(i & 1) {
+ _merge(T, PA, a - k, a, last, buf, bufsize, depth);
+ a -= k;
+ }
+ }
+
+ if(lastsuffix != 0) {
+ /* Insert last type B* suffix. */
+ for(a = first, i = *(first - 1);
+ (a < last) && ((*a < 0) || (0 < _compare(T, PA + i, PA + *a, depth)));
+ ++a) {
+ *(a - 1) = *a;
+ }
+ *(a - 1) = i;
+ }
+}
diff --git a/lib/trsort.c b/lib/trsort.c
new file mode 100644
index 0000000..9e5670e
--- /dev/null
+++ b/lib/trsort.c
@@ -0,0 +1,684 @@
+/*
+ * trsort.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Functions -*/
+
+static inline
+void
+_fixdown(const saidx_t *ISAd, saidx_t *SA, saidx_t i, saidx_t size) {
+ saidx_t j, k;
+ saidx_t v;
+ saidx_t c, d, e;
+
+ for(v = SA[i], c = ISAd[v]; (j = 2 * i + 1) < size; SA[i] = SA[k], i = k) {
+ d = ISAd[SA[k = j++]];
+ if(d < (e = ISAd[SA[j]])) { k = j; d = e; }
+ if(d <= c) { break; }
+ }
+ SA[i] = v;
+}
+
+/* Simple top-down heapsort. */
+static
+void
+_heapsort(const saidx_t *ISAd, saidx_t *SA, saidx_t size) {
+ saidx_t i, m;
+ saidx_t t;
+
+ m = size;
+ if((size % 2) == 0) {
+ m--;
+ if(ISAd[SA[m / 2]] < ISAd[SA[m]]) {
+ SWAP(SA[m], SA[m / 2]);
+ }
+ }
+
+ for(i = m / 2 - 1; 0 <= i; --i) {
+ _fixdown(ISAd, SA, i, m);
+ }
+
+ if((size % 2) == 0) {
+ SWAP(SA[0], SA[m]);
+ _fixdown(ISAd, SA, 0, m);
+ }
+
+ for(i = m - 1; 0 < i; --i) {
+ t = SA[0];
+ SA[0] = SA[i];
+ _fixdown(ISAd, SA, 0, i);
+ SA[i] = t;
+ }
+}
+
+/* Simple insertionsort for small size groups. */
+static
+void
+_insertionsort(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+ saidx_t *a, *b;
+ saidx_t t, r;
+
+ for(a = first + 1; a < last; ++a) {
+ for(t = *a, b = a - 1; 0 > (r = ISAd[t] - ISAd[*b]);) {
+ do { *(b + 1) = *b; } while((first <= --b) && (*b < 0));
+ if(b < first) { break; }
+ }
+ if(r == 0) { *b = ~*b; }
+ *(b + 1) = t;
+ }
+}
+
+static inline
+saidx_t
+_lg(saidx_t n) {
+static const int log2table[256]= {
+ -1,0,1,1,2,2,2,2,3,3,3,3,3,3,3,3,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
+ 5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,5,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,6,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,
+ 7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7
+};
+ /* for 16 bits */
+/*
+ return ((n & 0xff00) != 0) ?
+ 8 + log2table[(n >> 8) & 0xff] :
+ log2table[n & 0xff];
+*/
+ /* for 32 bits */
+ return (n & 0xffff0000) ?
+ ((n & 0xff000000) ?
+ 24 + log2table[(n >> 24) & 0xff] :
+ 16 + log2table[(n >> 16) & 0xff]) :
+ ((n & 0x0000ff00) ?
+ 8 + log2table[(n >> 8) & 0xff] :
+ 0 + log2table[(n >> 0) & 0xff]);
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Returns the median of three elements. */
+static inline
+saidx_t *
+_median3(const saidx_t *ISAd, saidx_t *v1, saidx_t *v2, saidx_t *v3) {
+ saidx_t *t;
+ if(ISAd[*v1] > ISAd[*v2]) { SWAP(v1, v2); }
+ if(ISAd[*v2] > ISAd[*v3]) {
+ if(ISAd[*v1] > ISAd[*v3]) { return v1; }
+ else { return v3; }
+ }
+ return v2;
+}
+
+/* Returns the median of five elements. */
+static inline
+saidx_t *
+_median5(const saidx_t *ISAd,
+ saidx_t *v1, saidx_t *v2, saidx_t *v3, saidx_t *v4, saidx_t *v5) {
+ saidx_t *t;
+ if(ISAd[*v2] > ISAd[*v3]) { SWAP(v2, v3); }
+ if(ISAd[*v4] > ISAd[*v5]) { SWAP(v4, v5); }
+ if(ISAd[*v2] > ISAd[*v4]) { SWAP(v2, v4); SWAP(v3, v5); }
+ if(ISAd[*v1] > ISAd[*v3]) { SWAP(v1, v3); }
+ if(ISAd[*v1] > ISAd[*v4]) { SWAP(v1, v4); SWAP(v3, v5); }
+ if(ISAd[*v3] > ISAd[*v4]) { return v4; }
+ return v3;
+}
+
+/* Returns the pivot element. */
+static inline
+saidx_t *
+_pivot(const saidx_t *ISAd, saidx_t *first, saidx_t *last) {
+ saidx_t *middle;
+ saidx_t t;
+
+ t = last - first;
+ middle = first + t / 2;
+
+ if(t <= 512) {
+ if(t <= 32) {
+ return _median3(ISAd, first, middle, last - 1);
+ } else {
+ t >>= 2;
+ return _median5(ISAd,
+ first, first + t,
+ middle,
+ last - 1 - t, last - 1);
+ }
+ }
+ t >>= 3;
+ return _median3(ISAd,
+ _median3(ISAd, first, first + t, first + (t << 1)),
+ _median3(ISAd, middle - t, middle, middle + t),
+ _median3(ISAd, last - 1 - (t << 1), last - 1 - t, last - 1));
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Update group numbers. */
+static
+void
+_ls_updategroup(saidx_t *ISA, const saidx_t *SA,
+ saidx_t *first, saidx_t *last) {
+ saidx_t *a, *b;
+ saidx_t t;
+
+ for(a = first; a < last; ++a) {
+ if(0 <= *a) {
+ b = a;
+ do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
+ *b = b - a;
+ if(last <= a) { break; }
+ }
+ b = a;
+ do { *a = ~*a; } while(*++a < 0);
+ t = a - SA;
+ do { ISA[*b] = t; } while(++b <= a);
+ }
+}
+
+/* Introspective sort. */
+static
+void
+_ls_introsort(saidx_t *ISA, saidx_t *ISAd, const saidx_t *SA,
+ saidx_t *first, saidx_t *last) {
+ struct { saidx_t *a, *b, c; } stack[STACK_SIZE];
+ saidx_t *a, *b, *c, *d, *e, *f;
+ saidx_t s, t;
+ saidx_t limit;
+ saidx_t v, x = 0;
+ int ssize;
+
+ for(ssize = 0, limit = _lg(last - first);;) {
+
+ if((last - first) <= LS_INSERTIONSORT_THRESHOLD) {
+ if(1 < (last - first)) {
+ _insertionsort(ISAd, first, last);
+ _ls_updategroup(ISA, SA, first, last);
+ } else if((last - first) == 1) { *first = -1; }
+ STACK_POP3(first, last, limit);
+ continue;
+ }
+
+ if(limit-- == 0) {
+ _heapsort(ISAd, first, last - first);
+ for(a = last - 1; first < a; a = b) {
+ for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
+ }
+ _ls_updategroup(ISA, SA, first, last);
+ STACK_POP3(first, last, limit);
+ continue;
+ }
+
+ /* choose pivot */
+ a = _pivot(ISAd, first, last);
+ SWAP(*first, *a);
+ v = ISAd[*first];
+
+ /* Two-stage double-index controlled ternary partition */
+ for(b = first; (++b < last) && ((x = ISAd[*b]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+ a = first + (b - a), b = last - (d - c);
+
+ /* update ranks */
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+ if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+ if((b - a) == 1) { *a = - 1; }
+
+ if((a - first) <= (last - b)) {
+ if(first < a) {
+ STACK_PUSH3(b, last, limit);
+ last = a;
+ } else {
+ first = b;
+ }
+ } else {
+ if(b < last) {
+ STACK_PUSH3(first, a, limit);
+ first = b;
+ } else {
+ last = a;
+ }
+ }
+ } else {
+ STACK_POP3(first, last, limit);
+ }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/* Larsson-Sadakane sort */
+static
+void
+_lssort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
+ saidx_t *ISAd;
+ saidx_t *first, *last;
+ saidx_t t, skip;
+
+ for(ISAd = ISA + depth; -n < *SA; ISAd += (ISAd - ISA)) {
+ first = SA;
+ skip = 0;
+ do {
+ if((t = *first) < 0) { first -= t; skip += t; }
+ else {
+ if(skip != 0) { *(first + skip) = skip; skip = 0; }
+ last = SA + ISA[t] + 1;
+ _ls_introsort(ISA, ISAd, SA, first, last);
+ first = last;
+ }
+ } while(first < (SA + n));
+ if(skip != 0) { *(first + skip) = skip; }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+static
+void
+_tr_partition(const saidx_t *ISAd, const saidx_t *SA,
+ saidx_t *first, saidx_t *last,
+ saidx_t **pa, saidx_t **pb, saidx_t v) {
+ saidx_t *a, *b, *c, *d, *e, *f;
+ saidx_t t, s;
+ saidx_t x = 0;
+
+ for(b = first - 1; (++b < last) && ((x = ISAd[*b]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ first += (b - a), last -= (d - c);
+ }
+ *pa = first, *pb = last;
+}
+
+static
+void
+_tr_copy(saidx_t *ISA, const saidx_t *SA,
+ saidx_t *first, saidx_t *a, saidx_t *b, saidx_t *last,
+ saidx_t depth) {
+ /* sort suffixes of middle partition
+ by using sorted order of suffixes of left and right partition. */
+ saidx_t *c, *d, *e;
+ saidx_t s, v;
+
+ v = b - SA - 1;
+ for(c = first, d = a - 1; c <= d; ++c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *++d = s;
+ ISA[s] = d - SA;
+ }
+ }
+ for(c = last - 1, e = d + 1, d = b; e < d; --c) {
+ if((0 <= (s = *c - depth)) && (ISA[s] == v)) {
+ *--d = s;
+ ISA[s] = d - SA;
+ }
+ }
+}
+
+/* Multikey introsort. */
+static
+void
+_tr_introsort(saidx_t *ISA, const saidx_t *ISAd,
+ saidx_t *SA, saidx_t *first, saidx_t *last,
+ saidx_t *budget, saidx_t *chance, saidx_t size) {
+#define UPDATE_BUDGET(n)\
+ {\
+ (*budget) -= (n);\
+ if(*budget <= 0) {\
+ if(--(*chance) == 0) { break; }\
+ (*budget) += size;\
+ }\
+ }
+ struct { const saidx_t *a; saidx_t *b, *c, d; }stack[STACK_SIZE];
+ saidx_t *a, *b, *c, *d, *e, *f;
+ saidx_t s, t;
+ saidx_t v, x = 0;
+ saidx_t limit, next;
+ int ssize;
+
+ for(ssize = 0, limit = _lg(last - first);;) {
+
+ if(limit < 0) {
+ if(limit == -1) {
+ /* tandem repeat partition */
+ UPDATE_BUDGET(last - first);
+ _tr_partition(ISAd - 1, SA, first, last, &a, &b, last - SA - 1);
+
+ /* update ranks */
+ if(a < last) {
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+ }
+ if(b < last) {
+ for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; }
+ }
+
+ /* push */
+ STACK_PUSH(NULL, a, b, 0);
+ STACK_PUSH(ISAd - 1, first, last, -2);
+ if((a - first) <= (last - b)) {
+ if(1 < (a - first)) {
+ STACK_PUSH(ISAd, b, last, _lg(last - b));
+ last = a, limit = _lg(a - first);
+ } else if(1 < (last - b)) {
+ first = b, limit = _lg(last - b);
+ } else {
+ STACK_POP(ISAd, first, last, limit);
+ }
+ } else {
+ if(1 < (last - b)) {
+ STACK_PUSH(ISAd, first, a, _lg(a - first));
+ first = b, limit = _lg(last - b);
+ } else if(1 < (a - first)) {
+ last = a, limit = _lg(a - first);
+ } else {
+ STACK_POP(ISAd, first, last, limit);
+ }
+ }
+ } else if(limit == -2) {
+ /* tandem repeat copy */
+ a = stack[--ssize].b, b = stack[ssize].c;
+ _tr_copy(ISA, SA, first, a, b, last, ISAd - ISA);
+ STACK_POP(ISAd, first, last, limit);
+ } else {
+ /* sorted partition */
+ if(0 <= *first) {
+ a = first;
+ do { ISA[*a] = a - SA; } while((++a < last) && (0 <= *a));
+ first = a;
+ }
+ if(first < last) {
+ a = first; do { *a = ~*a; } while(*++a < 0);
+ next = (ISA[*a] != ISAd[*a]) ? _lg(a - first + 1) : -1;
+ if(++a < last) { for(b = first, v = a - SA - 1; b < a; ++b) { ISA[*b] = v; } }
+
+ /* push */
+ if((a - first) <= (last - a)) {
+ STACK_PUSH(ISAd, a, last, -3);
+ ISAd += 1, last = a, limit = next;
+ } else {
+ if(1 < (last - a)) {
+ STACK_PUSH(ISAd + 1, first, a, next);
+ first = a, limit = -3;
+ } else {
+ ISAd += 1, last = a, limit = next;
+ }
+ }
+ } else {
+ STACK_POP(ISAd, first, last, limit);
+ }
+ }
+ continue;
+ }
+
+ if((last - first) <= TR_INSERTIONSORT_THRESHOLD) {
+ UPDATE_BUDGET(last - first);
+// UPDATE_BUDGET((last - first) * (last - first) / 2);
+ _insertionsort(ISAd, first, last);
+ limit = -3;
+ continue;
+ }
+
+ if(limit-- == 0) {
+ UPDATE_BUDGET(last - first);
+// UPDATE_BUDGET((last - first) * _lg(last - first));
+ _heapsort(ISAd, first, last - first);
+ for(a = last - 1; first < a; a = b) {
+ for(x = ISAd[*a], b = a - 1; (first <= b) && (ISAd[*b] == x); --b) { *b = ~*b; }
+ }
+ limit = -3;
+ continue;
+ }
+
+ UPDATE_BUDGET(last - first);
+
+ /* choose pivot */
+ a = _pivot(ISAd, first, last);
+ SWAP(*first, *a);
+ v = ISAd[*first];
+
+ /* partition */
+ for(b = first; (++b < last) && ((x = ISAd[*b]) == v);) { }
+ if(((a = b) < last) && (x < v)) {
+ for(; (++b < last) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ }
+ for(c = last; (b < --c) && ((x = ISAd[*c]) == v);) { }
+ if((b < (d = c)) && (x > v)) {
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+ for(; b < c;) {
+ SWAP(*b, *c);
+ for(; (++b < c) && ((x = ISAd[*b]) <= v);) {
+ if(x == v) { SWAP(*b, *a); ++a; }
+ }
+ for(; (b < --c) && ((x = ISAd[*c]) >= v);) {
+ if(x == v) { SWAP(*c, *d); --d; }
+ }
+ }
+
+ if(a <= d) {
+ c = b - 1;
+
+ if((s = a - first) > (t = b - a)) { s = t; }
+ for(e = first, f = b - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+ if((s = d - c) > (t = last - d - 1)) { s = t; }
+ for(e = b, f = last - s; 0 < s; --s, ++e, ++f) { SWAP(*e, *f); }
+
+ a = first + (b - a), b = last - (d - c);
+ next = (ISA[*a] != v) ? _lg(b - a) : -1;
+
+ /* update ranks */
+ for(c = first, v = a - SA - 1; c < a; ++c) { ISA[*c] = v; }
+ if(b < last) { for(c = a, v = b - SA - 1; c < b; ++c) { ISA[*c] = v; } }
+
+ /* push */
+ if((a - first) <= (last - b)) {
+ if((last - b) <= (b - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH(ISAd + 1, a, b, next);
+ STACK_PUSH(ISAd, b, last, limit);
+ last = a;
+ } else if(1 < (last - b)) {
+ STACK_PUSH(ISAd + 1, a, b, next);
+ first = b;
+ } else if(1 < (b - a)) {
+ ISAd += 1, first = a, last = b, limit = next;
+ } else {
+ STACK_POP(ISAd, first, last, limit);
+ }
+ } else if((a - first) <= (b - a)) {
+ if(1 < (a - first)) {
+ STACK_PUSH(ISAd, b, last, limit);
+ STACK_PUSH(ISAd + 1, a, b, next);
+ last = a;
+ } else if(1 < (b - a)) {
+ STACK_PUSH(ISAd, b, last, limit);
+ ISAd += 1, first = a, last = b, limit = next;
+ } else {
+ first = b;
+ }
+ } else {
+ if(1 < (b - a)) {
+ STACK_PUSH(ISAd, b, last, limit);
+ STACK_PUSH(ISAd, first, a, limit);
+ ISAd += 1, first = a, last = b, limit = next;
+ } else {
+ STACK_PUSH(ISAd, b, last, limit);
+ last = a;
+ }
+ }
+ } else {
+ if((a - first) <= (b - a)) {
+ if(1 < (last - b)) {
+ STACK_PUSH(ISAd + 1, a, b, next);
+ STACK_PUSH(ISAd, first, a, limit);
+ first = b;
+ } else if(1 < (a - first)) {
+ STACK_PUSH(ISAd + 1, a, b, next);
+ last = a;
+ } else if(1 < (b - a)) {
+ ISAd += 1, first = a, last = b, limit = next;
+ } else {
+ STACK_POP(ISAd, first, last, limit);
+ }
+ } else if((last - b) <= (b - a)) {
+ if(1 < (last - b)) {
+ STACK_PUSH(ISAd, first, a, limit);
+ STACK_PUSH(ISAd + 1, a, b, next);
+ first = b;
+ } else if(1 < (b - a)) {
+ STACK_PUSH(ISAd, first, a, limit);
+ ISAd += 1, first = a, last = b, limit = next;
+ } else {
+ last = a;
+ }
+ } else {
+ if(1 < (b - a)) {
+ STACK_PUSH(ISAd, first, a, limit);
+ STACK_PUSH(ISAd, b, last, limit);
+ ISAd += 1, first = a, last = b, limit = next;
+ } else {
+ STACK_PUSH(ISAd, first, a, limit);
+ first = b;
+ }
+ }
+ }
+ } else {
+ limit += 1, ISAd += 1;
+ }
+ }
+
+ for(s = 0; s < ssize; ++s) {
+ if(stack[s].d == -3) {
+ _ls_updategroup(ISA, SA, stack[s].b, stack[s].c);
+ }
+ }
+}
+
+
+/*---------------------------------------------------------------------------*/
+
+/*- Function -*/
+
+/* Tandem repeat sort */
+void
+trsort(saidx_t *ISA, saidx_t *SA, saidx_t n, saidx_t depth) {
+ saidx_t *first, *last;
+ saidx_t t, skip;
+ saidx_t budget;
+ saidx_t chance;
+
+ if(-n < *SA) {
+ first = SA;
+ skip = 0;
+ budget = n;
+/* chance = _lg(n); */
+/* chance = _lg(n) / 2 + 1; */
+ chance = _lg(n) * 2 / 3 + 1;
+ do {
+ if((t = *first) < 0) { first -= t; skip += t; }
+ else {
+ skip = 0;
+ last = SA + ISA[t] + 1;
+ if(1 < (last - first)) {
+ _tr_introsort(ISA, ISA + depth, SA, first, last, &budget, &chance, n);
+ if(chance == 0) {
+ /* Switch to Larsson-Sadakane sorting algorithm. */
+ if(SA < first) { *SA = -(first - SA); }
+ _lssort(ISA, SA, n, depth);
+ break;
+ }
+ }
+ first = last;
+ }
+ } while(first < (SA + n));
+ }
+}
diff --git a/lib/utils.c b/lib/utils.c
new file mode 100644
index 0000000..086b1ca
--- /dev/null
+++ b/lib/utils.c
@@ -0,0 +1,378 @@
+/*
+ * utils.c for libdivsufsort
+ * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use,
+ * copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following
+ * conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+ * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+ * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+ * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+ * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+ * OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#include "divsufsort_private.h"
+
+
+/*- Private Function -*/
+
+/* Binary search for inverse bwt. */
+static
+saidx_t
+_binarysearch(const saidx_t *A, saidx_t len, saidx_t val) {
+ saidx_t half, m;
+ for(m = 0, half = len >> 1;
+ 0 < len;
+ len = half, half >>= 1) {
+ if(A[m + half] < val) {
+ m += half + 1;
+ half -= ((len & 1) == 0);
+ }
+ }
+ return m;
+}
+
+
+/*- Functions -*/
+
+/* Burrows-Wheeler transform. */
+saint_t
+bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
+ saidx_t n, saidx_t *idx) {
+ saidx_t *A, i, p;
+ saint_t c;
+
+ /* Check arguments. */
+ if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
+ if(n <= 1) {
+ *idx = n;
+ return 0;
+ }
+
+ if((A = SA) == NULL) {
+ i = divbwt(T, U, NULL, n);
+ if(0 <= i) { *idx = i; i = 0; }
+ return (saint_t)i;
+ }
+
+ /* BW transform. */
+ if(T == U) {
+ for(i = 0; 0 <= (p = A[i] - 1); ++i) {
+ c = T[i];
+ U[i] = (i <= p) ? T[p] : A[p];
+ A[i] = c;
+ }
+ *idx = i;
+ for( ; i < n; ++i) {
+ p = A[i + 1] - 1;
+ c = T[i];
+ U[i] = (i <= p) ? T[p] : A[p];
+ A[i] = c;
+ }
+ } else {
+ for(i = 0; A[i] != 0; ++i) { U[i] = T[A[i] - 1]; }
+ *idx = i;
+ for( ; i < n; ++i) { U[i] = T[A[i + 1] - 1]; }
+ }
+
+ if(SA == NULL) {
+ /* Deallocate memory. */
+ free(A);
+ }
+
+ return 0;
+}
+
+/* Inverse Burrows-Wheeler transform. */
+saint_t
+inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
+ saidx_t n, saidx_t idx) {
+ saidx_t C[ALPHABET_SIZE];
+ saidx_t D[ALPHABET_SIZE];
+ saidx_t *B;
+ saidx_t i, k, t, sum;
+
+ /* Check arguments. */
+ if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
+ (n < idx) || ((0 < n) && (idx == 0))) {
+ return -1;
+ }
+ if(n <= 1) { return 0; }
+
+ if((B = A) == NULL) {
+ /* Allocate (n+1)*sizeof(saidx_t) bytes of memory. */
+ if((B = malloc((n + 1) * sizeof(saidx_t))) == NULL) { return -2; }
+ }
+
+ /* Inverse BW transform. */
+ for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
+ for(i = 0; i < n; ++i) { ++C[T[i]]; }
+ for(i = 0, sum = 0; i < ALPHABET_SIZE; ++i) {
+ t = C[i];
+ C[i] = sum;
+ sum += t;
+ }
+ B[0] = idx;
+ for(i = 0; i < idx; ++i) { B[++C[T[i]]] = i; }
+ for( ; i < n; ++i) { B[++C[T[i]]] = i + 1; }
+ for(i = 0, k = 0, t = 0; i < ALPHABET_SIZE; ++i) {
+ if(t != C[i]) {
+ D[k] = i;
+ t = C[k] = C[i];
+ ++k;
+ }
+ }
+ for(i = 0, t = 0; i < n; ++i) {
+ U[i] = D[_binarysearch(C, k, t = B[t])];
+ }
+
+ if(A == NULL) {
+ /* Deallocate memory. */
+ free(B);
+ }
+
+ return 0;
+}
+
+/* Checks the suffix array SA of the string T. */
+saint_t
+sufcheck(const sauchar_t *T, const saidx_t *SA,
+ saidx_t n, saint_t verbose) {
+ saidx_t C[ALPHABET_SIZE];
+ saidx_t i = 0, p, t = 0;
+ saint_t c;
+ saint_t err = 0;
+
+ if(1 <= verbose) { fprintf(stderr, "sufchecker: "); }
+
+ /* Check arguments. */
+ if((T == NULL) || (SA == NULL) || (n < 0)) { err = -1; }
+
+ /* ranges. */
+ if(err == 0) {
+ for(i = 0; i <= n; ++i) {
+ if((SA[i] < 0) || (n < SA[i])) {
+ err = -2;
+ break;
+ }
+ }
+ }
+
+ /* first characters. */
+ if(err == 0) {
+ for(i = 1; i < n; ++i) {
+ if(T[SA[i]] > T[SA[i + 1]]) {
+ err = -3;
+ break;
+ }
+ }
+ }
+
+ /* suffixes. */
+ if(err == 0) {
+ for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
+ for(i = 0; i < n; ++i) { ++C[T[i]]; }
+ for(i = 0, p = 1; i < ALPHABET_SIZE; ++i) {
+ t = C[i];
+ C[i] = p;
+ p += t;
+ }
+
+ for(i = 0; i <= n; ++i) {
+ p = SA[i];
+ if(0 < p) {
+ c = T[--p];
+ t = C[c];
+ } else {
+ p = n;
+ c = -1;
+ t = 0;
+ }
+ if(p != SA[t]) {
+ err = -4;
+ break;
+ }
+ if(0 <= c) {
+ ++C[c];
+ if((n < C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
+ }
+ }
+ }
+
+ if(1 <= verbose) {
+ if(err == 0) {
+ fprintf(stderr, "Done.\n");
+ } else if(verbose == 1) {
+ fprintf(stderr, "Error.\n");
+ } else if(err == -1) {
+ fprintf(stderr, "Invalid arguments.\n");
+ } else if(err == -2) {
+ fprintf(stderr, "Out of the range [0,%d].\n SA[%d]=%d\n",
+ (int)n, (int)i, (int)SA[i]);
+ } else if(err == -3) {
+ fprintf(stderr, "Suffixes in wrong order.\n"
+ " T[SA[%d]=%d]=%d > T[SA[%d]=%d]=%d\n",
+ (int)i, (int)SA[i], (int)T[SA[i]],
+ (int)i + 1, (int)SA[i + 1], (int)T[SA[i + 1]]);
+ } else if(err == -4) {
+ fprintf(stderr, "Suffix in wrong position.\n");
+ if(0 <= t) { fprintf(stderr, " SA[%d]=%d or\n", (int)t, (int)SA[t]); }
+ fprintf(stderr, " SA[%d]=%d\n", (int)i, (int)SA[i]);
+ }
+ }
+
+ return err;
+}
+
+
+static
+int
+_compare(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ saidx_t suf, saidx_t *match) {
+ saidx_t i, j;
+ saint_t r;
+ for(i = suf + *match, j = *match, r = 0;
+ (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { }
+ *match = j;
+ return (r == 0) ? -(j != Psize) : r;
+}
+
+/* Search for the pattern P in the string T. */
+saidx_t
+sa_search(const sauchar_t *T, saidx_t Tsize,
+ const sauchar_t *P, saidx_t Psize,
+ const saidx_t *SA, saidx_t SAsize,
+ saidx_t *idx) {
+ saidx_t size, lsize, rsize, half;
+ saidx_t match, lmatch, rmatch;
+ saidx_t llmatch, lrmatch, rlmatch, rrmatch;
+ saidx_t i, j, k;
+ saint_t r;
+
+ if(idx != NULL) { *idx = -1; }
+ if((T == NULL) || (P == NULL) || (SA == NULL) ||
+ (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
+ if((Tsize == 0) || (SAsize == 0)) { return 0; }
+ if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
+
+ for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
+ 0 < size;
+ size = half, half >>= 1) {
+ match = MIN(lmatch, rmatch);
+ r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
+ if(r < 0) {
+ i += half + 1;
+ half -= (size & 1) ^ 1;
+ lmatch = match;
+ } else if(r > 0) {
+ rmatch = match;
+ } else {
+ lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
+
+ /* left part */
+ for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
+ 0 < lsize;
+ lsize = half, half >>= 1) {
+ lmatch = MIN(llmatch, lrmatch);
+ r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
+ if(r < 0) {
+ j += half + 1;
+ half -= (lsize & 1) ^ 1;
+ llmatch = lmatch;
+ } else {
+ lrmatch = lmatch;
+ }
+ }
+
+ /* right part */
+ for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
+ 0 < rsize;
+ rsize = half, half >>= 1) {
+ rmatch = MIN(rlmatch, rrmatch);
+ r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
+ if(r <= 0) {
+ k += half + 1;
+ half -= (rsize & 1) ^ 1;
+ rlmatch = rmatch;
+ } else {
+ rrmatch = rmatch;
+ }
+ }
+
+ break;
+ }
+ }
+
+ if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
+ return k - j;
+}
+
+/* Search for the character c in the string T. */
+saidx_t
+sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
+ const saidx_t *SA, saidx_t SAsize,
+ saint_t c, saidx_t *idx) {
+ saidx_t size, lsize, rsize, half;
+ saidx_t i, j, k, p;
+ saint_t r;
+
+ if(idx != NULL) { *idx = -1; }
+ if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
+ if((Tsize == 0) || (SAsize == 0)) { return 0; }
+
+ for(i = j = k = 0, size = SAsize, half = size >> 1;
+ 0 < size;
+ size = half, half >>= 1) {
+ p = SA[i + half];
+ r = (p < Tsize) ? T[p] - c : -1;
+ if(r < 0) {
+ i += half + 1;
+ half -= (size & 1) ^ 1;
+ } else if(r == 0) {
+ lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
+
+ /* left part */
+ for(half = lsize >> 1;
+ 0 < lsize;
+ lsize = half, half >>= 1) {
+ p = SA[j + half];
+ r = (p < Tsize) ? T[p] - c : -1;
+ if(r < 0) {
+ j += half + 1;
+ half -= (lsize & 1) ^ 1;
+ }
+ }
+
+ /* right part */
+ for(half = rsize >> 1;
+ 0 < rsize;
+ rsize = half, half >>= 1) {
+ p = SA[k + half];
+ r = (p < Tsize) ? T[p] - c : -1;
+ if(r <= 0) {
+ k += half + 1;
+ half -= (rsize & 1) ^ 1;
+ }
+ }
+
+ break;
+ }
+ }
+
+ if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
+ return k - j;
+}