aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryuta.256 <yuta.256@b7c3aa3b-274f-0410-ae0b-edc9d07c929d>2008-08-23 02:41:29 +0000
committeryuta.256 <yuta.256@b7c3aa3b-274f-0410-ae0b-edc9d07c929d>2008-08-23 02:41:29 +0000
commita8a3c1e46cac46c776edd6b00269e2dcd760200c (patch)
treee017f22aa52463f2b43e1657ddd3df7bfc618e7a
parent5f60ac8a026473087ecf1315f9436ae5871e4f69 (diff)
downloadplatform_external_libdivsufsort-a8a3c1e46cac46c776edd6b00269e2dcd760200c.tar.gz
platform_external_libdivsufsort-a8a3c1e46cac46c776edd6b00269e2dcd760200c.tar.bz2
platform_external_libdivsufsort-a8a3c1e46cac46c776edd6b00269e2dcd760200c.zip
Update files for 2.0.0.
-rw-r--r--INSTALL4
-rw-r--r--NEWS7
-rw-r--r--README119
-rw-r--r--include/divsufsort.h.cmake46
-rw-r--r--pkgconfig/CMakeLists.txt1
-rw-r--r--pkgconfig/libdivsufsort.pc.cmake2
6 files changed, 158 insertions, 21 deletions
diff --git a/INSTALL b/INSTALL
index 2c7bcd0..ba7ee09 100644
--- a/INSTALL
+++ b/INSTALL
@@ -5,8 +5,8 @@ Requirements:
=============
* CMake version 2.4.2 or newer (http://www.cmake.org/)
- * A C compiler
- * Make
+ * An ANSI C compiler
+ * GNU Make
Compilation and Installation (with Unix Makefiles):
diff --git a/NEWS b/NEWS
index ea79c45..f240fd1 100644
--- a/NEWS
+++ b/NEWS
@@ -1,5 +1,12 @@
-- NEWS for libdivsufsort
+Changes in release 2.0.0
+
+ - The build system was changed to CMake. (http://www.cmake.org/)
+ - Improved the performance of the suffix-sorting algorithm.
+ - Added OpenMP support.
+ - Added 64-bit version of divsufsort.
+
Changes in release 1.2.3
- Bug fixes.
diff --git a/README b/README
index 36e95fd..8bb4ef0 100644
--- a/README
+++ b/README
@@ -4,13 +4,16 @@ libdivsufsort - A lightweight suffix-sorting library.
Introduction:
-------------
-libdivsufsort is an open-source library that provides a high
-performance and lightweight suffix-sorting and BWT-construction
-algorithm. For input size n, this algorithm runs in O(n log n)
-worst-case (and average) time using only 5n bytes of memory.
+The libdivsufsort project provides a fast, lightweight, and robust
+C API library to construct the suffix array and the Burrows-Wheeler
+transformed string for any input string of a constant-size alphabet.
+
+The suffix-sorting algorithm runs in O(n log n) worst-case time
+using only 5n+O(1) bytes of memory space, where n is the length of
+the input string.
The latest version of libdivsufsort is available at:
- http://libdivsufsort.googlecode.com/
+ http://libdivsufsort.googlecode.com/
License:
@@ -30,7 +33,7 @@ APIs:
* Constructs the suffix array of a given string.
* @param T[0..n-1] The input string.
- * @param SA[0..n] The output array or suffixes.
+ * @param SA[0..n-1] The output array or suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 or -2 otherwise.
saint_t
@@ -39,7 +42,7 @@ APIs:
* Constructs the burrows-wheeler transformed string of a given string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
- * @param A[0..n] The temporary array. (can be NULL)
+ * @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @return The primary index if no error occurred, -1 or -2 otherwise.
saidx_t
@@ -48,7 +51,7 @@ APIs:
* Constructs the burrows-wheeler transformed string of a given string and suffix array.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
- * @param SA[0..n] The suffix array. (can be NULL)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
* @param n The length of the given string.
* @param idx The output primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
@@ -59,7 +62,7 @@ APIs:
* Inverse BW-transforms a given BWTed string.
* @param T[0..n-1] The input string.
* @param U[0..n-1] The output string. (can be T)
- * @param A[0..n] The temporary array. (can be NULL)
+ * @param A[0..n-1] The temporary array. (can be NULL)
* @param n The length of the given string.
* @param idx The primary index.
* @return 0 if no error occurred, -1 or -2 otherwise.
@@ -69,7 +72,7 @@ APIs:
* Checks the correctness of a given suffix array.
* @param T[0..n-1] The input string.
- * @param SA[0..n] The input suffix array.
+ * @param SA[0..n-1] The input suffix array.
* @param n The length of the given string.
* @param verbose The verbose mode.
* @return 0 if no error occurred.
@@ -111,10 +114,102 @@ APIs:
divsufsort_version(void);
-Benchmark results:
+Benchmark:
------------------
-http://homepage3.nifty.com/wpage/benchmark/index.html
+= Specifications =
+Processor: 2.66 GHz Intel Core 2 Duo E6750
+L1 Cache: (32 Kb + 32 Kb) x 2
+L2 Cache: 4 Mb
+RAM: 2 Gb main memory
+Operating system: Windows XP Home SP 3 (with Cygwin)
+Compiler: GCC version 4.3.1
+
+= Programs =
+Archon4r0 kvark's sorting algorithm http://forum.compression.ru/viewtopic.php?t=352
+BPR Bucket-Pointer Refinement algorithm http://bibiserv.techfak.uni-bielefeld.de/bpr/
+DC Difference-Cover algorithm (v = 32) http://www.cs.helsinki.fi/juha.karkkainen/publications/cpm03.tar.gz
+DS Deep-Shallow sorting algorithm http://www.mfn.unipmn.it/~manzini/lightweight/
+divsufsort1 libdivsufsort version 1.2.3 http://libdivsufsort.googlecode.com/
+divsufsort2 libdivsufsort version 2.0.0 http://libdivsufsort.googlecode.com/
+KA Ko-Aluru algorithm http://ko.pang.cn.googlepages.com/software2
+KS Kärkkäinen-Sanders algorithm http://www.mpi-inf.mpg.de/~sanders/programs/suffix/
+MSufSort3 MSufSort version 3.1.1 beta http://www.michael-maniscalco.com/msufsort.htm
+qsufsort Larsson-Sadakane algorithm http://www.larsson.dogma.net/research.html
+sais Induced Sorting algorithm http://yuta.256.googlepages.com/sais
+
+All programs were compiled with gcc/g++ using '-O3 -fomit-frame-pointer -DNDEBUG'
+optimization options. The times are the average of five runs, in seconds, and were
+measured using the standard Unix/Cygwin 'time' command. (user + system) The spaces
+were measured using the 'memusage' command.
+
+= Testfiles =
+Manzini's Large Corpus http://www.mfn.unipmn.it/~manzini/lightweight/corpus/
+The Gauntlet http://www.michael-maniscalco.com/testset/gauntlet/
+
+= Running times =
+
+== Manzini's Corpus ==
+Files Size Archon4r0 BPR DC DS divsufsort1 divsufsort2 KA KS MSufSort3 qsufsort sais
+chr22.dna 34553758 6.030 6.196 22.694 7.514 5.404 5.362 16.980 50.006 7.132 10.642 10.796
+etext99 105277340 22.160 32.582 79.872 34.264 18.758 18.064 73.236 202.684 24.106 56.612 38.748
+gcc-3.0.tar 86630400 13.856 20.692 61.690 35.822 10.382 10.084 40.908 135.174 14.952 40.766 20.990
+howto 39422105 5.806 8.326 25.432 8.288 5.472 5.320 20.694 64.834 5.672 16.366 11.388
+jdk13c 69728899 18.106 22.252 61.234 32.182 9.260 9.010 34.172 101.096 11.314 39.792 16.396
+linux-2.4.5.tar 116254720 18.174 26.226 82.830 25.912 14.672 14.290 58.586 194.412 19.890 54.054 29.614
+rctail96 114711151 32.490 55.826 119.026 62.502 18.500 17.914 70.072 190.562 21.060 70.456 33.248
+rfc 116421901 20.736 35.404 91.284 29.666 16.116 15.658 64.390 196.500 17.936 61.436 32.224
+sprot34.dat 109617186 22.832 36.720 93.122 32.096 17.894 17.404 68.084 187.594 23.352 56.946 34.092
+w3c2 104201579 27.264 29.384 89.352 54.682 13.866 13.486 52.660 162.582 17.090 77.804 25.498
+totals 896819039 187.454 273.608 726.536 322.928 130.324 126.592 499.782 1485.444 162.504 484.874 252.994
+
+== The Gauntlet ==
+Files Size Archon4r0 BPR DC DS divsufsort1 divsufsort2 KA KS MSufSort3 qsufsort sais
+abac 200000 0.044 0.064 0.104 27.914 0.042 0.036 0.058 0.048 0.050 0.062 0.044
+abba 10500600 3.270 5.124 10.766 30.702 1.714 1.602 2.570 7.952 3.514 15.272 1.460
+book1x20 15375420 4.392 3.530 13.872 97.468 2.312 2.154 7.442 15.756 3.542 22.376 3.912
+fib_s14930352 14930352 12.728 10.830 18.524 179.040 3.638 3.588 3.544 10.232 6.700 18.224 2.542
+fss10 12078908 11.390 8.974 15.130 85.328 2.828 2.824 3.344 8.646 4.618 14.754 2.076
+fss9 2851443 1.002 1.210 1.644 5.256 0.410 0.416 0.618 1.290 0.554 2.836 0.336
+houston 3840000 0.344 0.708 2.226 118.960 0.118 0.128 0.520 0.744 0.242 1.230 0.238
+paper5x80 981924 0.110 0.154 0.454 0.806 0.092 0.090 0.210 0.256 0.144 0.448 0.110
+test1 2097152 0.332 2.132 1.108 8.680 0.268 0.280 0.376 1.066 1.302 2.762 0.202
+test2 2097152 0.710 0.616 1.110 8.682 0.180 0.176 0.374 1.076 3.354 2.768 0.206
+test3 2097152 0.488 213.154 1.164 1.772 0.220 0.226 0.388 1.082 0.922 3.246 0.212
+totals 67050103 34.810 246.496 66.102 564.608 11.822 11.520 19.444 48.148 24.942 83.978 11.338
+
+= Space (in MiBytes) =
+
+== Manzini's Corpus ==
+Files Size Archon4r0 BPR DC DS divsufsort1 divsufsort2 KA KS MSufSort3 qsufsort sais
+chr22.dna 34553758 174.66 296.88 193.60 165.18 165.02 165.02 289.97 428.39 199.72 263.62 164.77
+etext99 105277340 531.13 915.48 589.85 503.23 502.25 502.25 907.34 1305.20 604.45 803.20 502.00
+gcc-3.0.tar 86630400 437.14 756.43 485.38 415.87 413.34 413.34 709.50 1074.01 497.79 660.94 413.09
+howto 39422105 199.20 367.53 220.88 188.45 188.23 188.23 331.54 488.75 227.67 300.77 187.98
+jdk13c 69728899 351.96 603.99 390.68 333.40 332.74 332.74 609.71 864.48 401.04 531.99 332.49
+linux-2.4.5.tar 116254720 586.46 1061.83 651.36 555.76 554.60 554.60 977.81 1441.30 667.39 886.95 554.35
+rctail96 114711151 578.68 987.64 642.71 548.32 547.24 547.24 1004.98 1422.16 658.43 875.18 546.99
+rfc 116421901 587.30 1005.85 652.29 556.53 555.39 555.39 956.52 1443.37 668.26 888.23 555.14
+sprot34.dat 109617186 553.01 941.95 614.17 524.03 522.95 522.95 930.06 1359.01 629.26 836.31 522.70
+w3c2 104201579 525.71 958.37 583.82 498.09 497.12 497.12 912.00 1291.87 598.82 795.00 496.87
+totals 896819039 4525.25 7895.95 5024.74 4288.86 4278.88 4278.88 7629.43 11118.54 5152.83 6842.19 4276.38
+mean - 5.29 9.23 5.88 5.01 5.00 5.00 8.92 13.00 6.02 8.00 5.00
+
+== The Gauntlet ==
+Files Size Archon4r0 BPR DC DS divsufsort1 divsufsort2 KA KS MSufSort3 qsufsort sais
+abac 200000 1.51 1.73 1.12 0.98 1.21 1.20 1.75 2.48 3.15 1.53 0.95
+abba 10500600 53.43 90.19 58.83 50.21 50.32 50.32 86.20 130.18 62.09 80.11 50.07
+book1x20 15375420 78.00 134.00 86.15 73.52 73.57 73.57 132.42 190.62 89.99 117.31 73.32
+fib_s14930352 14930352 75.75 128.15 83.65 71.71 71.44 71.44 117.16 185.10 87.43 113.91 71.19
+fss10 12078908 61.38 103.68 67.68 58.05 57.85 57.85 107.05 149.75 71.12 92.16 57.60
+fss9 2851443 14.87 24.48 15.98 13.71 13.85 13.85 25.27 35.35 18.32 21.76 13.60
+houston 3840000 19.85 36.96 21.52 18.46 18.56 18.56 28.79 47.58 23.98 29.30 18.31
+paper5x80 981924 5.45 11.40 5.50 4.72 4.93 4.93 8.59 12.17 7.63 7.49 4.68
+test1 2097152 11.07 82.00 11.75 10.10 10.25 10.25 18.34 25.99 14.01 16.00 10.00
+test2 2097152 11.07 82.00 11.75 10.10 10.25 10.25 18.34 25.99 14.01 16.00 10.00
+test3 2097152 11.07 82.00 11.75 10.05 10.25 10.25 18.34 26.00 14.63 16.00 10.12
+totals 67050103 343.45 776.59 375.68 321.61 322.48 322.47 562.25 831.21 406.36 511.57 319.84
+mean - 5.37 12.14 5.88 5.03 5.04 5.04 8.79 13.00 6.35 8.00 5.00
Algorithm:
diff --git a/include/divsufsort.h.cmake b/include/divsufsort.h.cmake
index a3b8968..bcaba7c 100644
--- a/include/divsufsort.h.cmake
+++ b/include/divsufsort.h.cmake
@@ -96,14 +96,30 @@ const char *
divsufsort@W64BIT@_version(void);
-/* Burrows-Wheeler transform. */
+/**
+ * Constructs the burrows-wheeler transformed string of a given string and suffix array.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param SA[0..n-1] The suffix array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The output primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
DIVSUFSORT_API
saint_t
bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
saidx@W64BIT@_t *SA /* can NULL */,
saidx@W64BIT@_t n, saidx@W64BIT@_t *idx);
-/* Inverse Burrows-Wheeler transform. */
+/**
+ * Inverse BW-transforms a given BWTed string.
+ * @param T[0..n-1] The input string.
+ * @param U[0..n-1] The output string. (can be T)
+ * @param A[0..n-1] The temporary array. (can be NULL)
+ * @param n The length of the given string.
+ * @param idx The primary index.
+ * @return 0 if no error occurred, -1 or -2 otherwise.
+ */
DIVSUFSORT_API
saint_t
inverse_bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
@@ -113,7 +129,7 @@ inverse_bw_transform@W64BIT@(const sauchar_t *T, sauchar_t *U,
/**
* Checks the correctness of a given suffix array.
* @param T[0..n-1] The input string.
- * @param SA[0..n] The input suffix array.
+ * @param SA[0..n-1] The input suffix array.
* @param n The length of the given string.
* @param verbose The verbose mode.
* @return 0 if no error occurred.
@@ -122,8 +138,17 @@ DIVSUFSORT_API
saint_t
sufcheck@W64BIT@(const sauchar_t *T, const saidx@W64BIT@_t *SA, saidx@W64BIT@_t n, saint_t verbose);
-
-/* Search for the pattern P in the string T. */
+/**
+ * Search for the pattern P in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param P[0..Psize-1] The input pattern string.
+ * @param Psize The length of the given pattern string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
DIVSUFSORT_API
saidx@W64BIT@_t
sa_search@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
@@ -131,7 +156,16 @@ sa_search@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
const saidx@W64BIT@_t *SA, saidx@W64BIT@_t SAsize,
saidx@W64BIT@_t *left);
-/* Search for the character c in the string T. */
+/**
+ * Search for the character c in the string T.
+ * @param T[0..Tsize-1] The input string.
+ * @param Tsize The length of the given string.
+ * @param SA[0..SAsize-1] The input suffix array.
+ * @param SAsize The length of the given suffix array.
+ * @param c The input character.
+ * @param idx The output index.
+ * @return The count of matches if no error occurred, -1 otherwise.
+ */
DIVSUFSORT_API
saidx@W64BIT@_t
sa_simplesearch@W64BIT@(const sauchar_t *T, saidx@W64BIT@_t Tsize,
diff --git a/pkgconfig/CMakeLists.txt b/pkgconfig/CMakeLists.txt
index 06470be..1f2f8bc 100644
--- a/pkgconfig/CMakeLists.txt
+++ b/pkgconfig/CMakeLists.txt
@@ -1,4 +1,5 @@
## generate libdivsufsort.pc ##
+set(prefix "${CMAKE_INSTALL_PREFIX}")
set(W64BIT "")
configure_file("${CMAKE_CURRENT_SOURCE_DIR}/libdivsufsort.pc.cmake" "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" @ONLY)
install(FILES "${CMAKE_CURRENT_BINARY_DIR}/libdivsufsort.pc" DESTINATION lib/pkgconfig)
diff --git a/pkgconfig/libdivsufsort.pc.cmake b/pkgconfig/libdivsufsort.pc.cmake
index 1645a8b..84f2272 100644
--- a/pkgconfig/libdivsufsort.pc.cmake
+++ b/pkgconfig/libdivsufsort.pc.cmake
@@ -1,4 +1,4 @@
-prefix=@CMAKE_INSTALL_PREFIX@
+prefix=@prefix@
exec_prefix=${prefix}
libdir=${exec_prefix}/lib
includedir=${prefix}/include