summaryrefslogtreecommitdiffstats
path: root/jni/filters/kmeans.h
diff options
context:
space:
mode:
Diffstat (limited to 'jni/filters/kmeans.h')
-rw-r--r--jni/filters/kmeans.h232
1 files changed, 232 insertions, 0 deletions
diff --git a/jni/filters/kmeans.h b/jni/filters/kmeans.h
new file mode 100644
index 000000000..24506058a
--- /dev/null
+++ b/jni/filters/kmeans.h
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2012 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef KMEANS_H
+#define KMEANS_H
+
+#include <cstdlib>
+#include <math.h>
+
+// Helper functions
+
+template <typename T, typename N>
+inline void sum(T values[], int len, int dimension, int stride, N dst[]) {
+ int x, y;
+ // zero out dst vector
+ for (x = 0; x < dimension; x++) {
+ dst[x] = 0;
+ }
+ for (x = 0; x < len; x+= stride) {
+ for (y = 0; y < dimension; y++) {
+ dst[y] += values[x + y];
+ }
+ }
+}
+
+template <typename T, typename N>
+inline void set(T val1[], N val2[], int dimension) {
+ int x;
+ for (x = 0; x < dimension; x++) {
+ val1[x] = val2[x];
+ }
+}
+
+template <typename T, typename N>
+inline void add(T val[], N dst[], int dimension) {
+ int x;
+ for (x = 0; x < dimension; x++) {
+ dst[x] += val[x];
+ }
+}
+
+template <typename T, typename N>
+inline void divide(T dst[], N divisor, int dimension) {
+ int x;
+ if (divisor == 0) {
+ return;
+ }
+ for (x = 0; x < dimension; x++) {
+ dst[x] /= divisor;
+ }
+}
+
+/**
+ * Calculates euclidean distance.
+ */
+
+template <typename T, typename N>
+inline N euclideanDist(T val1[], T val2[], int dimension) {
+ int x;
+ N sum = 0;
+ for (x = 0; x < dimension; x++) {
+ N diff = (N) val1[x] - (N) val2[x];
+ sum += diff * diff;
+ }
+ return sqrt(sum);
+}
+
+// K-Means
+
+
+/**
+ * Picks k random starting points from the data set.
+ */
+template <typename T>
+void initialPickHeuristicRandom(int k, T values[], int len, int dimension, int stride, T dst[],
+ unsigned int seed) {
+ int x, z, num_vals, cntr;
+ num_vals = len / stride;
+ cntr = 0;
+ srand(seed);
+ unsigned int r_vals[k];
+ unsigned int r;
+
+ for (x = 0; x < k; x++) {
+
+ // ensure randomly chosen value is unique
+ int r_check = 0;
+ while (r_check == 0) {
+ r = (unsigned int) rand() % num_vals;
+ r_check = 1;
+ for (z = 0; z < x; z++) {
+ if (r == r_vals[z]) {
+ r_check = 0;
+ }
+ }
+ }
+ r_vals[x] = r;
+ r *= stride;
+
+ // set dst to be randomly chosen value
+ set<T,T>(dst + cntr, values + r, dimension);
+ cntr += stride;
+ }
+}
+
+/**
+ * Finds index of closet centroid to a value
+ */
+template <typename T, typename N>
+inline int findClosest(T values[], T oldCenters[], int dimension, int stride, int pop_size) {
+ int best_ind = 0;
+ N best_len = euclideanDist <T, N>(values, oldCenters, dimension);
+ int y;
+ for (y = stride; y < pop_size; y+=stride) {
+ N l = euclideanDist <T, N>(values, oldCenters + y, dimension);
+ if (l < best_len) {
+ best_len = l;
+ best_ind = y;
+ }
+ }
+ return best_ind;
+}
+
+/**
+ * Calculates new centroids by averaging value clusters for old centroids.
+ */
+template <typename T, typename N>
+int calculateNewCentroids(int k, T values[], int len, int dimension, int stride, T oldCenters[],
+ T dst[]) {
+ int x, pop_size;
+ pop_size = k * stride;
+ int popularities[k];
+ N tmp[pop_size];
+
+ //zero popularities
+ memset(popularities, 0, sizeof(int) * k);
+ // zero dst, and tmp
+ for (x = 0; x < pop_size; x++) {
+ tmp[x] = 0;
+ }
+
+ // put summation for each k in tmp
+ for (x = 0; x < len; x+=stride) {
+ int best = findClosest<T, N>(values + x, oldCenters, dimension, stride, pop_size);
+ add<T, N>(values + x, tmp + best, dimension);
+ popularities[best / stride]++;
+
+ }
+
+ int ret = 0;
+ int y;
+ // divide to get centroid and set dst to result
+ for (x = 0; x < pop_size; x+=stride) {
+ divide<N, int>(tmp + x, popularities[x / stride], dimension);
+ for (y = 0; y < dimension; y++) {
+ if ((dst + x)[y] != (T) ((tmp + x)[y])) {
+ ret = 1;
+ }
+ }
+ set(dst + x, tmp + x, dimension);
+ }
+ return ret;
+}
+
+template <typename T, typename N>
+void runKMeansWithPicks(int k, T finalCentroids[], T values[], int len, int dimension, int stride,
+ int iterations, T initialPicks[]){
+ int k_len = k * stride;
+ int x;
+
+ // zero newCenters
+ for (x = 0; x < k_len; x++) {
+ finalCentroids[x] = 0;
+ }
+
+ T * c1 = initialPicks;
+ T * c2 = finalCentroids;
+ T * temp;
+ int ret = 1;
+ for (x = 0; x < iterations; x++) {
+ ret = calculateNewCentroids<T, N>(k, values, len, dimension, stride, c1, c2);
+ temp = c1;
+ c1 = c2;
+ c2 = temp;
+ if (ret == 0) {
+ x = iterations;
+ }
+ }
+ set<T, T>(finalCentroids, c1, dimension);
+}
+
+/**
+ * Runs the k-means algorithm on dataset values with some initial centroids.
+ */
+template <typename T, typename N>
+void runKMeans(int k, T finalCentroids[], T values[], int len, int dimension, int stride,
+ int iterations, unsigned int seed){
+ int k_len = k * stride;
+ T initialPicks [k_len];
+ initialPickHeuristicRandom<T>(k, values, len, dimension, stride, initialPicks, seed);
+
+ runKMeansWithPicks<T, N>(k, finalCentroids, values, len, dimension, stride,
+ iterations, initialPicks);
+}
+
+/**
+ * Sets each value in values to the closest centroid.
+ */
+template <typename T, typename N>
+void applyCentroids(int k, T centroids[], T values[], int len, int dimension, int stride) {
+ int x, pop_size;
+ pop_size = k * stride;
+ for (x = 0; x < len; x+= stride) {
+ int best = findClosest<T, N>(values + x, centroids, dimension, stride, pop_size);
+ set<T, T>(values + x, centroids + best, dimension);
+ }
+}
+
+#endif // KMEANS_H