cskmeans/ClusterMetrics.cs

92 lines
3.9 KiB
C#

using System;
using System.Linq;
/// <summary>
/// Cluster-quality metrics (silhouette, mean squared distance) and the
/// outlier scores that are derived from them.
/// </summary>
public static class ClusterMetrics
{
    /// <summary>ANSI escape sequence that erases the current terminal line.</summary>
    private const string EraseLine = "\x1b[2K";

    /// <summary>A progress line is printed for every sample index that is a multiple of this value.</summary>
    private const int ProgressInterval = 100;

    /// <summary>Smallest value allowed as a denominator in <see cref="OutlierScores"/>.</summary>
    private const double MinDenominator = 0.000000001;

    // Easiest math-heavy source I could find:
    // https://en.wikipedia.org/wiki/Silhouette_(clustering)
    /// <summary>
    /// Computes the mean silhouette coefficient of a clustering.
    /// </summary>
    /// <remarks>
    /// For every sample <c>i</c>, <c>a(i)</c> is the mean distance to the other members of its
    /// own cluster and <c>b(i)</c> is the smallest mean distance to the members of any other
    /// non-empty cluster. Samples in single-member clusters score 0. Samples for which no other
    /// non-empty cluster exists (for example when every sample shares one label) also score 0,
    /// because <c>b(i)</c> is undefined in that case.
    /// Cluster ids that have no members are ignored, so labels do not need to be contiguous.
    /// Progress is written to the console while the computation runs.
    /// </remarks>
    /// <param name="pair">
    /// <c>clusters</c>: cluster id of each sample, used directly as a zero-based array index.
    /// <c>data</c>: the data matrix; it is transposed before use and the transposed rows are
    /// indexed by sample (presumably <c>data</c> is laid out feature-major; confirm with callers).
    /// </param>
    /// <param name="hint">Free-form label included in the progress output.</param>
    /// <returns>The average of the per-sample silhouette coefficients.</returns>
    /// <exception cref="InvalidOperationException">
    /// Thrown by <c>Enumerable.Max</c> when <c>clusters</c> is empty.
    /// </exception>
    public static double Silhouette(this (int[] clusters, double[][] data) pair, string hint = "?")
    {
        var clusters = pair.clusters;
        var points = pair.data.Transpose();
        var clusterCount = clusters.Max() + 1;
        var nSamples = clusters.Length;

        var clusterSizes = new int[clusterCount];
        foreach (var label in clusters)
        {
            clusterSizes[label]++;
        }

        // a(i): mean distance from sample i to the other members of its own cluster.
        var ais = new double[nSamples];
        for (int i = 0; i < nSamples; i++)
        {
            int own = clusters[i];
            if (clusterSizes[own] <= 1)
            {
                continue;
            }

            double distSum = 0;
            for (int j = 0; j < nSamples; j++)
            {
                if (i != j && clusters[j] == own)
                {
                    distSum += (points[i], points[j]).EuclideanDistance();
                }
            }

            ais[i] = distSum / (clusterSizes[own] - 1);
            ReportProgress(hint, "a", i, nSamples, 0);
        }

        // b(i): smallest mean distance from sample i to the members of another cluster.
        var bis = new double[nSamples];
        var distSums = new double[clusterCount]; // reused for every sample
        for (int i = 0; i < nSamples; i++)
        {
            int own = clusters[i];
            if (clusterSizes[own] <= 1)
            {
                continue;
            }

            Array.Clear(distSums, 0, distSums.Length);
            for (int j = 0; j < nSamples; j++)
            {
                if (clusters[j] != own)
                {
                    distSums[clusters[j]] += (points[i], points[j]).EuclideanDistance();
                }
            }

            double nearest = double.PositiveInfinity;
            for (int k = 0; k < clusterCount; k++)
            {
                // Empty clusters are skipped: dividing their zero distance sum by a
                // zero size would yield NaN and poison the minimum.
                if (k != own && clusterSizes[k] > 0)
                {
                    nearest = Math.Min(nearest, distSums[k] / clusterSizes[k]);
                }
            }

            bis[i] = nearest;
            ReportProgress(hint, "b", i, nSamples, 50);
        }

        double total = 0;
        for (int i = 0; i < nSamples; i++)
        {
            total += SilhouetteCoefficient(ais[i], bis[i]);
        }

        // Leave the cursor on a clean line once the progress display is no longer needed.
        Console.Write("\r");
        Console.Write(EraseLine);
        return total / nSamples;
    }

    /// <summary>
    /// Combines <c>a(i)</c> and <c>b(i)</c> into the silhouette coefficient of one sample.
    /// </summary>
    private static double SilhouetteCoefficient(double a, double b)
    {
        // +Infinity marks "no other non-empty cluster": the coefficient is undefined, score 0.
        if (double.IsPositiveInfinity(b))
        {
            return 0;
        }

        if (a < b)
        {
            return 1 - a / b;
        }

        if (a > b)
        {
            return b / a - 1;
        }

        // a == b (this includes skipped single-member samples, where both are 0) or a NaN operand.
        return 0;
    }

    /// <summary>
    /// Rewrites the current console line with a progress message for every
    /// <see cref="ProgressInterval"/>-th sample index.
    /// </summary>
    /// <param name="hint">Caller-supplied label.</param>
    /// <param name="phase">Name of the quantity being computed ("a" or "b").</param>
    /// <param name="i">Index of the sample that was just processed.</param>
    /// <param name="nSamples">Total number of samples.</param>
    /// <param name="percentOffset">Percentage already completed by earlier phases; each phase covers 50%.</param>
    private static void ReportProgress(string hint, string phase, int i, int nSamples, int percentOffset)
    {
        if (i % ProgressInterval != 0)
        {
            return;
        }

        var percent = Math.Round(50d * i / nSamples + percentOffset, 0);
        Console.Write(EraseLine);
        Console.Write($"> Silhouette {hint} {phase}[{i}] {percent}% {DateTime.Now}");
        Console.Write("\r");
    }

    /// <summary>
    /// Returns the mean of the squared values in <paramref name="distances"/>.
    /// The result is NaN for an empty array (0 divided by 0).
    /// </summary>
    public static double DistancesToWSS(this double[] distances) => distances.Sum(x => x * x) / distances.Length;

    /// <summary>
    /// Single-value convenience wrapper around <see cref="OutlierScores"/>.
    /// </summary>
    /// <param name="inpdata">One distance value plus the <c>wss</c> and <c>sil</c> values forwarded unchanged.</param>
    /// <param name="param0">Forwarded unchanged to <see cref="OutlierScores"/>.</param>
    /// <returns>The only element of the array returned by <see cref="OutlierScores"/>.</returns>
    public static double OutlierScore(this (double distance2, double wss, double sil) inpdata, double param0) =>
        OutlierScores(([inpdata.distance2], inpdata.wss, inpdata.sil), param0)[0];

    /// <summary>
    /// Converts distance values into outlier scores.
    /// </summary>
    /// <remarks>
    /// <c>wss</c> and <c>sil</c> are first clamped to [0, 1]. Each score is
    /// <c>sqrt(max(0, distance2 - wss)) / max(1e-9, ratio)</c>, where
    /// <c>ratio = clamp01(wss / max(1e-9, sil ^ param0))</c>.
    /// The input array is not modified; the scores are written to a copy obtained from <c>Copy1D</c>.
    /// </remarks>
    /// <param name="inpdata">
    /// <c>distance2</c>: input values, one per sample (presumably squared distances, given the
    /// name and the square root taken below; confirm with callers).
    /// <c>wss</c>: value subtracted from every input before the square root.
    /// <c>sil</c>: silhouette value used in the scaling ratio.
    /// </param>
    /// <param name="param0">Exponent applied to the clamped silhouette value.</param>
    /// <returns>A new array holding one score per input value.</returns>
    public static double[] OutlierScores(this (double[] distance2, double wss, double sil) inpdata, double param0)
    {
        var (distances, wss, sil) = inpdata;
        var scores = distances.Copy1D();

        double adjWss = Clamp01(wss);
        double adjSil = Clamp01(sil);
        double ratSil = Clamp01(adjWss / Math.Max(MinDenominator, Math.Pow(adjSil, param0)));

        // The divisor does not depend on the sample, so it is computed once.
        double divisor = Math.Max(MinDenominator, ratSil);
        for (int s = 0; s < scores.Length; s++)
        {
            scores[s] = Math.Sqrt(Math.Max(0, scores[s] - adjWss)) / divisor;
        }

        return scores;
    }

    /// <summary>Restricts <paramref name="value"/> to the range [0, 1].</summary>
    private static double Clamp01(double value) => Math.Max(0, Math.Min(1, value));
}