92 lines
3.9 KiB
C#
92 lines
3.9 KiB
C#
using System;
|
|
using System.Linq;
|
|
|
|
/// <summary>
/// Cluster-quality helpers: mean silhouette coefficient, mean squared distance,
/// and outlier scores derived from both.
/// </summary>
public static class ClusterMetrics
{
    /// <summary>Lower bound applied to divisors so a division never hits zero.</summary>
    private const double Epsilon = 0.000000001;

    /// <summary>
    /// Computes the mean silhouette coefficient over all samples, following
    /// https://en.wikipedia.org/wiki/Silhouette_(clustering).
    /// For each sample, a(i) is the mean distance to the other members of its own
    /// cluster and b(i) is the smallest mean distance to the members of any other
    /// populated cluster; s(i) is 1 - a/b when a is below b, b/a - 1 when a is above b,
    /// and 0 otherwise. Samples in singleton clusters contribute 0.
    /// </summary>
    /// <param name="pair">
    /// <c>clusters</c>: one label per sample. Labels are used directly as array
    /// indices, so they must be non-negative. Unused label values are tolerated.
    /// <c>data</c>: passed through <c>Transpose()</c> and then indexed by sample;
    /// presumably stored feature-major - confirm against <c>Transpose</c>.
    /// </param>
    /// <param name="hint">Free-text tag echoed in the console progress line.</param>
    /// <returns>
    /// The mean of s(i) over all samples. Returns 0 when there are no samples or
    /// fewer than two populated clusters, because the coefficient is not defined
    /// for such partitions.
    /// </returns>
    /// <remarks>
    /// Runs two O(n^2) distance passes and writes a progress line to the console
    /// for every 100th sample index that belongs to a non-singleton cluster.
    /// </remarks>
    public static double Silhouette(this (int[] clusters, double[][] data) pair, string hint = "?")
    {
        var clusters = pair.clusters;
        ArgumentNullException.ThrowIfNull(clusters);

        var nSamples = clusters.Length;
        if (nSamples == 0)
        {
            return 0;
        }

        var clusterCount = clusters.Max() + 1;
        var clusterSizes = new int[clusterCount];
        foreach (var label in clusters)
        {
            clusterSizes[label]++;
        }

        // With fewer than two populated clusters there is no "nearest other cluster";
        // report 0 instead of letting an infinite b(i) turn every s(i) into 1.
        if (clusterSizes.Count(size => size > 0) < 2)
        {
            return 0;
        }

        var samples = pair.data.Transpose();

        // Pass 1: a(i), the mean distance to the other members of the sample's own cluster.
        var cohesion = new double[nSamples];
        for (int i = 0; i < nSamples; i++)
        {
            int own = clusters[i];
            if (clusterSizes[own] <= 1)
            {
                continue; // singleton: a(i) stays 0, which yields s(i) = 0 below
            }

            double distSum = 0;
            for (int j = 0; j < nSamples; j++)
            {
                if (i != j && clusters[j] == own)
                {
                    distSum += (samples[i], samples[j]).EuclideanDistance();
                }
            }

            cohesion[i] = distSum / (clusterSizes[own] - 1);

            if (i % 100 == 0)
            {
                ReportProgress(hint, "a", i, 50d * i / nSamples);
            }
        }

        // Pass 2: b(i), the smallest mean distance to the members of any other populated cluster.
        var separation = new double[nSamples];
        var foreignSums = new double[clusterCount];
        for (int i = 0; i < nSamples; i++)
        {
            int own = clusters[i];
            if (clusterSizes[own] <= 1)
            {
                continue; // singleton: b(i) stays 0, which yields s(i) = 0 below
            }

            Array.Clear(foreignSums, 0, foreignSums.Length);
            for (int j = 0; j < nSamples; j++)
            {
                if (clusters[j] != own)
                {
                    foreignSums[clusters[j]] += (samples[i], samples[j]).EuclideanDistance();
                }
            }

            double nearest = double.PositiveInfinity;
            for (int k = 0; k < clusterCount; k++)
            {
                // Skip the sample's own cluster and unused labels; dividing by an
                // empty cluster's size would give 0/0 = NaN and poison the minimum.
                if (k == own || clusterSizes[k] == 0)
                {
                    continue;
                }

                nearest = Math.Min(nearest, foreignSums[k] / clusterSizes[k]);
            }

            separation[i] = nearest;

            if (i % 100 == 0)
            {
                ReportProgress(hint, "b", i, 50d * i / nSamples + 50);
            }
        }

        // Combine a(i) and b(i) into s(i) and accumulate the mean.
        double total = 0;
        for (int i = 0; i < nSamples; i++)
        {
            double a = cohesion[i];
            double b = separation[i];
            double s = 0; // a == b (including the singleton case) or NaN input

            if (a < b)
            {
                s = 1 - a / b;
            }
            else if (a > b)
            {
                s = b / a - 1;
            }

            total += s;
        }

        // Erase the progress line so later console output starts on a clean row.
        Console.Write("\r");
        Console.Write("\x1b[2K");

        return total / nSamples;
    }

    /// <summary>
    /// Returns the mean of the squared values in <paramref name="distances"/>.
    /// </summary>
    /// <param name="distances">Values to square and average.</param>
    /// <returns>
    /// Sum of squares divided by the element count; <see cref="double.NaN"/> for an
    /// empty array, because the division is 0 / 0.
    /// </returns>
    public static double DistancesToWSS(this double[] distances) =>
        distances.Sum(x => x * x) / distances.Length;

    /// <summary>
    /// Single-value convenience overload: wraps <c>distance2</c> in a one-element
    /// array, calls <see cref="OutlierScores"/>, and returns the only result.
    /// </summary>
    /// <param name="inpdata">The value to score plus the <c>wss</c> and <c>sil</c> figures.</param>
    /// <param name="param0">Exponent applied to the clamped silhouette value.</param>
    /// <returns>The outlier score for the single input value.</returns>
    public static double OutlierScore(this (double distance2, double wss, double sil) inpdata, double param0) =>
        OutlierScores(([inpdata.distance2], inpdata.wss, inpdata.sil), param0)[0];

    /// <summary>
    /// Converts each input value into an outlier score:
    /// <c>sqrt(max(0, d - w)) / max(eps, r)</c>, where <c>w</c> is <c>wss</c> clamped
    /// to [0, 1] and <c>r</c> is <c>w / max(eps, s^param0)</c> clamped to [0, 1], with
    /// <c>s</c> being <c>sil</c> clamped to [0, 1].
    /// </summary>
    /// <param name="inpdata">
    /// <c>distance2</c>: values to score (presumably squared distances, going by the
    /// name - confirm with callers); <c>wss</c> and <c>sil</c>: the figures the
    /// scores are normalised against.
    /// </param>
    /// <param name="param0">Exponent applied to the clamped silhouette value.</param>
    /// <returns>
    /// The array produced by <c>Copy1D()</c> on <c>distance2</c>, overwritten in
    /// place with the scores, one per input value in the same order.
    /// </returns>
    public static double[] OutlierScores(this (double[] distance2, double wss, double sil) inpdata, double param0)
    {
        var (distance2, wss, sil) = inpdata;

        double adjWss = Clamp01(wss);
        double adjSil = Clamp01(sil);
        double ratSil = Clamp01(adjWss / Math.Max(Epsilon, Math.Pow(adjSil, param0)));

        // Loop-invariant divisor, floored so a zero ratio cannot produce a division by zero.
        double divisor = Math.Max(Epsilon, ratSil);

        double[] scores = distance2.Copy1D();
        for (int s = 0; s < scores.Length; s++)
        {
            scores[s] = Math.Sqrt(Math.Max(0, scores[s] - adjWss)) / divisor;
        }

        return scores;
    }

    /// <summary>Clamps <paramref name="value"/> to the closed interval [0, 1].</summary>
    private static double Clamp01(double value) => Math.Max(0, Math.Min(1, value));

    /// <summary>
    /// Rewrites the current console line with a silhouette progress message:
    /// ANSI erase-line, the message, then a carriage return so the next write
    /// starts at column 0 of the same line.
    /// </summary>
    private static void ReportProgress(string hint, string phase, int index, double percent)
    {
        Console.Write("\x1b[2K");
        Console.Write($"> Silhouette {hint} {phase}[{index}] {Math.Round(percent, 0)}% {DateTime.Now}");
        Console.Write("\r");
    }
}