// Source: MIF_E31221222/sigap-website/prisma/utils/crime-score-calculator.ts
// 283 lines · 9.4 KiB · TypeScript

import { kmeans } from 'ml-kmeans';
/**
 * Min-max scaling parameters captured when a year's clustering was run.
 *
 * Each feature stores the observed `min` and `max` plus the `range` used as
 * the divisor in `(value - min) / range`. The clustering code substitutes 1
 * for the range when `max - min` is 0 so the division never hits zero.
 */
interface NormalizationParams {
// Year these parameters were computed for.
year: number;
// Scaling parameters for the per-district number of crimes.
crimes: { min: number; max: number; range: number };
// Scaling parameters for population density.
density: { min: number; max: number; range: number };
// Scaling parameters for the unemployment rate.
unemployment: { min: number; max: number; range: number };
}
/**
 * Result of one K-means run, as stored per year by the calculator.
 */
interface KMeansModel {
// Cluster centroids as returned by the kmeans library; the clustering input
// rows are [crimes, density, unemployment] after min-max scaling.
centroids: number[][];
// District ID -> crime level label assigned from its cluster.
clusters: Record<string, 'low' | 'medium' | 'high'>;
// Scaling parameters used to build the clustering input. Declared optional,
// although the clustering code in this file always sets it.
normalization?: NormalizationParams;
}
/**
 * Raw (un-normalized) features for one district, used as clustering input.
 * NOTE(review): units (e.g. people per km², percent vs. fraction) are not
 * visible in this file — confirm against the data source.
 */
interface DistrictData {
// Count of recorded crimes for the district.
numberOfCrimes: number;
// Population density of the district.
populationDensity: number;
// Unemployment rate of the district.
unemploymentRate: number;
}
/**
 * Clusters districts into low / medium / high crime levels with K-means and
 * derives a 0-100 security score from the same min-max normalized features.
 *
 * One model (centroids, per-district labels and the normalization parameters
 * used to build it) is kept in memory per year.
 */
export class CrimeScoreCalculator {
  private kmeansModels: Record<number, KMeansModel> = {};

  /**
   * Computes min, max and a division-safe range for one feature column.
   * Uses pairwise Math.min/Math.max so coercion matches the spread form while
   * avoiding the argument-count limit of `Math.min(...values)`.
   */
  private static computeFeatureStats(values: number[]): {
    min: number;
    max: number;
    range: number;
  } {
    let min = Infinity;
    let max = -Infinity;
    for (const value of values) {
      min = Math.min(min, value);
      max = Math.max(max, value);
    }
    // A constant feature has max === min; fall back to 1 to avoid dividing by zero.
    return { min, max, range: max - min || 1 };
  }

  /**
   * Runs K-means clustering (k = 3) on district crime data for a specific year
   * and stores the resulting model, replacing any previous model for that year.
   *
   * Features are min-max scaled to [0, 1] before clustering. Clusters are
   * labelled by the sum of their centroid coordinates: lowest sum = 'low',
   * highest sum = 'high'.
   *
   * @param districtData Object mapping district IDs to their crime statistics
   * @param year The year for which to run clustering
   * @returns True if clustering was successful; false when there is no data,
   *          a feature value is not a number, or the kmeans call throws
   */
  public async runKMeansClustering(
    districtData: Record<string, DistrictData>,
    year: number
  ): Promise<boolean> {
    const entries = Object.entries(districtData);

    // Bail out before computing statistics: min/max of an empty set are ±Infinity.
    if (entries.length === 0) {
      console.error(`❌ No data for K-means clustering for year ${year}`);
      return false;
    }

    // First pass: collect the district IDs and each feature column.
    const districtIds = entries.map(([districtId]) => districtId);
    const allCrimes = entries.map(([, values]) => values.numberOfCrimes);
    const allDensities = entries.map(([, values]) => values.populationDensity);
    const allUnemployment = entries.map(([, values]) => values.unemploymentRate);

    // A single NaN/undefined value turns the feature's min and max into NaN,
    // which would corrupt the normalized input of every district.
    const hasInvalidValue = [
      ...allCrimes,
      ...allDensities,
      ...allUnemployment
    ].some((value) => isNaN(value));
    if (hasInvalidValue) {
      console.error(
        `❌ Invalid (non-numeric) feature values in K-means input for year ${year}`
      );
      return false;
    }

    const crimeStats = CrimeScoreCalculator.computeFeatureStats(allCrimes);
    const densityStats = CrimeScoreCalculator.computeFeatureStats(allDensities);
    const unemploymentStats =
      CrimeScoreCalculator.computeFeatureStats(allUnemployment);

    // Stored with the model so scores can later be computed on the same scale.
    const normalizationParams: NormalizationParams = {
      year,
      crimes: crimeStats,
      density: densityStats,
      unemployment: unemploymentStats
    };

    // Second pass: min-max scaling, (value - min) / range -> [0, 1].
    const data: number[][] = entries.map(([, values]) => [
      (values.numberOfCrimes - crimeStats.min) / crimeStats.range,
      (values.populationDensity - densityStats.min) / densityStats.range,
      (values.unemploymentRate - unemploymentStats.min) /
        unemploymentStats.range
    ]);

    try {
      // Run K-means with 3 clusters (low, medium, high)
      const result = kmeans(data, 3, {
        initialization: 'kmeans++',
        maxIterations: 100
      });

      const clusterCentroids = result.centroids;

      // Rank clusters by the sum of their centroid coordinates (ascending).
      const sortedIndices = clusterCentroids
        .map((centroid, index) => ({
          sum: centroid.reduce((sum, val) => sum + val, 0),
          index
        }))
        .sort((a, b) => a.sum - b.sum)
        .map((item) => item.index);

      const labelMap: Record<number, 'low' | 'medium' | 'high'> = {
        [sortedIndices[0]]: 'low', // Lowest centroid sum
        [sortedIndices[1]]: 'medium', // Middle centroid sum
        [sortedIndices[2]]: 'high' // Highest centroid sum
      };
      console.log(
        `🏙️ Year ${year} cluster levels: Low=${sortedIndices[0]}, Medium=${sortedIndices[1]}, High=${sortedIndices[2]}`
      );

      // result.clusters[i] is the cluster index of data[i], i.e. of districtIds[i].
      const clusters: Record<string, 'low' | 'medium' | 'high'> = {};
      districtIds.forEach((districtId, i) => {
        clusters[districtId] = labelMap[result.clusters[i]];
      });

      // Verify that all districts have a cluster assigned
      const clusterCount = Object.keys(clusters).length;
      const districtCount = entries.length;
      if (clusterCount !== districtCount) {
        console.error(
          `❌ K-means clustering failed to assign clusters to all districts. Expected ${districtCount}, got ${clusterCount}`
        );
        return false;
      }

      this.kmeansModels[year] = {
        centroids: clusterCentroids,
        clusters: clusters,
        normalization: normalizationParams
      };
      return true;
    } catch (error) {
      // The kmeans library throws on unusable input; report it as a failed run.
      console.error(
        `❌ Error running K-means clustering for year ${year}:`,
        error
      );
      return false;
    }
  }

  /**
   * Calculate security score based on crime count, population density, and unemployment.
   * Score ranges from 0-100, where higher score means SAFER area (more security).
   * This is the opposite of the crime severity - high security = low crime severity.
   *
   * Inputs are scaled with the normalization parameters stored for `year` by
   * `runKMeansClustering`. NaN/undefined inputs are logged and replaced by 0.
   *
   * @returns An integer in [0, 100]; 0 is also returned as the fallback when no
   *          usable normalization parameters exist for the year or the score
   *          cannot be computed
   */
  public calculateSecurityScore(
    crimeCount: number,
    populationDensity: number,
    unemploymentRate: number,
    year: number
  ): number {
    // Invalid inputs are reported, then replaced by 0 rather than throwing.
    if (
      isNaN(crimeCount) ||
      isNaN(populationDensity) ||
      isNaN(unemploymentRate)
    ) {
      console.error(
        `❌ Invalid inputs for security score calculation: crimeCount=${crimeCount}, populationDensity=${populationDensity}, unemploymentRate=${unemploymentRate}`
      );
      crimeCount = crimeCount || 0;
      populationDensity = populationDensity || 0;
      unemploymentRate = unemploymentRate || 0;
      console.log(
        `⚠️ Using fallback values: crimeCount=${crimeCount}, populationDensity=${populationDensity}, unemploymentRate=${unemploymentRate}`
      );
    }

    // Get the normalization params for the year
    const normParams = this.kmeansModels[year]?.normalization;
    if (!normParams) {
      console.error(`❌ No normalization parameters found for year ${year}`);
      return 0; // Fallback: lowest possible score, returned instead of throwing
    }

    // A zero or NaN range would make the divisions below meaningless.
    if (
      !normParams.crimes.range ||
      !normParams.density.range ||
      !normParams.unemployment.range
    ) {
      console.error(`❌ Invalid normalization ranges for year ${year}`);
      return 0; // Fallback: lowest possible score, returned instead of throwing
    }

    // Min-max scaling with the stored parameters. Values outside the range
    // seen at clustering time land outside [0, 1].
    const normalizedCrimes =
      (crimeCount - normParams.crimes.min) / normParams.crimes.range;
    const normalizedDensity =
      (populationDensity - normParams.density.min) / normParams.density.range;
    const normalizedUnemployment =
      (unemploymentRate - normParams.unemployment.min) /
      normParams.unemployment.range;

    // Custom weighting for security score (0-100):
    // - Crime count has the highest impact (60%)
    // - Population density has moderate impact (25%)
    // - Unemployment has some impact (15%)
    const crimeWeight = 0.6;
    const densityWeight = 0.25;
    const unemploymentWeight = 0.15;

    // Slightly exponential crime factor. A negative base with a fractional
    // exponent yields NaN, so a crime count below the stored minimum is
    // treated as contributing no crime severity.
    const crimeFactor = Math.pow(Math.max(normalizedCrimes, 0), 1.2);

    // Weighted crime severity (higher means more severe crime situation)
    const crimeSeverityScore =
      crimeFactor * crimeWeight +
      normalizedDensity * densityWeight +
      normalizedUnemployment * unemploymentWeight;

    // INVERT the severity to get the security score (1 = safest, 0 = least safe)
    const securityScore = 1 - crimeSeverityScore;

    // Math.min/Math.max propagate NaN, so guard explicitly to honour the
    // "always a valid integer" contract.
    if (Number.isNaN(securityScore)) {
      console.error(
        `❌ Security score for year ${year} could not be computed (result is NaN)`
      );
      return 0;
    }

    // Scale to 0-100, round to an integer and clamp into range.
    return Math.min(Math.max(Math.round(securityScore * 100), 0), 100);
  }

  /**
   * Get the cluster level for a district in a specific year.
   * @returns The stored label, or null when the year or district is unknown
   */
  public getDistrictClusterLevel(
    districtId: string,
    year: number
  ): 'low' | 'medium' | 'high' | null {
    return this.kmeansModels[year]?.clusters[districtId] || null;
  }

  /**
   * Get all the cluster levels for a year.
   * @returns The district ID -> label map, or null when no model exists for the year
   */
  public getYearClusters(
    year: number
  ): Record<string, 'low' | 'medium' | 'high'> | null {
    return this.kmeansModels[year]?.clusters || null;
  }

  /**
   * Get the normalization parameters for a year.
   * @returns The stored parameters, or null when no model exists for the year
   */
  public getNormalizationParams(year: number): NormalizationParams | null {
    return this.kmeansModels[year]?.normalization || null;
  }
}