// Source: MIF_E31221222/sigap-website/prisma/seeds/crime-incident.ts
// (741 lines, 23 KiB, TypeScript)

// prisma/seeds/CrimeIncidentsSeeder.ts
import { generateCode, generateId } from '../../app/_utils/common';
import { PrismaClient, crime_status } from '@prisma/client';
import axios from 'axios';
import { kmeans } from 'ml-kmeans';
/** Crime-rate label written to `crimes.rate`. */
type SeederCrimeRate = 'low' | 'medium' | 'high';

/** Observed extent of one clustering feature; `range` is never 0. */
type SeederFeatureRange = { min: number; max: number; range: number };

/** Min-max scaling parameters captured when a yearly model is trained. */
type SeederNormalization = {
  year: number;
  crimes: SeederFeatureRange;
  density: SeederFeatureRange;
  unemployment: SeederFeatureRange;
};

/** The three features used to cluster a district (or to classify a city). */
type SeederDistrictFeatures = {
  numberOfCrimes: number;
  populationDensity: number;
  unemploymentRate: number;
};

type SeederCoordinates = { latitude: number; longitude: number };

// Minimal structural views of the rows this seeder reads; the Prisma rows
// returned by `findMany()` carry at least these fields.
type SeederCity = { id: string; name: string };
type SeederDistrict = { id: string; name: string; city_id: string };
type SeederCategory = { id: string; name: string };

/** Subset of the Mapbox reverse-geocoding response body that is read here. */
type SeederMapboxBody = {
  features?: Array<{ properties?: { full_address?: string } }>;
};

/**
 * Seeds the `crimes` and `crime_incidents` tables with synthetic data.
 *
 * For each of the last five calendar years the seeder:
 *  1. creates one zeroed `crimes` row per district,
 *  2. creates random incidents, split evenly across years and districts and
 *     capped at `MAX_INCIDENTS` in total,
 *  3. clusters districts into low / medium / high with k-means over
 *     (crime count, population density, unemployment),
 *  4. creates one aggregated `crimes` row per city that had incidents,
 *  5. writes the final count and rate back to every district row.
 *
 * Both tables are truncated (CASCADE) before seeding.
 */
export class CrimeIncidentsSeeder {
  /** Number of k-means clusters; one per crime-rate label. */
  private static readonly CLUSTER_COUNT = 3;

  /** How often ID generation is retried before giving up on a collision. */
  private static readonly MAX_ID_ATTEMPTS = 50;

  /** Upper bound for a single Mapbox request so a stalled call cannot hang the run. */
  private static readonly MAPBOX_TIMEOUT_MS = 10000;

  /** Labels in ascending order of centroid magnitude. */
  private static readonly RATE_BY_RANK: readonly SeederCrimeRate[] = [
    'low',
    'medium',
    'high',
  ];

  private static readonly STATUS_OPTIONS: readonly crime_status[] = [
    'open',
    'closed',
    'resolved',
    'unresolved',
  ];

  /** Street names used when Mapbox is unavailable. */
  private static readonly FALLBACK_STREET_NAMES: readonly string[] = [
    'Jalan Sudirman',
    'Jalan Thamrin',
    'Jalan Gatot Subroto',
    'Jalan Diponegoro',
    'Jalan Ahmad Yani',
    'Jalan Imam Bonjol',
    'Jalan Pahlawan',
    'Jalan Merdeka',
    'Jalan Pemuda',
    'Jalan Gajah Mada',
    'Jalan Hayam Wuruk',
    'Jalan Veteran',
    'Jalan Kartini',
    'Jalan Juanda',
    'Jalan Hasanudin',
    'Jalan Surya Kencana',
  ];

  private readonly mapboxToken: string;
  private totalIncidentsCreated: number = 0;
  private readonly MAX_INCIDENTS: number = 500;

  /** district id -> year -> demographic figures, loaded once per run. */
  private districtDemographicCache: Record<
    string,
    Record<number, { populationDensity: number; unemployment: number }>
  > = {};

  /** Trained k-means model per year. */
  private kmeansModels: Record<
    number,
    {
      centroids: number[][];
      clusters: Record<string, SeederCrimeRate>;
      normalization?: SeederNormalization;
    }
  > = {};

  /** IDs handed out during the current run, used to reject duplicates. */
  private readonly usedIds = new Set<string>();

  /** Reverse-geocoding results keyed by coordinates rounded to 4 decimals. */
  private readonly streetCache = new Map<string, string>();

  /** Base coordinates per district, so `geographics` is queried once per district. */
  private readonly baseCoordinateCache = new Map<string, SeederCoordinates>();

  constructor(private prisma: PrismaClient) {
    // The token is optional: without it every location uses a fallback street name.
    this.mapboxToken = process.env.NEXT_PUBLIC_MAPBOX_ACCESS_TOKEN || '';
  }

  /**
   * Truncates `crime_incidents` and `crimes`, then reseeds them.
   *
   * Returns without touching the tables when there are no districts or no
   * crime categories, because nothing could be generated from them.
   *
   * @throws Error when a district references a city that does not exist, or
   *   when no unused ID can be generated.
   */
  async run(): Promise<void> {
    console.log(
      `Seeding crime incidents data (limited to ${this.MAX_INCIDENTS} records)...`
    );

    // Reset per-run state so the same instance can be executed more than once.
    this.totalIncidentsCreated = 0;
    this.kmeansModels = {};
    this.usedIds.clear();

    const districts: SeederDistrict[] = await this.prisma.districts.findMany();
    const cities: SeederCity[] = await this.prisma.cities.findMany();
    const crimeCategories: SeederCategory[] =
      await this.prisma.crime_categories.findMany();

    if (districts.length === 0 || crimeCategories.length === 0) {
      console.warn(
        'No districts or crime categories found; skipping crime incident seeding.'
      );
      return;
    }
    if (!this.mapboxToken) {
      console.warn(
        'NEXT_PUBLIC_MAPBOX_ACCESS_TOKEN is not set; using fallback street names.'
      );
    }

    // Pre-load all demographics data for faster access.
    await this.preloadDemographicData(districts);

    // Remove existing data; tables are empty afterwards, so tracking IDs in
    // memory is enough to guarantee uniqueness.
    await this.prisma.$executeRaw`TRUNCATE TABLE "crime_incidents" CASCADE`;
    await this.prisma.$executeRaw`TRUNCATE TABLE "crimes" CASCADE`;

    const cityById = new Map<string, SeederCity>();
    for (const city of cities) {
      cityById.set(city.id, city);
    }

    // Seed the last five years, oldest first.
    const currentYear = new Date().getFullYear();
    const years = [4, 3, 2, 1, 0].map((offset) => currentYear - offset);
    const yearlyQuotas = this.splitEvenly(this.MAX_INCIDENTS, years.length);

    for (let index = 0; index < years.length; index++) {
      if (this.totalIncidentsCreated >= this.MAX_INCIDENTS) {
        break;
      }
      await this.seedYear(
        years[index],
        yearlyQuotas[index],
        districts,
        cities,
        cityById,
        crimeCategories
      );
    }

    console.log(
      `${this.totalIncidentsCreated} crime incidents seeded (limit: ${this.MAX_INCIDENTS})`
    );
  }

  /**
   * Seeds everything belonging to one year: district rows, incidents,
   * clustering, city rows and the final district update.
   *
   * @param quota Number of incidents to create in this year.
   */
  private async seedYear(
    year: number,
    quota: number,
    districts: readonly SeederDistrict[],
    cities: readonly SeederCity[],
    cityById: Map<string, SeederCity>,
    crimeCategories: readonly SeederCategory[]
  ): Promise<void> {
    // Incident counters per district and per city.
    const districtCrimeCount: Record<string, number> = {};
    const cityCrimeCount: Record<string, number> = {};
    for (const city of cities) {
      cityCrimeCount[city.id] = 0;
    }
    for (const district of districts) {
      districtCrimeCount[district.id] = 0;
    }

    // Step 1: one zeroed crime row per district. The generated ID is kept so
    // the row does not have to be queried back.
    const crimeIdByDistrict = new Map<string, string>();
    for (const district of districts) {
      const city = cityById.get(district.city_id);
      if (!city) {
        throw new Error(
          `City not found for district ${district.name} (city ID: ${district.city_id})`
        );
      }
      const crimeId = this.nextUniqueId('CR', generateCode(city.name), year);
      await this.prisma.crimes.create({
        data: {
          id: crimeId,
          district_id: district.id,
          city_id: district.city_id,
          year,
          number_of_crime: 0, // updated in step 5
          rate: 'low', // placeholder until clustering has run
          heat_map: this.generateHeatMap(district.id),
        },
      });
      crimeIdByDistrict.set(district.id, crimeId);
    }

    // Step 2: incidents. The yearly quota is split so that district shares
    // differ by at most one and add up to exactly `quota`.
    const districtQuotas = this.splitEvenly(quota, districts.length);
    for (let index = 0; index < districts.length; index++) {
      if (this.totalIncidentsCreated >= this.MAX_INCIDENTS) {
        break;
      }
      const district = districts[index];
      const crimeId = crimeIdByDistrict.get(district.id);
      if (!crimeId) {
        throw new Error(
          `Crime record not found for district ${district.name} (ID: ${district.id})`
        );
      }
      const base = await this.getBaseCoordinates(district.id);
      const incidentCount = Math.min(
        districtQuotas[index],
        this.MAX_INCIDENTS - this.totalIncidentsCreated
      );
      for (let n = 0; n < incidentCount; n++) {
        await this.createIncident(
          district,
          crimeId,
          this.pickRandom(crimeCategories),
          year,
          base
        );
        districtCrimeCount[district.id]++;
        cityCrimeCount[district.city_id]++;
        this.totalIncidentsCreated++;
      }
    }

    // Step 3: cluster districts on crime count and demographics.
    const districtData: Record<string, SeederDistrictFeatures> = {};
    for (const district of districts) {
      const demographics = this.districtDemographicCache[district.id]?.[year];
      districtData[district.id] = {
        numberOfCrimes: districtCrimeCount[district.id],
        // `||` on purpose: a stored 0 is treated like missing data.
        populationDensity: demographics?.populationDensity || 100,
        unemploymentRate: demographics?.unemployment || 5,
      };
    }
    this.runKMeansClustering(districtData, year);

    // Step 4: one aggregated row per city that had incidents.
    for (const city of cities) {
      const crimeCount = cityCrimeCount[city.id];
      if (crimeCount <= 0) {
        continue;
      }
      let densityTotal = 0;
      let unemploymentTotal = 0;
      let sampled = 0;
      for (const district of districts) {
        if (district.city_id !== city.id) {
          continue;
        }
        const demographics = this.districtDemographicCache[district.id]?.[year];
        if (demographics) {
          densityTotal += demographics.populationDensity;
          unemploymentTotal += demographics.unemployment;
          sampled++;
        }
      }
      // NOTE(review): the model is normalised on district-level counts, so a
      // city-level total usually lands beyond the training range.
      const cityRate = this.predictClusterWithKMeans(
        {
          numberOfCrimes: crimeCount,
          populationDensity: sampled > 0 ? densityTotal / sampled : 100,
          unemploymentRate: sampled > 0 ? unemploymentTotal / sampled : 5,
        },
        year
      );
      await this.prisma.crimes.create({
        data: {
          id: this.nextUniqueId('CR', generateCode(city.name), year),
          city_id: city.id,
          district_id: null,
          year,
          number_of_crime: crimeCount,
          rate: cityRate,
          heat_map: this.generateHeatMap(city.id),
        },
      });
    }

    // Step 5: final count and rate for every district row. The k-means label
    // wins; the threshold rule is used when no model exists for the year.
    for (const district of districts) {
      const crimeCount = districtCrimeCount[district.id];
      const rate =
        this.kmeansModels[year]?.clusters[district.id] ||
        this.getCrimeRate(crimeCount);
      await this.prisma.crimes.updateMany({
        where: { district_id: district.id, year },
        data: { number_of_crime: crimeCount, rate },
      });
    }
  }

  /**
   * Inserts one random incident for `district`, attached to crime row `crimeId`.
   *
   * @param base Centre of the district; the incident is placed within
   *   +/- 0.01 degrees of it on both axes.
   */
  private async createIncident(
    district: SeederDistrict,
    crimeId: string,
    category: SeederCategory,
    year: number,
    base: SeederCoordinates
  ): Promise<void> {
    // Random moment within the year; the upper bound is the last millisecond
    // of 31 December so the final day is not excluded.
    const yearStart = new Date(year, 0, 1).getTime();
    const yearEnd = new Date(year, 11, 31, 23, 59, 59, 999).getTime();
    const randomDate = new Date(this.getRandomNumber(yearStart, yearEnd));

    // Independent random time of day on that date.
    const randomTime = new Date(
      randomDate.getFullYear(),
      randomDate.getMonth(),
      randomDate.getDate(),
      this.getRandomInt(0, 23),
      this.getRandomInt(0, 59),
      0
    );

    const latitude = base.latitude + this.getRandomNumber(-0.01, 0.01);
    const longitude = base.longitude + this.getRandomNumber(-0.01, 0.01);
    const status = this.pickRandom(CrimeIncidentsSeeder.STATUS_OPTIONS);
    const victimCount = this.getRandomInt(0, 5);

    const categoryName = category.name.toLowerCase();
    const description = this.pickRandom([
      `Terjadi ${categoryName} di daerah ${district.name}`,
      `Dilaporkan kasus ${categoryName} oleh warga setempat`,
      `Kejadian ${categoryName} melibatkan ${victimCount} korban`,
      `Insiden ${categoryName} terjadi pada malam hari`,
      `Kasus ${categoryName} sedang dalam penyelidikan`,
    ]);

    const location = await this.resolveLocation(
      district.name,
      latitude,
      longitude
    );

    await this.prisma.crime_incidents.create({
      data: {
        id: this.nextUniqueId('CI', generateCode(district.name), year),
        crime_id: crimeId,
        crime_category_id: category.id,
        date: randomDate,
        time: randomTime,
        location,
        latitude,
        longitude,
        description,
        victim_count: victimCount,
        status,
      },
    });
  }

  /**
   * Returns a human-readable location for the coordinates: the Mapbox address
   * when a token is configured and the request succeeds, otherwise a random
   * fallback street in the district.
   */
  private async resolveLocation(
    districtName: string,
    latitude: number,
    longitude: number
  ): Promise<string> {
    if (this.mapboxToken) {
      const coordKey = `${latitude.toFixed(4)},${longitude.toFixed(4)}`;
      const cached = this.streetCache.get(coordKey);
      if (cached) {
        return cached;
      }
      try {
        const street = await this.getStreetFromMapbox(longitude, latitude);
        this.streetCache.set(coordKey, street);
        return street;
      } catch (error: unknown) {
        const reason = error instanceof Error ? error.message : String(error);
        console.warn(
          `Failed to get street name from Mapbox: ${reason}. Using fallback.`
        );
      }
    }
    const randomStreet = this.pickRandom(
      CrimeIncidentsSeeder.FALLBACK_STREET_NAMES
    );
    return `${districtName}, ${randomStreet} No. ${this.getRandomInt(1, 200)}`;
  }

  /**
   * Looks up the base coordinates of a district, falling back to
   * (-8.0, 114.0) when no `geographics` row or value exists.
   */
  private async getBaseCoordinates(
    districtId: string
  ): Promise<SeederCoordinates> {
    const cached = this.baseCoordinateCache.get(districtId);
    if (cached) {
      return cached;
    }
    const geoData = await this.prisma.geographics.findFirst({
      where: { district_id: districtId },
    });
    const coordinates: SeederCoordinates = {
      // `||` on purpose: a stored 0 is treated like a missing coordinate.
      latitude: geoData?.latitude || -8.0,
      longitude: geoData?.longitude || 114.0,
    };
    this.baseCoordinateCache.set(districtId, coordinates);
    return coordinates;
  }

  /**
   * Generates an ID of the form `{prefix}-{sequence}-{codes}-{year}` that has
   * not been handed out during this run.
   *
   * `generateId` is called with `randomSequence: true` and four digits, so
   * two calls with the same prefix/code/year can presumably return the same
   * value; candidates are therefore retried until an unused one appears.
   *
   * @throws Error when no unused ID is found within `MAX_ID_ATTEMPTS` tries.
   */
  private nextUniqueId(prefix: 'CR' | 'CI', code: string, year: number): string {
    for (
      let attempt = 0;
      attempt < CrimeIncidentsSeeder.MAX_ID_ATTEMPTS;
      attempt++
    ) {
      const candidate: string = generateId({
        prefix,
        segments: {
          codes: [code],
          sequentialDigits: 4,
          year,
        },
        format: '{prefix}-{sequence}-{codes}-{year}',
        separator: '-',
        randomSequence: true,
      });
      if (!this.usedIds.has(candidate)) {
        this.usedIds.add(candidate);
        return candidate;
      }
    }
    throw new Error(
      `Could not generate a unique ${prefix} ID for ${code}/${year} after ${CrimeIncidentsSeeder.MAX_ID_ATTEMPTS} attempts`
    );
  }

  /**
   * Splits `total` into `parts` non-negative integers that sum to `total` and
   * differ by at most one; the larger shares come first.
   *
   * @returns An empty array when `parts` is not positive.
   */
  private splitEvenly(total: number, parts: number): number[] {
    if (parts <= 0) {
      return [];
    }
    const wholeTotal = Math.max(0, Math.floor(total));
    const base = Math.floor(wholeTotal / parts);
    const remainder = wholeTotal % parts;
    return Array.from({ length: parts }, (_, index) =>
      index < remainder ? base + 1 : base
    );
  }

  /** Returns a uniformly chosen element of a non-empty array. */
  private pickRandom<T>(items: readonly T[]): T {
    return items[Math.floor(Math.random() * items.length)];
  }

  /**
   * Trains a 3-cluster k-means model on min-max scaled district features and
   * stores it, with the per-district labels, under `kmeansModels[year]`.
   *
   * Nothing is stored when there are fewer districts than clusters or when
   * the library throws; callers then fall back to `getCrimeRate`.
   */
  private runKMeansClustering(
    districtData: Record<string, SeederDistrictFeatures>,
    year: number
  ): void {
    const districtIds = Object.keys(districtData);
    if (districtIds.length === 0) {
      console.log(`No data for K-means clustering for year ${year}`);
      return;
    }
    if (districtIds.length < CrimeIncidentsSeeder.CLUSTER_COUNT) {
      console.log(
        `Not enough data for K-means clustering for year ${year} (${districtIds.length} districts, need at least ${CrimeIncidentsSeeder.CLUSTER_COUNT})`
      );
      return;
    }

    const rows = districtIds.map((id) => districtData[id]);
    const normalization: SeederNormalization = {
      year,
      crimes: this.computeRange(rows.map((row) => row.numberOfCrimes)),
      density: this.computeRange(rows.map((row) => row.populationDensity)),
      unemployment: this.computeRange(rows.map((row) => row.unemploymentRate)),
    };
    const data = rows.map((row) => this.normalizePoint(row, normalization));

    try {
      const result = kmeans(data, CrimeIncidentsSeeder.CLUSTER_COUNT, {
        initialization: 'kmeans++',
        maxIterations: 100,
      });
      const centroids: number[][] = result.centroids;
      const assignments: number[] = result.clusters;
      const labels = this.labelCentroids(centroids);

      // `assignments[i]` is the cluster index of the i-th input row.
      const clusters: Record<string, SeederCrimeRate> = {};
      districtIds.forEach((districtId, index) => {
        clusters[districtId] = labels[assignments[index]];
      });

      this.kmeansModels[year] = { centroids, clusters, normalization };
      console.log(`✅ K-means clustering completed for year ${year}`);
    } catch (error: unknown) {
      // No model is stored; callers fall back to the threshold classification.
      console.error(
        `Error running K-means clustering for year ${year}:`,
        error
      );
    }
  }

  /**
   * Classifies a data point with the model trained for `year` by finding the
   * nearest centroid. Falls back to `getCrimeRate` when no usable model exists.
   */
  private predictClusterWithKMeans(
    dataPoint: {
      numberOfCrimes: number;
      populationDensity: number;
      unemploymentRate: number;
    },
    year: number
  ): 'low' | 'medium' | 'high' {
    const model = this.kmeansModels[year];
    if (!model || !model.normalization) {
      return this.getCrimeRate(dataPoint.numberOfCrimes);
    }

    // Scale with the parameters captured during training.
    const point = this.normalizePoint(dataPoint, model.normalization);

    // Squared distance is enough to find the nearest centroid.
    let closestIndex = -1;
    let smallestDistance = Infinity;
    model.centroids.forEach((centroid, index) => {
      const distance = centroid.reduce(
        (sum, value, axis) => sum + (value - point[axis]) ** 2,
        0
      );
      if (distance < smallestDistance) {
        smallestDistance = distance;
        closestIndex = index;
      }
    });

    // `closestIndex` stays -1 when every distance is NaN.
    const label = this.labelCentroids(model.centroids)[closestIndex];
    return label ?? this.getCrimeRate(dataPoint.numberOfCrimes);
  }

  /**
   * Assigns a label to each centroid: centroids are ranked by the sum of
   * their coordinates, the smallest becomes 'low' and the largest 'high'.
   *
   * @returns Labels indexed by cluster index.
   */
  private labelCentroids(centroids: number[][]): SeederCrimeRate[] {
    const ranks = CrimeIncidentsSeeder.RATE_BY_RANK;
    const labels = new Array<SeederCrimeRate>(centroids.length).fill('low');
    centroids
      .map((centroid, index) => ({
        index,
        sum: centroid.reduce((total, value) => total + value, 0),
      }))
      .sort((a, b) => a.sum - b.sum)
      .forEach((entry, rank) => {
        labels[entry.index] = ranks[Math.min(rank, ranks.length - 1)];
      });
    return labels;
  }

  /** Min-max scales the three features into the training coordinate space. */
  private normalizePoint(
    features: SeederDistrictFeatures,
    normalization: SeederNormalization
  ): number[] {
    return [
      (features.numberOfCrimes - normalization.crimes.min) /
        normalization.crimes.range,
      (features.populationDensity - normalization.density.min) /
        normalization.density.range,
      (features.unemploymentRate - normalization.unemployment.min) /
        normalization.unemployment.range,
    ];
  }

  /** Returns min, max and a non-zero range (1 when all values are equal). */
  private computeRange(values: readonly number[]): SeederFeatureRange {
    let min = Infinity;
    let max = -Infinity;
    for (const value of values) {
      if (value < min) min = value;
      if (value > max) max = value;
    }
    return { min, max, range: max - min || 1 }; // `|| 1` avoids division by zero
  }

  /**
   * Loads the demographics of all given districts with a single query and
   * fills `districtDemographicCache` (district id -> year -> figures).
   */
  private async preloadDemographicData(
    districts: ReadonlyArray<{ id: string }>
  ): Promise<void> {
    console.log('Preloading demographic data...');
    this.districtDemographicCache = {};
    const districtIds: string[] = [];
    for (const district of districts) {
      this.districtDemographicCache[district.id] = {};
      districtIds.push(district.id);
    }

    if (districtIds.length > 0) {
      const demographics = await this.prisma.demographics.findMany({
        where: { district_id: { in: districtIds } },
      });
      for (const demo of demographics) {
        if (demo.district_id == null) {
          continue;
        }
        const perYear = this.districtDemographicCache[demo.district_id];
        if (!perYear) {
          continue;
        }
        perYear[demo.year] = {
          populationDensity: demo.population_density || 0,
          // NOTE(review): `number_of_unemployed` looks like a head count while
          // the default 5 reads like a rate; confirm the intended unit.
          unemployment: demo.number_of_unemployed || 5,
        };
      }
    }
    console.log('Demographic data preloaded');
  }

  /**
   * Reverse-geocodes the coordinates with the Mapbox Geocoding v6 API.
   *
   * @returns `full_address` of the first feature, or a placeholder street
   *   ("Jalan Tidak Diketahui No. N") when the response carries no address.
   * @throws Error when no token is configured or the request fails; the
   *   caller is expected to log it and use a fallback.
   */
  private async getStreetFromMapbox(lng: number, lat: number): Promise<string> {
    if (!this.mapboxToken) {
      throw new Error('Mapbox access token is not configured');
    }
    // Query values go through `params` so axios encodes them.
    const response: { data?: SeederMapboxBody } = await axios.get(
      'https://api.mapbox.com/search/geocode/v6/reverse',
      {
        params: {
          longitude: lng,
          latitude: lat,
          access_token: this.mapboxToken,
        },
        timeout: CrimeIncidentsSeeder.MAPBOX_TIMEOUT_MS,
      }
    );
    const fullAddress = response.data?.features?.[0]?.properties?.full_address;
    return (
      fullAddress || `Jalan Tidak Diketahui No. ${this.getRandomInt(1, 100)}`
    );
  }

  /** Returns a random float in [min, max). */
  private getRandomNumber(min: number, max: number): number {
    return Math.random() * (max - min) + min;
  }

  /** Returns a random integer in [min, max], both ends inclusive. */
  private getRandomInt(min: number, max: number): number {
    return Math.floor(Math.random() * (max - min + 1)) + min;
  }

  /**
   * Threshold classification used when no k-means model is available:
   * fewer than 10 crimes is 'low', fewer than 30 is 'medium', else 'high'.
   */
  private getCrimeRate(numberOfCrimes: number): 'low' | 'medium' | 'high' {
    if (numberOfCrimes < 10) return 'low';
    if (numberOfCrimes < 30) return 'medium';
    return 'high';
  }

  /**
   * Builds a dummy heat map: 5 to 20 random points inside a fixed bounding
   * box (latitude -8.5..-7.5, longitude 113.5..114.5, the range the original
   * comments give for Jember), each with an intensity between 1 and 10.
   *
   * The return type stays `any` because the value is handed straight to a
   * Prisma `heat_map` field whose input type is not visible here.
   */
  private generateHeatMap(id: string): any {
    const pointCount = this.getRandomInt(5, 20);
    const points = Array.from({ length: pointCount }, () => ({
      lat: this.getRandomNumber(-8.5, -7.5),
      lng: this.getRandomNumber(113.5, 114.5),
      intensity: this.getRandomNumber(1, 10),
    }));
    return { id, points };
  }
}