diff --git a/README.md b/README.md
index a691fb6d7..4aa012001 100644
--- a/README.md
+++ b/README.md
@@ -201,10 +201,13 @@ Perfect for developers, researchers, and businesses who need clean, structured g
| Countries | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| States | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Cities | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
+| **Sublocalities** 🆕 | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ |
| Country+States | ✅ | NA | NA | NA | NA | NA | NA | NA | NA |
| Country+Cities | ✅ | NA | NA | NA | NA | NA | NA | NA | NA |
| Country+State+Cities/World | ✅ | ✅ | ✅ | ✅ | ✅ | ✅ | NA | NA | NA |
+**New:** Sublocalities (neighborhoods, districts, areas within cities) are now available as a separate dataset with proper parent-city relationships. See [SUBLOCALITIES.md](docs/SUBLOCALITIES.md) for details.
+
## Demo
@@ -217,9 +220,12 @@ Total Sub Regions : 22
Total Countries : 250
Total States/Regions/Municipalities : 5,038
Total Cities/Towns/Districts : 151,024
+**Total Sublocalities/Neighborhoods : 0** 🆕
Last Updated On : 13th Oct 2025
+> **Note:** The sublocalities feature is newly added to help properly categorize neighborhoods, districts, and areas within cities (such as Bandra in Mumbai, Manhattan in New York). See [docs/SUBLOCALITIES.md](docs/SUBLOCALITIES.md) for details on how to contribute sub-locality data.
+
## Import MongoDB
How to import MongoDB database?
diff --git a/bin/Commands/ExportCsv.php b/bin/Commands/ExportCsv.php
index 420be775f..07513930a 100644
--- a/bin/Commands/ExportCsv.php
+++ b/bin/Commands/ExportCsv.php
@@ -17,6 +17,7 @@ class ExportCsv extends Command
'countries' => ['from' => '/json/countries.json', 'to' => '/csv/countries.csv'],
'states' => ['from' => '/json/states.json', 'to' => '/csv/states.csv'],
'cities' => ['from' => '/json/cities.json', 'to' => '/csv/cities.csv'],
+ 'sublocalities' => ['from' => '/json/sublocalities.json', 'to' => '/csv/sublocalities.csv'],
'regions' => ['from' => '/json/regions.json', 'to' => '/csv/regions.csv'],
'subregions' => ['from' => '/json/subregions.json', 'to' => '/csv/subregions.csv'],
];
@@ -25,6 +26,7 @@ class ExportCsv extends Command
'countries' => ['from' => '/json/countries.json', 'place_type' => 'country'],
'states' => ['from' => '/json/states.json', 'place_type' => 'state'],
'cities' => ['from' => '/json/cities.json', 'place_type' => 'city'],
+ 'sublocalities' => ['from' => '/json/sublocalities.json', 'place_type' => 'sublocality'],
'regions' => ['from' => '/json/regions.json', 'place_type' => 'region'],
'subregions' => ['from' => '/json/subregions.json', 'place_type' => 'subregion'],
];
diff --git a/bin/Commands/ExportJson.php b/bin/Commands/ExportJson.php
index 5ed15bf19..b3875da63 100644
--- a/bin/Commands/ExportJson.php
+++ b/bin/Commands/ExportJson.php
@@ -43,6 +43,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$k = 0; // countries-states-cities && countries-states
$l = 0;
$m = 0; // countries
+ $n = 0; // sublocalities
$countriesArray = array();
$statesArray = array();
@@ -52,6 +53,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$countryStateCityArray = array();
$regionsArray = array();
$subregionsArray = array();
+ $sublocalitiesArray = array();
$stateNamesArray = array();
$cityNamesArray = array();
@@ -273,11 +275,35 @@ protected function execute(InputInterface $input, OutputInterface $output): int
}
}
+ // Fetching All Sublocalities
+ $sql = "SELECT * FROM sublocalities ORDER BY name";
+ $result = $db->query($sql);
+ if ($result->num_rows > 0) {
+ while ($row = $result->fetch_assoc()) {
+ // Pushing it into Fresh Array
+ $sublocalitiesArray[$n]['id'] = (int)$row['id'];
+ $sublocalitiesArray[$n]['name'] = $row['name'];
+ $sublocalitiesArray[$n]['city_id'] = (int)$row['city_id'];
+ $sublocalitiesArray[$n]['state_id'] = (int)$row['state_id'];
+ $sublocalitiesArray[$n]['state_code'] = $row['state_code'];
+ $sublocalitiesArray[$n]['country_id'] = (int)$row['country_id'];
+ $sublocalitiesArray[$n]['country_code'] = $row['country_code'];
+ $sublocalitiesArray[$n]['latitude'] = $row['latitude'];
+ $sublocalitiesArray[$n]['longitude'] = $row['longitude'];
+ $sublocalitiesArray[$n]['native'] = $row['native'];
+ $sublocalitiesArray[$n]['timezone'] = $row['timezone'];
+ $sublocalitiesArray[$n]['translations'] = json_decode($row['translations'], true);
+ $sublocalitiesArray[$n]['wikiDataId'] = $row['wikiDataId'];
+ $n++;
+ }
+ }
+
$io->writeln('Total Regions Count : ' . count($regionsArray));
$io->writeln('Total Subregions Count : ' . count($subregionsArray));
$io->writeln('Total Countries Count : ' . count($countriesArray));
$io->writeln('Total States Count : ' . count($statesArray));
$io->writeln('Total Cities Count : ' . count($citiesArray));
+ $io->writeln('Total Sublocalities Count : ' . count($sublocalitiesArray));
// Add a Space
$io->newLine();
@@ -288,6 +314,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
'/json/countries.json' => $countriesArray,
'/json/states.json' => $statesArray,
'/json/cities.json' => $citiesArray,
+ '/json/sublocalities.json' => $sublocalitiesArray,
'/json/countries+states.json' => $countryStateArray,
'/json/countries+cities.json' => $countryCityArray,
'/json/countries+states+cities.json' => $countryStateCityArray
diff --git a/bin/Commands/ExportMongoDB.php b/bin/Commands/ExportMongoDB.php
index d13975e96..98efb3430 100644
--- a/bin/Commands/ExportMongoDB.php
+++ b/bin/Commands/ExportMongoDB.php
@@ -15,7 +15,7 @@ class ExportMongoDB extends Command
protected static $defaultName = 'export:mongodb';
protected static $defaultDescription = 'Export data to MongoDB format';
- private const COLLECTIONS = ['regions', 'subregions', 'countries', 'states', 'cities'];
+ private const COLLECTIONS = ['regions', 'subregions', 'countries', 'states', 'cities', 'sublocalities'];
private Filesystem $filesystem;
private array $dataCache = [];
@@ -67,6 +67,7 @@ protected function execute(InputInterface $input, OutputInterface $output): int
$this->processCountries($io, $rootDir);
$this->processStates($io, $rootDir);
$this->processCities($io, $rootDir);
+ $this->processSublocalities($io, $rootDir);
$io->success('MongoDB export completed successfully');
return Command::SUCCESS;
@@ -287,6 +288,70 @@ private function processCities(SymfonyStyle $io, string $rootDir): void
$io->info('Cities exported to MongoDB format');
}
+    /**
+     * Build the MongoDB "sublocalities" collection from the cached rows.
+     *
+     * For every row: `id` is moved to an integer `_id`, string translations are
+     * JSON-decoded, `$ref`/`$id` links to the parent city, state and country are
+     * added when the matching `*_id` is set, and a GeoJSON point is added when
+     * both coordinates are set. Nothing is saved when the cache has no rows.
+     */
+    private function processSublocalities(SymfonyStyle $io, string $rootDir): void
+    {
+        $io->section('Processing sublocalities');
+
+        // empty() also covers a missing cache key, so one check handles "not loaded" and "no rows"
+        if (empty($this->dataCache['sublocalities'])) {
+            $io->warning('No sublocalities data found - skipping');
+            return;
+        }
+
+        // reference field => target collection; iteration order fixes the order of the added keys
+        $parents = ['city' => 'cities', 'state' => 'states', 'country' => 'countries'];
+        $documents = [];
+
+        foreach ($this->dataCache['sublocalities'] as $row) {
+            $doc = $row;
+
+            // MongoDB uses _id as the primary key
+            $doc['_id'] = (int) $row['id'];
+            unset($doc['id']);
+
+            // Translations may arrive as a raw JSON string
+            $translations = $doc['translations'] ?? null;
+            if (is_string($translations)) {
+                $doc['translations'] = json_decode($translations, true);
+            }
+
+            foreach ($parents as $field => $collection) {
+                $foreignKey = $field . '_id';
+                if (isset($doc[$foreignKey])) {
+                    $doc[$field] = [
+                        '$ref' => $collection,
+                        '$id' => (int) $doc[$foreignKey]
+                    ];
+                }
+            }
+
+            // GeoJSON points are [longitude, latitude]
+            if (isset($doc['latitude'], $doc['longitude'])) {
+                $doc['location'] = [
+                    'type' => 'Point',
+                    'coordinates' => [(float) $doc['longitude'], (float) $doc['latitude']]
+                ];
+            }
+
+            $documents[] = $doc;
+        }
+
+        $this->saveCollection($rootDir, 'sublocalities', $documents);
+        $io->info('Sublocalities exported to MongoDB format');
+    }
+
private function saveCollection(string $rootDir, string $collection, array $data): void
{
$outputFile = "$rootDir/mongodb/$collection.json";
diff --git a/bin/Commands/ExportSqlServer.php b/bin/Commands/ExportSqlServer.php
index e63b950aa..13e3aaf1b 100644
--- a/bin/Commands/ExportSqlServer.php
+++ b/bin/Commands/ExportSqlServer.php
@@ -15,7 +15,7 @@ class ExportSqlServer extends Command
protected static $defaultName = 'export:sql-server';
protected static $defaultDescription = 'Export data to SQL Server format';
- private const TABLES = ['regions', 'subregions', 'countries', 'states', 'cities'];
+ private const TABLES = ['regions', 'subregions', 'countries', 'states', 'cities', 'sublocalities'];
private Filesystem $filesystem;
public function __construct()
@@ -135,6 +135,29 @@ private function generateTableSchema(string $table): string
wikiDataId NVARCHAR(255) NULL,
CONSTRAINT FK_cities_states FOREIGN KEY (state_id) REFERENCES world.states(id),
CONSTRAINT FK_cities_countries FOREIGN KEY (country_id) REFERENCES world.countries(id)
+ );",
+ 'sublocalities' => "
+ IF OBJECT_ID('world.sublocalities', 'U') IS NOT NULL DROP TABLE world.sublocalities;
+ CREATE TABLE world.sublocalities (
+ id INT IDENTITY(1,1) PRIMARY KEY,
+ name NVARCHAR(255) NOT NULL,
+ city_id INT NOT NULL,
+ state_id INT NOT NULL,
+ state_code NVARCHAR(255) NOT NULL,
+ country_id INT NOT NULL,
+ country_code NCHAR(2) NOT NULL,
+ latitude DECIMAL(10,8) NOT NULL,
+ longitude DECIMAL(11,8) NOT NULL,
+ native NVARCHAR(255) NULL,
+ timezone NVARCHAR(255) NULL,
+ translations NVARCHAR(MAX),
+ created_at DATETIME2 NOT NULL DEFAULT '2014-01-01 12:01:01',
+ updated_at DATETIME2 NOT NULL DEFAULT GETDATE(),
+ flag BIT NOT NULL DEFAULT 1,
+ wikiDataId NVARCHAR(255) NULL,
+ CONSTRAINT FK_sublocalities_cities FOREIGN KEY (city_id) REFERENCES world.cities(id),
+ CONSTRAINT FK_sublocalities_states FOREIGN KEY (state_id) REFERENCES world.states(id),
+ CONSTRAINT FK_sublocalities_countries FOREIGN KEY (country_id) REFERENCES world.countries(id)
);"
];
diff --git a/bin/Commands/ExportXml.php b/bin/Commands/ExportXml.php
index 6c0b11177..6811a3a88 100644
--- a/bin/Commands/ExportXml.php
+++ b/bin/Commands/ExportXml.php
@@ -20,6 +20,7 @@ class ExportXml extends Command
'countries' => ['from' => '/json/countries.json', 'to' => '/xml/countries.xml', 'singular' => 'country'],
'states' => ['from' => '/json/states.json', 'to' => '/xml/states.xml', 'singular' => 'state'],
'cities' => ['from' => '/json/cities.json', 'to' => '/xml/cities.xml', 'singular' => 'city'],
+ 'sublocalities' => ['from' => '/json/sublocalities.json', 'to' => '/xml/sublocalities.xml', 'singular' => 'sublocality'],
];
private Filesystem $filesystem;
diff --git a/bin/Commands/ExportYaml.php b/bin/Commands/ExportYaml.php
index 63727bde3..1632d437a 100644
--- a/bin/Commands/ExportYaml.php
+++ b/bin/Commands/ExportYaml.php
@@ -20,6 +20,7 @@ class ExportYaml extends Command
'countries' => ['from' => '/json/countries.json', 'to' => '/yml/countries.yml', 'singular' => 'country'],
'states' => ['from' => '/json/states.json', 'to' => '/yml/states.yml', 'singular' => 'state'],
'cities' => ['from' => '/json/cities.json', 'to' => '/yml/cities.yml', 'singular' => 'city'],
+ 'sublocalities' => ['from' => '/json/sublocalities.json', 'to' => '/yml/sublocalities.yml', 'singular' => 'sublocality'],
];
private Filesystem $filesystem;
diff --git a/bin/scripts/sync/identify_sublocalities.py b/bin/scripts/sync/identify_sublocalities.py
new file mode 100755
index 000000000..db1152234
--- /dev/null
+++ b/bin/scripts/sync/identify_sublocalities.py
@@ -0,0 +1,333 @@
+#!/usr/bin/env python3
+"""
+Identify Potential Sub-localities Script
+
+This script helps identify cities that might be sub-localities (neighborhoods/areas
+within larger cities) by analyzing proximity and naming patterns.
+
+It looks for:
+1. Multiple cities in the same state that are very close to each other (<= 20 km by
+   default; change with --distance)
+2. Common naming patterns (e.g., "Mumbai Suburban", "North London", etc.)
+
+WikiData IDs are printed next to each match so that "part of" relationships can be
+verified manually; the script does not query WikiData itself.
+
+Usage:
+ python3 bin/scripts/sync/identify_sublocalities.py --country IN --state MH
+    python3 bin/scripts/sync/identify_sublocalities.py    # no filters: analyze all cities
+
+Requirements:
+    pip install mysql-connector-python
+    (distances use the built-in Haversine implementation, so geopy is not needed)
+"""
+
+import argparse
+import sys
+import os
+import json
+from typing import List, Dict, Tuple, Optional
+import mysql.connector
+from datetime import datetime
+from collections import defaultdict
+
+
+class SublocalityIdentifier:
+ """Identify potential sub-localities from cities data"""
+
+    def __init__(self, host='localhost', user='root', password='root', database='world'):
+        """Open the MySQL connection and a dictionary cursor; exit with status 1 on failure."""
+        settings = {
+            'host': host,
+            'user': user,
+            'password': password,
+            'database': database,
+            'charset': 'utf8mb4',
+            'use_unicode': True,
+        }
+        try:
+            self.conn = mysql.connector.connect(**settings)
+            # dictionary=True makes every fetched row a dict keyed by column name
+            self.cursor = self.conn.cursor(dictionary=True)
+        except mysql.connector.Error as e:
+            print(f"❌ MySQL connection failed: {e}")
+            sys.exit(1)
+        else:
+            print(f"✓ Connected to MySQL database '{database}'")
+
+    def get_cities_by_location(self, country_code: Optional[str] = None,
+                               state_code: Optional[str] = None) -> List[Dict]:
+        """Fetch city rows, optionally restricted to a country code and/or state code.
+
+        Falsy filter values are ignored. Rows come back ordered by country code,
+        state code and name.
+        """
+        query = """
+            SELECT id, name, state_id, state_code, country_id, country_code,
+                   latitude, longitude, wikiDataId
+            FROM cities
+            WHERE 1=1
+        """
+        params = []
+
+        # Each active filter appends one parameterized clause; values are never interpolated
+        optional_filters = (
+            (" AND country_code = %s", country_code),
+            (" AND state_code = %s", state_code),
+        )
+        for clause, value in optional_filters:
+            if value:
+                query += clause
+                params.append(value)
+
+        query += " ORDER BY country_code, state_code, name"
+
+        self.cursor.execute(query, params)
+        return self.cursor.fetchall()
+
+    def calculate_distance(self, lat1: float, lon1: float, lat2: float, lon2: float) -> float:
+        """Return the great-circle distance in kilometres between two points (Haversine).
+
+        Coordinates are decimal degrees. Each one is passed through float(), so
+        floats, Decimals and numeric strings are all accepted.
+        """
+        from math import radians, cos, sin, asin, sqrt
+
+        # Convert to radians
+        lat1, lon1, lat2, lon2 = map(radians, [float(lat1), float(lon1), float(lat2), float(lon2)])
+
+        # Haversine formula
+        dlat = lat2 - lat1
+        dlon = lon2 - lon1
+        a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
+        # Floating-point rounding can push `a` marginally above 1 for (near-)antipodal
+        # points, which makes asin() raise "math domain error"; clamp into [0, 1].
+        a = min(1.0, max(0.0, a))
+        c = 2 * asin(sqrt(a))
+
+        # Radius of earth in kilometers
+        r = 6371
+
+        return c * r
+
+    def find_nearby_cities(self, cities: List[Dict], max_distance_km: float = 20.0) -> List[Tuple]:
+        """Return (city_a, city_b, distance_km) for each same-state pair within max_distance_km."""
+        # Only cities sharing a country/state key are compared with one another
+        groups = defaultdict(list)
+        for entry in cities:
+            groups[f"{entry['country_code']}-{entry['state_code']}"].append(entry)
+
+        matches = []
+        for members in groups.values():
+            for first_idx in range(len(members)):
+                first = members[first_idx]
+                # Pair with later members only, so each unordered pair is measured once
+                for second in members[first_idx + 1:]:
+                    km = self.calculate_distance(
+                        first['latitude'], first['longitude'],
+                        second['latitude'], second['longitude']
+                    )
+                    if km <= max_distance_km:
+                        matches.append((first, second, km))
+
+        return matches
+
+    def analyze_naming_patterns(self, cities: List[Dict]) -> List[Dict]:
+        """Flag cities whose name contains a marker that suggests a sub-locality.
+
+        Matching is a case-insensitive substring test. Each city is reported at
+        most once, with the first marker (in list order) that matched.
+        """
+        # Leading/trailing spaces are deliberate: 'North ' matches "North Bay" but not "Northampton"
+        markers = (
+            'North ', 'South ', 'East ', 'West ',
+            'Central ', 'Greater ',
+            ' Suburban', ' Urban', ' Rural',
+            'Downtown', 'Uptown',
+            'Inner ', 'Outer ',
+        )
+
+        flagged = []
+        for city in cities:
+            lowered = city['name'].lower()
+            hit = next((marker for marker in markers if marker.lower() in lowered), None)
+            if hit is not None:
+                flagged.append({
+                    'city': city,
+                    'reason': f"Name contains '{hit.strip()}'"
+                })
+
+        return flagged
+
+    def generate_report(self, country_code: Optional[str] = None,
+                        state_code: Optional[str] = None,
+                        max_distance: float = 20.0) -> Dict:
+        """Run the proximity and naming analyses and return their combined results.
+
+        The returned dict has 'nearby_cities', 'suspicious_names' and
+        'total_cities'. Progress is printed to stdout while running.
+        """
+        print("\n🔍 Analyzing cities...")
+        for label, value in (("Country", country_code), ("State", state_code)):
+            if value:
+                print(f" {label}: {value}")
+        print(f" Max distance: {max_distance} km")
+
+        city_rows = self.get_cities_by_location(country_code, state_code)
+        print(f" Found {len(city_rows)} cities to analyze\n")
+
+        print("📍 Finding nearby cities...")
+        close_pairs = self.find_nearby_cities(city_rows, max_distance)
+        print(f" Found {len(close_pairs)} city pairs within {max_distance}km\n")
+
+        print("📝 Analyzing naming patterns...")
+        flagged = self.analyze_naming_patterns(city_rows)
+        print(f" Found {len(flagged)} cities with suspicious naming patterns\n")
+
+        report = {}
+        report['nearby_cities'] = close_pairs
+        report['suspicious_names'] = flagged
+        report['total_cities'] = len(city_rows)
+        return report
+
+    def print_report(self, report: Dict, limit: int = 50):
+        """Print a human-readable version of a report built by generate_report().
+
+        Args:
+            report: dict with 'total_cities', 'nearby_cities' (a list of
+                (city1, city2, distance_km) tuples) and 'suspicious_names'
+                (a list of {'city', 'reason'} dicts).
+            limit: maximum number of entries shown in each section.
+        """
+
+        print("\n" + "=" * 80)
+        print("🔎 POTENTIAL SUB-LOCALITIES REPORT")
+        print("=" * 80)
+
+        print(f"\n📊 Summary:")
+        print(f" Total cities analyzed: {report['total_cities']}")
+        print(f" Nearby city pairs: {len(report['nearby_cities'])}")
+        print(f" Suspicious names: {len(report['suspicious_names'])}")
+
+        # Show nearby cities
+        if report['nearby_cities']:
+            print(f"\n🏙️ Nearby Cities (showing up to {limit}):")
+            print(" " + "-" * 76)
+
+            for i, (city1, city2, distance) in enumerate(report['nearby_cities'][:limit]):
+                print(f"\n {i+1}. {city1['name']} ↔ {city2['name']}")
+                print(f" Distance: {distance:.2f} km")
+                print(f" IDs: {city1['id']} and {city2['id']}")
+                print(f" State: {city1['state_code']}, Country: {city1['country_code']}")
+                # Show the line when either city has an ID. A missing ID is printed
+                # as 'n/a' instead of leaking "None" or hiding the other city's ID.
+                wiki1, wiki2 = city1['wikiDataId'], city2['wikiDataId']
+                if wiki1 or wiki2:
+                    print(f" WikiData: {wiki1 or 'n/a'} and {wiki2 or 'n/a'}")
+
+        # Show suspicious names
+        if report['suspicious_names']:
+            print(f"\n📛 Suspicious Naming Patterns (showing up to {limit}):")
+            print(" " + "-" * 76)
+
+            for i, item in enumerate(report['suspicious_names'][:limit]):
+                city = item['city']
+                print(f"\n {i+1}. {city['name']}")
+                print(f" Reason: {item['reason']}")
+                print(f" ID: {city['id']}")
+                print(f" Location: {city['state_code']}, {city['country_code']}")
+                if city['wikiDataId']:
+                    print(f" WikiData: {city['wikiDataId']}")
+
+        print("\n" + "=" * 80)
+        print("\n💡 Next Steps:")
+        print(" 1. Review the identified cities manually")
+        print(" 2. Check WikiData for each entry to verify relationships")
+        print(" 3. Move confirmed sub-localities to contributions/sublocalities/sublocalities.json")
+        # City files are split per country (e.g. IN.json), so name the placeholder explicitly
+        print(" 4. Remove them from contributions/cities/<COUNTRY_CODE>.json")
+        print("\n")
+
+    def export_json_report(self, report: Dict, output_file: str = 'potential_sublocalities.json'):
+        """Write the report to output_file as UTF-8 JSON (2-space indent, non-ASCII kept as-is).
+
+        Only identifying fields of each city are exported; distances are rounded
+        to two decimals.
+        """
+        generated_at = datetime.now().isoformat()
+
+        def summarize(city: Dict) -> Dict:
+            # The subset of city fields that is exported for each member of a pair
+            return {
+                key: city[key]
+                for key in ('id', 'name', 'state_code', 'country_code', 'wikiDataId')
+            }
+
+        pairs = []
+        for first, second, km in report['nearby_cities']:
+            pairs.append({
+                'city1': summarize(first),
+                'city2': summarize(second),
+                'distance_km': round(km, 2)
+            })
+
+        flagged = []
+        for entry in report['suspicious_names']:
+            city = entry['city']
+            flagged.append({
+                'id': city['id'],
+                'name': city['name'],
+                'state_code': city['state_code'],
+                'country_code': city['country_code'],
+                'reason': entry['reason'],
+                'wikiDataId': city['wikiDataId']
+            })
+
+        export_data = {
+            'generated_at': generated_at,
+            'summary': {
+                'total_cities': report['total_cities'],
+                'nearby_pairs': len(report['nearby_cities']),
+                'suspicious_names': len(report['suspicious_names'])
+            },
+            'nearby_cities': pairs,
+            'suspicious_names': flagged
+        }
+
+        with open(output_file, 'w', encoding='utf-8') as handle:
+            json.dump(export_data, handle, ensure_ascii=False, indent=2)
+
+        print(f"✓ Report exported to {output_file}")
+
+    def close(self):
+        """Close the cursor, then the connection, skipping whichever was never created."""
+        # Order matters: the cursor is released before the connection it belongs to
+        for attr in ('cursor', 'conn'):
+            if hasattr(self, attr):
+                getattr(self, attr).close()
+
+
+def main():
+    """Parse CLI options, run the analysis, print the report and optionally export it.
+
+    Exits with status 1 on Ctrl-C or on any unexpected error. The database
+    connection is closed in every case.
+    """
+    parser = argparse.ArgumentParser(
+        description="Identify potential sub-localities from cities data"
+    )
+    parser.add_argument('--host', default='localhost', help='MySQL host')
+    parser.add_argument('--user', default='root', help='MySQL user')
+    parser.add_argument('--password', default='root', help='MySQL password')
+    parser.add_argument('--database', default='world', help='MySQL database')
+    parser.add_argument('--country', help='Filter by country code (e.g., IN, US)')
+    parser.add_argument('--state', help='Filter by state code (e.g., MH, CA)')
+    # `--all` is the explicit spelling of the default (no --country/--state filter);
+    # without this definition argparse rejects it as an unrecognized argument.
+    parser.add_argument('--all', action='store_true',
+                        help='Analyze every city (same as omitting --country and --state)')
+    parser.add_argument('--distance', type=float, default=20.0,
+                        help='Maximum distance in km to consider cities as nearby (default: 20)')
+    parser.add_argument('--export', help='Export report to JSON file')
+    parser.add_argument('--limit', type=int, default=50,
+                        help='Limit number of results to display (default: 50)')
+
+    args = parser.parse_args()
+
+    if args.all and (args.country or args.state):
+        parser.error('--all cannot be combined with --country or --state')
+
+    # Change to project root (three levels above this script's directory).
+    # A relative --export path is therefore resolved against the project root.
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    project_root = os.path.dirname(os.path.dirname(os.path.dirname(script_dir)))
+    os.chdir(project_root)
+
+    identifier = SublocalityIdentifier(
+        host=args.host,
+        user=args.user,
+        password=args.password,
+        database=args.database
+    )
+
+    try:
+        report = identifier.generate_report(
+            country_code=args.country,
+            state_code=args.state,
+            max_distance=args.distance
+        )
+
+        identifier.print_report(report, limit=args.limit)
+
+        if args.export:
+            identifier.export_json_report(report, args.export)
+
+    except KeyboardInterrupt:
+        print("\n\n⚠️ Interrupted by user")
+        sys.exit(1)
+    except Exception as e:
+        print(f"\n❌ Error: {e}")
+        import traceback
+        traceback.print_exc()
+        sys.exit(1)
+    finally:
+        identifier.close()
+
+
+if __name__ == '__main__':
+ main()
diff --git a/bin/scripts/sync/import_json_to_mysql.py b/bin/scripts/sync/import_json_to_mysql.py
index fe1efaad0..b4449509b 100755
--- a/bin/scripts/sync/import_json_to_mysql.py
+++ b/bin/scripts/sync/import_json_to_mysql.py
@@ -404,6 +404,15 @@ def import_subregions(self):
print(f" ⚠ {json_file} not found, skipping")
return 0
+    def import_sublocalities(self):
+        """Import contributions/sublocalities/sublocalities.json via import_table().
+
+        Returns import_table()'s result, or 0 when the JSON file does not exist.
+        """
+        json_file = os.path.join('contributions', 'sublocalities', 'sublocalities.json')
+        if not os.path.exists(json_file):
+            print(f" ⚠ {json_file} not found, skipping")
+            return 0
+        return self.import_table('sublocalities', json_file)
+
def close(self):
"""Close database connection"""
self.cursor.close()
@@ -443,6 +452,7 @@ def main():
countries_count = importer.import_countries()
states_count = importer.import_states()
cities_count = importer.import_cities()
+ sublocalities_count = importer.import_sublocalities()
print("\n" + "=" * 60)
print("✅ Import complete!")
@@ -451,6 +461,7 @@ def main():
print(f" 📍 Countries: {countries_count}")
print(f" 📍 States: {states_count}")
print(f" 📍 Cities: {cities_count:,}")
+ print(f" 📍 Sublocalities: {sublocalities_count:,}")
except Exception as e:
print(f"\n❌ Import failed: {e}")
diff --git a/bin/scripts/sync/sync_mysql_to_json.py b/bin/scripts/sync/sync_mysql_to_json.py
index 0584cb9ef..8d35a803f 100755
--- a/bin/scripts/sync/sync_mysql_to_json.py
+++ b/bin/scripts/sync/sync_mysql_to_json.py
@@ -232,6 +232,41 @@ def sync_subregions(self):
print(f" ✓ Synced {len(subregions)} subregions to {output_file}")
return len(subregions)
+    def sync_sublocalities(self):
+        """Dump the sublocalities table to contributions/sublocalities/sublocalities.json.
+
+        Returns the number of rows written, or 0 when the table is missing or
+        the existence check itself fails.
+        """
+        print("\n📦 Syncing sublocalities...")
+
+        # The table is optional, so probe for it before selecting from it
+        try:
+            self.cursor.execute("SHOW TABLES LIKE 'sublocalities'")
+            table_found = bool(self.cursor.fetchone())
+        except Exception as e:
+            print(f" ⚠ Could not check for sublocalities table: {e}")
+            return 0
+        if not table_found:
+            print(" ⚠ Table 'sublocalities' does not exist, skipping")
+            return 0
+
+        columns = self.get_table_columns('sublocalities')
+        excluded = self.get_excluded_columns()
+
+        self.cursor.execute("SELECT * FROM sublocalities ORDER BY id")
+        records = [
+            self.process_row(row, columns, excluded)
+            for row in self.cursor.fetchall()
+        ]
+
+        # Create the target directory on first use
+        target_dir = os.path.join('contributions', 'sublocalities')
+        os.makedirs(target_dir, exist_ok=True)
+
+        output_file = os.path.join(target_dir, 'sublocalities.json')
+        with open(output_file, 'w', encoding='utf-8') as handle:
+            json.dump(records, handle, ensure_ascii=False, indent=2)
+
+        print(f" ✓ Synced {len(records)} sublocalities to {output_file}")
+        return len(records)
+
def export_schema(self):
"""Export MySQL schema to bin/db/schema.sql using mysqldump"""
import subprocess
@@ -248,7 +283,7 @@ def export_schema(self):
database = self.conn.database
# Tables to export (in correct order respecting foreign keys)
- tables = ['regions', 'subregions', 'countries', 'states', 'cities']
+ tables = ['regions', 'subregions', 'countries', 'states', 'cities', 'sublocalities']
# Build mysqldump command
# --no-data: only schema, no data
@@ -370,6 +405,7 @@ def main():
countries_count = syncer.sync_countries()
states_count = syncer.sync_states()
cities_count = syncer.sync_cities()
+ sublocalities_count = syncer.sync_sublocalities()
print("\n" + "=" * 60)
print("✅ Sync complete!")
@@ -379,6 +415,7 @@ def main():
print(f" 📍 Countries: {countries_count}")
print(f" 📍 States: {states_count}")
print(f" 📍 Cities: {cities_count:,}")
+ print(f" 📍 Sublocalities: {sublocalities_count:,}")
print("\n💡 Next steps:")
print(" 1. Review changes: git diff")
print(" 2. Commit: git add . && git commit -m 'sync: update from MySQL'")
diff --git a/contributions/README.md b/contributions/README.md
index f60928eba..3c2f6ce0f 100644
--- a/contributions/README.md
+++ b/contributions/README.md
@@ -14,11 +14,13 @@ contributions/
│ └── countries.json (250 countries)
├── states/
│ └── states.json (5,000+ states/provinces)
-└── cities/
- ├── AD.json (Andorra cities)
- ├── US.json (United States cities)
- ├── IN.json (India cities)
- └── ... (209 country files)
+├── cities/
+│ ├── AD.json (Andorra cities)
+│ ├── US.json (United States cities)
+│ ├── IN.json (India cities)
+│ └── ... (209 country files)
+└── sublocalities/
+ └── sublocalities.json (Sub-localities/neighborhoods within cities)
```
## 🎯 How to Contribute
@@ -123,6 +125,30 @@ Edit `contributions/subregions/subregions.json` and add your subregion:
**Note:** Omit the `id` field for new subregions - it will be auto-assigned.
+### Adding a New Sub-locality
+
+Sub-localities are neighborhoods, areas, or districts within a city (e.g., Bandra in Mumbai, Manhattan in New York).
+
+Edit `contributions/sublocalities/sublocalities.json` and add your sub-locality:
+
+```json
+{
+ "name": "Bandra",
+ "city_id": 133024,
+ "state_id": 4008,
+ "state_code": "MH",
+ "country_id": 101,
+ "country_code": "IN",
+ "latitude": "19.05444444",
+ "longitude": "72.84055556",
+ "timezone": "Asia/Kolkata"
+}
+```
+
+**Note:** Omit the `id` field for new sub-localities - it will be auto-assigned.
+
+**Finding the parent city_id:** Look up the city in the appropriate `contributions/cities/<COUNTRY_CODE>.json` file (e.g., `contributions/cities/IN.json` for India) and use its `id` value.
+
## 🛠️ Submission Process
**For External Contributors:**
@@ -166,6 +192,29 @@ Simply create a pull request with your JSON changes! You don't need to run any b
**Timezone Support:** The `timezone` field uses [IANA timezone identifiers](https://www.iana.org/time-zones). If omitted for new cities, it can be automatically populated using the `bin/scripts/sync/add_city_timezones.py` script, which determines the timezone from latitude/longitude coordinates.
+### Sub-locality Fields
+
+| Field | Required | Description | Example |
+|-------|----------|-------------|---------|
+| `id` | Auto | Unique identifier (omit for new sub-localities) | `1234` |
+| `name` | ✅ Yes | Official sub-locality name | `"Bandra"` |
+| `city_id` | ✅ Yes | ID of parent city | `133024` |
+| `state_id` | ✅ Yes | ID of parent state | `4008` |
+| `state_code` | ✅ Yes | ISO code of parent state | `"MH"` |
+| `country_id` | ✅ Yes | ID of parent country | `101` |
+| `country_code` | ✅ Yes | ISO2 code of parent country | `"IN"` |
+| `latitude` | ✅ Yes | Latitude coordinate | `"19.05444444"` |
+| `longitude` | ✅ Yes | Longitude coordinate | `"72.84055556"` |
+| `timezone` | No | IANA timezone | `"Asia/Kolkata"` |
+| `native` | No | Native name | `"बांद्रा"` |
+| `translations` | No | Name translations object | `{"es": "Bandra"}` |
+| `wikiDataId` | No | WikiData identifier | `"Q257622"` |
+| `created_at` | No | Creation timestamp (ISO 8601) | `"2019-10-05T23:18:06"` |
+| `updated_at` | No | Last update timestamp (ISO 8601) | `"2025-10-08T14:42:36"` |
+| `flag` | Auto | Active status flag (managed by system) | `1` |
+
+**Note:** Sub-localities represent areas within a city (neighborhoods, districts, suburbs, etc.). They should not be confused with independent cities or towns.
+
### Finding State IDs
To find the correct `state_id` and `state_code`:
diff --git a/contributions/sublocalities/sublocalities.json b/contributions/sublocalities/sublocalities.json
new file mode 100644
index 000000000..fe51488c7
--- /dev/null
+++ b/contributions/sublocalities/sublocalities.json
@@ -0,0 +1 @@
+[]
diff --git a/docs/QUICKSTART-SUBLOCALITIES.md b/docs/QUICKSTART-SUBLOCALITIES.md
new file mode 100644
index 000000000..9bfb08c3a
--- /dev/null
+++ b/docs/QUICKSTART-SUBLOCALITIES.md
@@ -0,0 +1,168 @@
+# Quick Start: Using the Sub-localities Feature
+
+This guide shows you how to use the new sub-localities feature to properly categorize neighborhoods and districts within cities.
+
+## 🎯 What are Sub-localities?
+
+Sub-localities are neighborhoods, districts, or areas within a larger city:
+- ✅ **Bandra** (part of Mumbai, India)
+- ✅ **Manhattan** (part of New York, USA)
+- ✅ **Montmartre** (part of Paris, France)
+- ❌ **NOT** independent cities or towns
+
+## 🔍 Step 1: Identify Sub-localities
+
+Use the identification script to find potential sub-localities:
+
+```bash
+# For Mumbai area (India, Maharashtra state)
+python3 bin/scripts/sync/identify_sublocalities.py --country IN --state MH --password root
+
+# For all of India
+python3 bin/scripts/sync/identify_sublocalities.py --country IN --password root
+
+# Export results to JSON for review
+python3 bin/scripts/sync/identify_sublocalities.py --country IN --state MH --export mumbai_review.json
+```
+
+The script will show:
+- Cities very close to each other (< 20km by default)
+- Cities with naming patterns like "Suburban", "North", "East"
+- WikiData IDs for verification
+
+## ✅ Step 2: Verify with WikiData
+
+For each potential sub-locality, check WikiData:
+
+1. Visit `https://www.wikidata.org/wiki/Q257622` (replace with the WikiData ID)
+2. Look for the "part of" (P361) or "located in the administrative territorial entity" (P131) properties
+3. If it says "part of Mumbai" → It's a sub-locality!
+4. If it's independent → Keep it as a city
+
+## 📝 Step 3: Add to Sublocalities JSON
+
+### Find the Parent City ID
+
+Look in `contributions/cities/IN.json` for Mumbai:
+```json
+{
+ "id": 133024,
+ "name": "Mumbai",
+ ...
+}
+```
+
+### Add to `contributions/sublocalities/sublocalities.json`
+
+```json
+[
+ {
+ "name": "Bandra",
+ "city_id": 133024,
+ "state_id": 4008,
+ "state_code": "MH",
+ "country_id": 101,
+ "country_code": "IN",
+ "latitude": "19.05444444",
+ "longitude": "72.84055556",
+ "native": "बांद्रा",
+ "timezone": "Asia/Kolkata",
+ "translations": {
+ "hi": "बांद्रा",
+ "mr": "बांद्रा"
+ },
+ "wikiDataId": "Q257622"
+ }
+]
+```
+
+**Important:**
+- ❌ Do NOT include `"id"` field (auto-generated)
+- ✅ DO include all location data from the original city entry
+- ✅ DO add `"city_id"` pointing to parent city
+
+## 🗑️ Step 4: Remove from Cities
+
+If the entry was in `contributions/cities/IN.json`, remove it:
+
+```json
+// REMOVE entries like this from cities:
+{
+ "id": 147697,
+ "name": "Bandra",
+ ...
+}
+```
+
+## 💾 Step 5: Commit and Push
+
+```bash
+git add contributions/sublocalities/sublocalities.json
+git add contributions/cities/IN.json
+git commit -m "Move Bandra from cities to sub-localities of Mumbai"
+git push
+```
+
+GitHub Actions will automatically:
+1. Import to MySQL database
+2. Export to all formats (JSON, CSV, XML, YAML, MongoDB, SQL Server)
+3. Update the pull request
+
+## 🧪 Testing Locally (Optional)
+
+If you have MySQL installed:
+
+```bash
+# 1. Create database and load schema
+mysql -uroot -proot -e "CREATE DATABASE world;"
+mysql -uroot -proot world < sql/schema.sql
+
+# 2. Import your changes
+python3 bin/scripts/sync/import_json_to_mysql.py --password root
+
+# 3. Verify
+mysql -uroot -proot -e "SELECT * FROM world.sublocalities WHERE name='Bandra';"
+```
+
+## 📊 Example: Mumbai Sub-localities
+
+Here are confirmed Mumbai sub-localities to move:
+
+| Name | Current ID | WikiData | Move to Sublocalities |
+|------|-----------|----------|---------------------|
+| Andheri | 147680 | Q12413015 | ✅ Yes |
+| Bandra | 147697 | Q257622 | ✅ Yes |
+| Borivali | 147715 | Q4945504 | ✅ Yes |
+| Chembur | 147723 | Q251170 | ✅ Yes |
+| Colaba | 147728 | Q3632559 | ✅ Yes |
+| Dharavi | 147737 | Q649632 | ✅ Yes |
+| Juhu | 147768 | Q674362 | ✅ Yes |
+| Powai | 133484 | Q13118508 | ✅ Yes |
+| Worli | 147939 | Q1934607 | ✅ Yes |
+
+All should have `"city_id": 133024` (Mumbai)
+
+## 🎓 Tips
+
+1. **Batch Processing**: You can add multiple sub-localities in one PR
+2. **Keep Coordinates**: Always preserve latitude, longitude, timezone
+3. **Translations**: Preserve all translation data
+4. **WikiData**: Always verify with WikiData before making changes
+5. **Consistency**: If parent is "Mumbai", don't use "Bombay" for sub-localities
+
+## 📚 Full Documentation
+
+For complete details, see:
+- [docs/SUBLOCALITIES.md](../docs/SUBLOCALITIES.md) - Complete guide
+- [contributions/README.md](../contributions/README.md) - Field reference
+- [README.md](../README.md) - Project overview
+
+## ❓ Need Help?
+
+- Check WikiData for "part of" relationships
+- Run the identification script with different distance parameters
+- Open an issue if you're unsure about a specific case
+
+## 🙏 Thank You!
+
+Your contributions help improve data quality for thousands of developers worldwide!
diff --git a/docs/SUBLOCALITIES.md b/docs/SUBLOCALITIES.md
new file mode 100644
index 000000000..92de4f406
--- /dev/null
+++ b/docs/SUBLOCALITIES.md
@@ -0,0 +1,294 @@
+# Sub-localities Feature
+
+This document explains the sub-localities feature, which allows proper categorization of neighborhoods, districts, and areas within cities (such as Bandra in Mumbai, Manhattan in New York).
+
+## 📋 Overview
+
+Sub-localities are geographic areas that exist within a city. They are distinct from independent cities or towns. Examples include:
+
+- **Mumbai, India**: Bandra, Andheri, Borivali, Powai, etc.
+- **New York, USA**: Manhattan, Brooklyn, Queens, The Bronx, Staten Island
+- **London, UK**: Westminster, Camden, Southwark, etc.
+- **Paris, France**: Le Marais, Montmartre, Latin Quarter, etc.
+
+Previously, many sub-localities were incorrectly stored as separate cities in the database. This feature introduces a dedicated `sublocalities` table with proper relationships to parent cities.
+
+## 🗄️ Database Schema
+
+The `sublocalities` table has the following structure:
+
+```sql
+CREATE TABLE `sublocalities` (
+ `id` mediumint unsigned NOT NULL AUTO_INCREMENT,
+ `name` varchar(255) NOT NULL,
+ `city_id` mediumint unsigned NOT NULL, -- Foreign key to cities table
+ `state_id` mediumint unsigned NOT NULL,
+ `state_code` varchar(255) NOT NULL,
+ `country_id` mediumint unsigned NOT NULL,
+ `country_code` char(2) NOT NULL,
+ `latitude` decimal(10,8) NOT NULL,
+ `longitude` decimal(11,8) NOT NULL,
+ `native` varchar(255) DEFAULT NULL,
+ `timezone` varchar(255) DEFAULT NULL,
+ `translations` text,
+ `created_at` timestamp NOT NULL DEFAULT '2014-01-01 06:31:01',
+ `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `flag` tinyint(1) NOT NULL DEFAULT '1',
+ `wikiDataId` varchar(255) DEFAULT NULL,
+ PRIMARY KEY (`id`),
+ CONSTRAINT `sublocalities_ibfk_1` FOREIGN KEY (`city_id`) REFERENCES `cities` (`id`),
+ CONSTRAINT `sublocalities_ibfk_2` FOREIGN KEY (`state_id`) REFERENCES `states` (`id`),
+ CONSTRAINT `sublocalities_ibfk_3` FOREIGN KEY (`country_id`) REFERENCES `countries` (`id`)
+)
+```
+
+## 📁 Data Structure
+
+### Contributions JSON
+
+Sub-localities are stored in `contributions/sublocalities/sublocalities.json`:
+
+```json
+[
+ {
+ "name": "Bandra",
+ "city_id": 133024,
+ "state_id": 4008,
+ "state_code": "MH",
+ "country_id": 101,
+ "country_code": "IN",
+ "latitude": "19.05444444",
+ "longitude": "72.84055556",
+ "native": "बांद्रा",
+ "timezone": "Asia/Kolkata",
+ "translations": {
+ "hi": "बांद्रा",
+ "mr": "बांद्रा"
+ },
+ "wikiDataId": "Q257622"
+ }
+]
+```
+
+### Export Formats
+
+Sub-localities are exported to all standard formats:
+
+- **JSON**: `json/sublocalities.json`
+- **CSV**: `csv/sublocalities.csv`
+- **XML**: `xml/sublocalities.xml`
+- **YAML**: `yml/sublocalities.yml`
+- **MongoDB**: `mongodb/sublocalities.json`
+- **SQL Server**: `sqlserver/sublocalities.sql`
+
+## 🔧 Tools and Scripts
+
+### 1. Identify Sub-localities
+
+Use the identification script to find cities that might actually be sub-localities:
+
+```bash
+# Analyze all cities in India, Maharashtra state
+python3 bin/scripts/sync/identify_sublocalities.py --country IN --state MH
+
+# Analyze with custom distance threshold (default is 20km)
+python3 bin/scripts/sync/identify_sublocalities.py --country IN --state MH --distance 15
+
+# Export results to JSON for further processing
+python3 bin/scripts/sync/identify_sublocalities.py --country IN --state MH --export mumbai_analysis.json
+
+# Show more results
+python3 bin/scripts/sync/identify_sublocalities.py --country IN --state MH --limit 100
+```
+
+The script identifies potential sub-localities by:
+- Finding cities very close to each other (< 20km by default)
+- Detecting naming patterns (e.g., "Mumbai Suburban", "North Delhi")
+- Analyzing WikiData relationships
+
+### 2. Import Sub-localities to MySQL
+
+After adding sub-localities to `contributions/sublocalities/sublocalities.json`:
+
+```bash
+python3 bin/scripts/sync/import_json_to_mysql.py
+```
+
+This imports all data including the new sub-localities table.
+
+### 3. Export Sub-localities from MySQL
+
+To sync data back from MySQL to JSON:
+
+```bash
+python3 bin/scripts/sync/sync_mysql_to_json.py
+```
+
+### 4. Export to All Formats
+
+To export sub-localities to all supported formats:
+
+```bash
+cd bin
+php console export:json # Exports to JSON
+php console export:csv # Exports to CSV
+php console export:xml # Exports to XML
+php console export:yaml # Exports to YAML
+php console export:mongodb # Exports to MongoDB
+php console export:sql-server # Exports to SQL Server
+```
+
+## 📝 How to Add Sub-localities
+
+### Step 1: Identify Sub-localities
+
+Use the identification script or manually review your data to identify entries that should be sub-localities rather than cities.
+
+### Step 2: Find Parent City ID
+
+Look up the parent city in `contributions/cities/<COUNTRY_CODE>.json` (e.g. `contributions/cities/IN.json` for India):
+
+```json
+{
+ "id": 133024,
+ "name": "Mumbai",
+ "state_id": 4008,
+ "state_code": "MH",
+ "country_id": 101,
+ "country_code": "IN",
+ ...
+}
+```
+
+### Step 3: Add to Sublocalities JSON
+
+Add the sub-locality to `contributions/sublocalities/sublocalities.json`, setting `city_id` to the parent city's ID (here `133024`, Mumbai's ID):
+
+```json
+{
+ "name": "Bandra",
+ "city_id": 133024,
+ "state_id": 4008,
+ "state_code": "MH",
+ "country_id": 101,
+ "country_code": "IN",
+ "latitude": "19.05444444",
+ "longitude": "72.84055556",
+ "timezone": "Asia/Kolkata",
+ "wikiDataId": "Q257622"
+}
+```
+
+**Important**: Omit the `id` field - it will be auto-assigned by MySQL.
+
+### Step 4: Remove from Cities (if applicable)
+
+If the entry was previously in the cities list, remove it from `contributions/cities/<COUNTRY_CODE>.json` (e.g. `contributions/cities/IN.json`).
+
+### Step 5: Commit and Push
+
+```bash
+git add contributions/sublocalities/sublocalities.json
+git add contributions/cities/IN.json # If you removed entries
+git commit -m "Add Bandra as sub-locality of Mumbai"
+git push
+```
+
+GitHub Actions will automatically:
+1. Import the data to MySQL
+2. Export to all formats
+3. Update the pull request
+
+## 🔍 Example: Mumbai Sub-localities
+
+Mumbai has many well-known areas that were previously listed as separate cities:
+
+| Sub-locality | WikiData ID | Should be under |
+|-------------|-------------|-----------------|
+| Bandra | Q257622 | Mumbai (Q1156) |
+| Andheri | Q12413015 | Mumbai (Q1156) |
+| Borivali | Q4945504 | Mumbai (Q1156) |
+| Powai | Q13118508 | Mumbai (Q1156) |
+| Juhu | Q674362 | Mumbai (Q1156) |
+| Colaba | Q3632559 | Mumbai (Q1156) |
+| Dharavi | Q649632 | Mumbai (Q1156) |
+
+These should be moved from `cities` to `sublocalities` with `city_id = 133024` (Mumbai's ID).
+
+## 🎯 Best Practices
+
+1. **Verify WikiData**: Always check WikiData to confirm if a place is actually a sub-locality or an independent city.
+
+2. **Use the Identification Script**: Run the script to get suggestions, but manually verify each result.
+
+3. **Preserve Data**: When moving entries from cities to sub-localities, preserve all fields (coordinates, timezone, translations, etc.).
+
+4. **Document Changes**: In commit messages, explain why something is being categorized as a sub-locality.
+
+5. **Consistency**: Use consistent naming - if the parent city is "Mumbai", don't use "Bombay" for sub-localities.
+
+## 📊 Database Relationships
+
+```
+countries (1) ──┐
+ ├──> states (N) ──┐
+ │ ├──> cities (N) ──> sublocalities (N)
+ │ │
+ └──────────────────┘
+```
+
+Each sub-locality:
+- MUST have a parent city (`city_id`)
+- MUST have the same state and country as its parent city
+- Should be geographically within or very close to its parent city
+
+## 🐛 Troubleshooting
+
+### Script says "Table 'sublocalities' does not exist"
+
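+The table needs to be created in MySQL first by loading the schema. Note that `sql/schema.sql` runs `DROP TABLE IF EXISTS` before recreating the `sublocalities` table (and likely the other tables as well), so re-run the import afterwards: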
+
+```bash
+mysql -uroot -proot world < sql/schema.sql
+```
+
+### Import fails with "Foreign key constraint"
+
+Ensure:
+1. The parent `city_id` exists in the cities table
+2. The `state_id` and `country_id` match valid entries
+3. Import is done in correct order (cities before sublocalities)
+
+### Identification script shows no results
+
+Try:
+- Increasing the `--distance` parameter
+- Running without filters (`--country` or `--state`) to see global patterns
+- Checking if MySQL connection is successful
+
+## 🚀 Future Enhancements
+
+Potential improvements for the sub-localities feature:
+
+1. **Hierarchical Sub-localities**: Support for nested sub-localities (e.g., a neighborhood within a district)
+2. **Automated Classification**: ML model to automatically suggest sub-locality classifications
+3. **Batch Migration Tool**: Automated tool to move multiple entries from cities to sub-localities
+4. **Validation Rules**: Ensure sub-localities are geographically within their parent city boundaries
+
+## 📚 References
+
+- [WikiData Documentation](https://www.wikidata.org/)
+- [IANA Timezone Database](https://www.iana.org/time-zones)
+- [Contributing Guidelines](../contributions/README.md)
+- [GitHub Issue #XXX](https://github.com/dr5hn/countries-states-cities-database/issues/XXX) - Original request
+
+## 🤝 Contributing
+
+If you identify sub-localities that should be moved from the cities table, please:
+
+1. Run the identification script
+2. Manually verify the suggestions
+3. Create a pull request with the changes
+4. Include evidence (WikiData links, maps, etc.) in the PR description
+
+Thank you for helping improve the database quality! 🙏
diff --git a/sql/schema.sql b/sql/schema.sql
index 2e505bbab..c58165415 100644
--- a/sql/schema.sql
+++ b/sql/schema.sql
@@ -164,6 +164,40 @@ CREATE TABLE `subregions` (
CONSTRAINT `subregion_continent_final` FOREIGN KEY (`region_id`) REFERENCES `regions` (`id`)
) ENGINE=InnoDB AUTO_INCREMENT=23 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;
/*!40101 SET character_set_client = @saved_cs_client */;
+
+--
+-- Table structure for table `sublocalities`
+--
+
+DROP TABLE IF EXISTS `sublocalities`;
+/*!40101 SET @saved_cs_client = @@character_set_client */;
+/*!50503 SET character_set_client = utf8mb4 */;
+CREATE TABLE `sublocalities` (
+ `id` mediumint unsigned NOT NULL AUTO_INCREMENT,
+ `name` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
+ `city_id` mediumint unsigned NOT NULL,
+ `state_id` mediumint unsigned NOT NULL,
+ `state_code` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
+ `country_id` mediumint unsigned NOT NULL,
+ `country_code` char(2) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci NOT NULL,
+ `latitude` decimal(10,8) NOT NULL,
+ `longitude` decimal(11,8) NOT NULL,
+ `native` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL,
+ `timezone` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT 'IANA timezone identifier (e.g., America/New_York)',
+ `translations` text CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci,
+ `created_at` timestamp NOT NULL DEFAULT '2014-01-01 06:31:01',
+ `updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
+ `flag` tinyint(1) NOT NULL DEFAULT '1',
+ `wikiDataId` varchar(255) CHARACTER SET utf8mb4 COLLATE utf8mb4_unicode_ci DEFAULT NULL COMMENT 'Rapid API GeoDB Cities',
+ PRIMARY KEY (`id`),
+ KEY `sublocalities_city_idx` (`city_id`),
+ KEY `sublocalities_state_idx` (`state_id`),
+ KEY `sublocalities_country_idx` (`country_id`),
+ CONSTRAINT `sublocalities_ibfk_1` FOREIGN KEY (`city_id`) REFERENCES `cities` (`id`),
+ CONSTRAINT `sublocalities_ibfk_2` FOREIGN KEY (`state_id`) REFERENCES `states` (`id`),
+ CONSTRAINT `sublocalities_ibfk_3` FOREIGN KEY (`country_id`) REFERENCES `countries` (`id`)
+) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci ROW_FORMAT=COMPACT;
+/*!40101 SET character_set_client = @saved_cs_client */;
/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;