From 3bf2473c3a7becd88f49306a427633b49af68368 Mon Sep 17 00:00:00 2001
From: despiegk
Date: Sun, 2 Feb 2025 08:10:32 +0300
Subject: [PATCH] location

---
Reviewer note: line breaks and indentation in this patch were reconstructed.
The context lines of the cloudslices/model.v hunk may not match the target
file's whitespace exactly (use `git apply --ignore-whitespace` if rejected),
and the blob hashes on the `index` lines predate the review changes.

 examples/data/location/location_example.vsh |  43 +++
 lib/data/location/api.v                     |  69 +++++
 lib/data/location/db.v                      | 306 ++++++++++++++++++++
 lib/data/location/models.v                  |  70 +++++
 lib/threefold/grid4/cloudslices/model.v     |   2 +-
 5 files changed, 489 insertions(+), 1 deletion(-)
 create mode 100755 examples/data/location/location_example.vsh
 create mode 100644 lib/data/location/api.v
 create mode 100644 lib/data/location/db.v
 create mode 100644 lib/data/location/models.v

diff --git a/examples/data/location/location_example.vsh b/examples/data/location/location_example.vsh
new file mode 100755
index 00000000..fd0f85c6
--- /dev/null
+++ b/examples/data/location/location_example.vsh
@@ -0,0 +1,43 @@
+#!/usr/bin/env -S v -n -w -gc none -cc tcc -d use_openssl -enable-globals run
+
+import freeflowuniverse.herolib.data.location
+
+// Create a new location instance
+mut loc := location.new() or { panic(err) }
+println('Location database initialized')
+
+// Initialize the database (downloads and imports data)
+// This only needs to be done once or when updating data
+println('Downloading and importing location data (this may take a few minutes)...')
+loc.init_database() or { panic(err) }
+println('Data import complete')
+
+// Example 1: Search for a city
+println('\nSearching for London...')
+results := loc.search('London', 'GB', 5, true) or { panic(err) }
+for result in results {
+	println('${result.city.name}, ${result.country.name} (${result.country.iso2})')
+	println('Coordinates: ${result.city.latitude}, ${result.city.longitude}')
+	println('Population: ${result.city.population}')
+	println('Timezone: ${result.city.timezone}')
+	println('---')
+}
+
+// Example 2: Search near coordinates (10km radius from London)
+println('\nSearching for cities within 10km of London...')
+nearby := loc.search_near(51.5074, -0.1278, 10.0, 5) or { panic(err) }
+for result in nearby {
+	println('${result.city.name}, ${result.country.name}')
+	println('Distance from center: Approx ${result.similarity:.1f}km')
+	println('---')
+}
+
+// Example 3: Fuzzy search in a specific country
+println('\nFuzzy searching for "New" in United States...')
+us_cities := loc.search('New', 'US', 5, true) or { panic(err) }
+for result in us_cities {
+	println('${result.city.name}, ${result.country.name}')
+	println('State: ${result.city.admin1_code}')
+	println('Population: ${result.city.population}')
+	println('---')
+}
diff --git a/lib/data/location/api.v b/lib/data/location/api.v
new file mode 100644
index 00000000..fafbca21
--- /dev/null
+++ b/lib/data/location/api.v
@@ -0,0 +1,69 @@
+module location
+
+// Location represents the main API for location operations
+pub struct Location {
+mut:
+	db LocationDB
+}
+
+// new creates a new Location instance
+pub fn new() !Location {
+	db := new_location_db()!
+	return Location{
+		db: db
+	}
+}
+
+// init_database downloads and imports the initial dataset
+pub fn (mut l Location) init_database() ! {
+	l.db.download_and_import_data()!
+}
+
+// search returns up to limit cities matching query (substring match when fuzzy), optionally restricted to an ISO2 country_code
+pub fn (l Location) search(query string, country_code string, limit int, fuzzy bool) ![]SearchResult {
+	opts := SearchOptions{
+		query: query
+		country_code: country_code
+		limit: limit
+		fuzzy: fuzzy
+	}
+	return l.db.search_locations(opts)
+}
+
+// search_near returns up to limit cities within radius km of (lat, lon); each result's similarity holds the distance in km
+pub fn (l Location) search_near(lat f64, lon f64, radius f64, limit int) ![]SearchResult {
+	opts := CoordinateSearchOptions{
+		coordinates: Coordinates{
+			latitude: lat
+			longitude: lon
+		}
+		radius: radius
+		limit: limit
+	}
+	return l.db.search_by_coordinates(opts)
+}
+
+// Example usage:
+/*
+fn main() ! {
+	// Create a new location instance
+	mut loc := location.new()!
+
+	// Initialize the database (downloads and imports data)
+	// Only needs to be done once or when updating data
+	loc.init_database()!
+
+	// Search for a city
+	results := loc.search('London', 'GB', 5, true)!
+	for result in results {
+		println('${result.city.name}, ${result.country.name} (${result.country.iso2})')
+		println('Coordinates: ${result.city.latitude}, ${result.city.longitude}')
+	}
+
+	// Search near coordinates (e.g., 10km radius from London)
+	nearby := loc.search_near(51.5074, -0.1278, 10.0, 5)!
+	for result in nearby {
+		println('${result.city.name} is nearby')
+	}
+}
+*/
diff --git a/lib/data/location/db.v b/lib/data/location/db.v
new file mode 100644
index 00000000..be17d19c
--- /dev/null
+++ b/lib/data/location/db.v
@@ -0,0 +1,306 @@
+module location
+
+import db.sqlite
+import os
+import encoding.csv
+import freeflowuniverse.herolib.osal
+
+const (
+	db_file = os.join_path(os.cache_dir(), 'location.db')
+	geonames_url = 'https://download.geonames.org/export/dump'
+	cities_url = '${geonames_url}/cities500.zip'
+)
+
+// LocationDB handles all database operations for locations.
+// NOTE(review): the methods below call `begin()`, `query()` and a parameterised
+// `exec()` on `sqlite.DB` - confirm these exist in the targeted V release.
+pub struct LocationDB {
+mut:
+	db sqlite.DB
+}
+
+// new_location_db opens (or creates) the SQLite file at db_file and makes sure
+// the schema exists.
+pub fn new_location_db() !LocationDB {
+	db := sqlite.connect(db_file)!
+	mut loc_db := LocationDB{
+		db: db
+	}
+	loc_db.init_tables()!
+	return loc_db
+}
+
+// init_tables creates the necessary database tables if they don't exist
+fn (mut l LocationDB) init_tables() ! {
+	l.db.exec('
+		CREATE TABLE IF NOT EXISTS countries (
+			id INTEGER PRIMARY KEY,
+			name TEXT NOT NULL,
+			iso2 TEXT NOT NULL,
+			iso3 TEXT NOT NULL,
+			continent TEXT,
+			population INTEGER,
+			timezone TEXT,
+			UNIQUE(iso2),
+			UNIQUE(iso3)
+		)
+	')!
+
+	l.db.exec('
+		CREATE TABLE IF NOT EXISTS cities (
+			id INTEGER PRIMARY KEY,
+			name TEXT NOT NULL,
+			ascii_name TEXT NOT NULL,
+			country_id INTEGER NOT NULL,
+			admin1_code TEXT,
+			latitude REAL,
+			longitude REAL,
+			population INTEGER,
+			timezone TEXT,
+			feature_class TEXT,
+			feature_code TEXT,
+			search_priority INTEGER DEFAULT 0,
+			FOREIGN KEY(country_id) REFERENCES countries(id)
+		)
+	')!
+
+	l.db.exec('
+		CREATE TABLE IF NOT EXISTS alternate_names (
+			id INTEGER PRIMARY KEY,
+			city_id INTEGER NOT NULL,
+			name TEXT NOT NULL,
+			language_code TEXT,
+			is_preferred INTEGER,
+			is_short INTEGER,
+			FOREIGN KEY(city_id) REFERENCES cities(id)
+		)
+	')!
+
+	// Create indexes for better search performance
+	l.db.exec('CREATE INDEX IF NOT EXISTS idx_city_name ON cities(name)')!
+	l.db.exec('CREATE INDEX IF NOT EXISTS idx_city_ascii ON cities(ascii_name)')!
+	l.db.exec('CREATE INDEX IF NOT EXISTS idx_city_coords ON cities(latitude, longitude)')!
+	l.db.exec('CREATE INDEX IF NOT EXISTS idx_alt_name ON alternate_names(name)')!
+}
+
+// download_and_import_data downloads and imports GeoNames data
+pub fn (mut l LocationDB) download_and_import_data() ! {
+	// Download country info
+	country_file := osal.download(
+		url: '${geonames_url}/countryInfo.txt'
+		dest: os.join_path(os.cache_dir(), 'countryInfo.txt')
+	)!
+	country_data := os.read_file(country_file.path)!
+	l.import_country_data(country_data)!
+
+	// Download and process cities
+	cities_file := osal.download(
+		url: cities_url
+		dest: os.join_path(os.cache_dir(), 'cities500.zip')
+		expand_file: os.join_path(os.cache_dir(), 'cities500.txt')
+	)!
+	cities_data := os.read_file(cities_file.path)!
+	l.import_city_data(cities_data)!
+}
+
+// import_country_data imports country information from the tab-separated
+// GeoNames countryInfo.txt dump, skipping '#' comment lines and short rows.
+fn (mut l LocationDB) import_country_data(data string) ! {
+	mut tx := l.db.begin()!
+
+	for line in data.split_into_lines() {
+		if line.starts_with('#') {
+			continue
+		}
+		fields := line.split('\t')
+		// Highest column read below is 8 (continent), so at least 9 are needed.
+		if fields.len < 9 {
+			continue
+		}
+
+		tx.exec('
+			INSERT OR REPLACE INTO countries (
+				iso2, iso3, name, continent, population, timezone
+			) VALUES (?, ?, ?, ?, ?, ?)
+		', [
+			fields[0], // iso2
+			fields[1], // iso3
+			fields[4], // name
+			fields[8], // continent
+			fields[7].i64(), // population
+			'' // timezone: countryInfo.txt has no such column (17 is "neighbours")
+		])!
+	}
+
+	tx.commit()!
+}
+
+// import_city_data imports city rows from the tab-separated GeoNames cities
+// dump. Rows that are too short or whose country code is unknown are skipped.
+fn (mut l LocationDB) import_city_data(data string) ! {
+	mut tx := l.db.begin()!
+
+	for line in data.split_into_lines() {
+		fields := line.split('\t')
+		// Highest column read below is 17 (timezone), so at least 18 are needed.
+		if fields.len < 18 {
+			continue
+		}
+
+		// Get country_id from iso2 code
+		country_id := l.get_country_id_by_iso2(fields[8]) or { continue }
+
+		tx.exec('
+			INSERT OR REPLACE INTO cities (
+				id, name, ascii_name, country_id, admin1_code,
+				latitude, longitude, population, feature_class,
+				feature_code, timezone
+			) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
+		', [
+			fields[0].int(), // id
+			fields[1], // name
+			fields[2], // ascii_name
+			country_id,
+			fields[10], // admin1_code
+			fields[4].f64(), // latitude
+			fields[5].f64(), // longitude
+			fields[14].i64(), // population
+			fields[6], // feature_class
+			fields[7], // feature_code
+			fields[17] // timezone
+		])!
+	}
+
+	tx.commit()!
+}
+
+// get_country_id_by_iso2 retrieves a country's ID using its ISO2 code.
+// Returns an error when no country with that code has been imported.
+fn (l LocationDB) get_country_id_by_iso2(iso2 string) !int {
+	rows := l.db.query('SELECT id FROM countries WHERE iso2 = ?', [iso2])!
+	if rows.len == 0 {
+		return error('unknown country code: ${iso2}')
+	}
+	return rows[0].vals[0].int()
+}
+
+// search_locations returns up to opts.limit cities matching opts.query
+// (substring match when opts.fuzzy, exact otherwise), optionally restricted to
+// opts.country_code, ordered by search_priority and then population.
+pub fn (l LocationDB) search_locations(opts SearchOptions) ![]SearchResult {
+	mut query := '
+		SELECT c.*, co.*
+		FROM cities c
+		JOIN countries co ON c.country_id = co.id
+		WHERE 1=1
+	'
+	mut params := []string{}
+
+	if opts.query != '' {
+		if opts.fuzzy {
+			query += ' AND (c.name LIKE ? OR c.ascii_name LIKE ?)'
+			params << '%${opts.query}%'
+			params << '%${opts.query}%'
+		} else {
+			query += ' AND (c.name = ? OR c.ascii_name = ?)'
+			params << opts.query
+			params << opts.query
+		}
+	}
+
+	if opts.country_code != '' {
+		query += ' AND co.iso2 = ?'
+		params << opts.country_code
+	}
+
+	query += ' ORDER BY c.search_priority DESC, c.population DESC LIMIT ?'
+	params << opts.limit.str()
+
+	rows := l.db.query(query, params)!
+	mut results := []SearchResult{cap: rows.len}
+
+	for row in rows {
+		// TODO: implement proper similarity scoring
+		results << search_result_from_row(row.vals, 1.0)
+	}
+
+	return results
+}
+
+// search_by_coordinates returns up to opts.limit cities within opts.radius km
+// of opts.coordinates, nearest first. Each result's similarity field carries
+// the computed distance in km.
+pub fn (l LocationDB) search_by_coordinates(opts CoordinateSearchOptions) ![]SearchResult {
+	// Great-circle distance via the spherical law of cosines (6371 = mean Earth
+	// radius in km). The acos argument is clamped to [-1, 1] so rounding error
+	// cannot push it out of domain. SQLite rejects HAVING on a non-aggregate
+	// query, so the filter is a WHERE on the column alias. The radius is CAST
+	// because parameters are passed as strings and SQLite orders any number
+	// before any text. Requires SQLite's built-in math functions.
+	query := "
+		SELECT c.*, co.*,
+		(6371 * acos(min(1.0, max(-1.0,
+		cos(radians(?)) * cos(radians(latitude)) *
+		cos(radians(longitude) - radians(?)) + sin(radians(?)) *
+		sin(radians(latitude)))))) AS distance
+		FROM cities c
+		JOIN countries co ON c.country_id = co.id
+		WHERE distance < CAST(? AS REAL)
+		ORDER BY distance
+		LIMIT ?
+	"
+
+	rows := l.db.query(query, [
+		opts.coordinates.latitude.str(),
+		opts.coordinates.longitude.str(),
+		opts.coordinates.latitude.str(),
+		opts.radius.str(),
+		opts.limit.str()
+	])!
+
+	mut results := []SearchResult{cap: rows.len}
+
+	for row in rows {
+		// Column 19 is the computed distance, after the 12 city and 7 country columns.
+		results << search_result_from_row(row.vals, row.vals[19].f64())
+	}
+
+	return results
+}
+
+// search_result_from_row builds a SearchResult from one row of a
+// `SELECT c.*, co.*` query: columns 0-11 are the cities table and columns
+// 12-18 the countries table, in CREATE TABLE order.
+fn search_result_from_row(vals []string, similarity f64) SearchResult {
+	return SearchResult{
+		city: City{
+			id: vals[0].int()
+			name: vals[1]
+			ascii_name: vals[2]
+			country_id: vals[3].int()
+			admin1_code: vals[4]
+			latitude: vals[5].f64()
+			longitude: vals[6].f64()
+			population: vals[7].i64()
+			timezone: vals[8]
+			feature_class: vals[9]
+			feature_code: vals[10]
+			search_priority: vals[11].int()
+		}
+		country: Country{
+			id: vals[12].int()
+			name: vals[13]
+			iso2: vals[14]
+			iso3: vals[15]
+			continent: vals[16]
+			population: vals[17].i64()
+			timezone: vals[18]
+		}
+		similarity: similarity
+	}
+}
+
+// close closes the database connection
+pub fn (mut l LocationDB) close() {
+	l.db.close()
+}
diff --git a/lib/data/location/models.v b/lib/data/location/models.v
new file mode 100644
index 00000000..3bc7659b
--- /dev/null
+++ b/lib/data/location/models.v
@@ -0,0 +1,70 @@
+module location
+
+pub struct Country {
+pub:
+	id int [primary]
+	name string [required]
+	iso2 string [required; sql: 'iso2'; max_len: 2]
+	iso3 string [required; sql: 'iso3'; max_len: 3]
+	continent string [max_len: 2]
+	population i64
+	timezone string [max_len: 40] // Left empty by the GeoNames country import
+}
+
+pub struct City {
+pub:
+	id int [primary]
+	name string [required; max_len: 200]
+	ascii_name string [required; max_len: 200] // Normalized name without special characters
+	country_id int [required]
+	admin1_code string [max_len: 20] // State/Province code
+	latitude f64
+	longitude f64
+	population i64
+	timezone string [max_len: 40]
+	feature_class string [max_len: 1] // For filtering (P for populated places)
+	feature_code string [max_len: 10] // Detailed type (PPL, PPLA, etc.)
+	search_priority int
+}
+
+pub struct AlternateName {
+pub:
+	id int [primary]
+	city_id int [required]
+	name string [required; max_len: 200]
+	language_code string [max_len: 2]
+	is_preferred bool
+	is_short bool
+}
+
+// SearchResult represents a location search result with combined city and country info
+pub struct SearchResult {
+pub:
+	city City
+	country Country
+	similarity f64 // Similarity score for name searches; distance in km for coordinate searches
+}
+
+// Coordinates represents a geographic point
+pub struct Coordinates {
+pub:
+	latitude f64
+	longitude f64
+}
+
+// SearchOptions represents parameters for location searches
+pub struct SearchOptions {
+pub:
+	query string
+	country_code string
+	limit int = 10
+	fuzzy bool
+}
+
+// CoordinateSearchOptions represents parameters for coordinate-based searches
+pub struct CoordinateSearchOptions {
+pub:
+	coordinates Coordinates
+	radius f64 // in kilometers
+	limit int = 10
+}
diff --git a/lib/threefold/grid4/cloudslices/model.v b/lib/threefold/grid4/cloudslices/model.v
index 54824040..0de0d9f5 100644
--- a/lib/threefold/grid4/cloudslices/model.v
+++ b/lib/threefold/grid4/cloudslices/model.v
@@ -18,7 +18,7 @@ pub mut:
 	ssd string
 	url string
 	reputation int
-	uptime int
+	uptime int //0..100
 	continent string
 	country string
 	passmark int