# frozen_string_literal: true
require 'fileutils'

require 'multi_json'
require 'rgeo/shapefile'
require 'rgeo/geo_json'

require_relative '../../natural_earth'
# Natural Earth Data [1] files are available as triptychs of 2 ESRI shapefiles (`.shp` & `.shx`, to
# store geographical features) and a dBase file (`.dbf`, to store attributes) that can be parsed by
# `rgeo-shapefile`.
#
# However, we do not consider as essential all data included in these files (e.g., country name
# translations in many languages, population, income group, etc.) as they are already outdated or
# other projects already bundle them in a more accessible way (e.g., twitter-cldr, carmen). Moreover,
# `rgeo-shapefile` only supports sequential reads and we can take advantage of a simpler file format.
#
# These tasks help to transform Natural Earth Data triptychs into JSON and GeoJSON files.
#
# [1]: <www.naturalearthdata.com/> / <github.com/nvkelso/natural-earth-vector>
# [2]: <www.naturalearthdata.com/downloads/10m-cultural-vectors/>
namespace :import do
  # Reads the admin-0 (countries, map units) and admin-1 (states/provinces) shapefiles found under
  # the `dir` task argument and writes, under NaturalEarth::RESOURCES_DIR:
  #
  # * `countries.json`: ISO-3166-1 codes, name, continent, region and subdivision codes per country;
  # * `subdivisions.json`: ISO-3166-2 code, name and owning country per subdivision;
  # * `geometries/<alpha-2>/<code>.json`: one GeoJSON geometry per country and per subdivision.
  #
  # Aborts when `dir` is missing or unreadable, when one of the expected release files is missing,
  # or when a shapefile entry refers to a country or map unit that is not known.
  desc 'Build GeoJSON resources from Natural Earth full release files'
  task :cultural, [:dir] => :cleanup do |_t, args|
    dir = args[:dir]

    # `dir` is nil when the task is invoked without argument, and File.exist?(nil) raises.
    abort('Path does not exist') unless dir && File.exist?(dir)
    abort('Path is not a directory') unless File.directory?(dir)
    abort('Release directory is not readable') unless File.readable?(dir)

    extensions = %w[shp shx dbf]
    file_patterns = {
      countries: '50m_cultural/ne_50m_admin_0_countries.%<ext>s',
      map_units: '50m_cultural/ne_50m_admin_0_map_units.%<ext>s',
      subdivisions: '10m_cultural/ne_10m_admin_1_states_provinces.%<ext>s'
    }

    # Every shapefile needs its three companion files to be present and readable.
    file_patterns.each_value do |pattern|
      extensions.each do |extension|
        file = File.join(dir, format(pattern, ext: extension))

        abort(format('Missing file in release: %<file>s', file: file)) unless File.exist?(file)
        abort(format('Unreadable file in release: %<file>s', file: file)) unless File.readable?(file)
      end
    end

    # An ISO code attribute is treated as missing when it is empty or equal to -99.
    missing_code = ->(code) { code.empty? || code.to_i == -99 }

    # ISO-3166-1 codes as carried by an admin-0 shapefile entry.
    entry_codes = lambda do |entry|
      { 'alpha-2' => entry['ISO_A2'], 'alpha-3' => entry['ISO_A3'], 'numeric' => entry['ISO_N3'].to_i }
    end

    # Country record built from an admin-0 shapefile entry and the given ISO-3166-1 codes.
    build_country = lambda do |entry, iso_3166_1|
      {
        'iso-3166-1' => iso_3166_1,
        'name' => entry['GEOUNIT'],
        'continent' => entry['CONTINENT'],
        'region' => entry['SUBREGION'],
        'subdivisions' => {},
        'geometry' => RGeo::GeoJSON.encode(entry.geometry)
      }
    end

    # Seeded by hand: this record has no geometry, so its geometry file will contain `null`.
    data = {
      'UM' => {
        'iso-3166-1' => { 'alpha-2' => 'UM', 'alpha-3' => 'UMI', 'numeric' => 581 },
        'name' => 'United States Minor Outlying Islands',
        'continent' => 'Oceania',
        'region' => 'Pacific Islands',
        'subdivisions' => {}
      }
    }

    # First get most data about countries from the map unit files
    map_units_shp = File.join(dir, format(file_patterns[:map_units], ext: 'shp'))
    RGeo::Shapefile::Reader.open(map_units_shp) do |shapefile|
      shapefile.each do |entry|
        next if missing_code.call(entry['ISO_A2'])

        data[entry['ISO_A2']] = build_country.call(entry, entry_codes.call(entry))
      end
    end

    # Get some additional countries from the country file
    countries_shp = File.join(dir, format(file_patterns[:countries], ext: 'shp'))
    RGeo::Shapefile::Reader.open(countries_shp) do |shapefile|
      shapefile.each do |entry|
        next if missing_code.call(entry['ISO_A2'])
        next if data.key?(entry['ISO_A2']) # map units take precedence

        data[entry['ISO_A2']] = build_country.call(entry, entry_codes.call(entry))
      end
    end

    # Then fill the holes manually…
    # …for countries…
    countries = {
      'Bosnia and Herzegovina' => { 'iso-3166-1' => { 'alpha-2' => 'BA', 'alpha-3' => 'BIH', 'numeric' => 70 } },
      'Georgia' => { 'iso-3166-1' => { 'alpha-2' => 'GE', 'alpha-3' => 'GEO', 'numeric' => 268 } },
      'Jan Mayen' => { 'iso-3166-1' => { 'alpha-2' => 'SJ', 'alpha-3' => 'SJM', 'numeric' => 744 } },
      'Norway' => { 'iso-3166-1' => { 'alpha-2' => 'NO', 'alpha-3' => 'NOR', 'numeric' => 578 } },
      'Papua New Guinea' => { 'iso-3166-1' => { 'alpha-2' => 'PG', 'alpha-3' => 'PNG', 'numeric' => 598 } },
      'Portugal' => { 'iso-3166-1' => { 'alpha-2' => 'PT', 'alpha-3' => 'PRT', 'numeric' => 620 } },
      'Serbia' => { 'iso-3166-1' => { 'alpha-2' => 'RS', 'alpha-3' => 'SRB', 'numeric' => 688 } }
    }
    RGeo::Shapefile::Reader.open(map_units_shp) do |shapefile|
      shapefile.each do |entry|
        next unless missing_code.call(entry['ISO_A2'])
        next unless countries.key?(entry['GEOUNIT'])

        country = countries[entry['GEOUNIT']]
        data[country['iso-3166-1']['alpha-2']] = build_country.call(entry, country['iso-3166-1'])
      end
    end

    # …and some subdivisions.
    unrecognized_sovereign_states = ['Somaliland', 'Northern Cyprus']
    disputed_territories = ['West Bank', 'Siachen Glacier'] # Just wikipedia them and you'll understand
    subdivisions = {
      # United Kingdom provinces and countries
      'Wales' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-WLS' } },
      'Scotland' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-SCT' } },
      'Northern Ireland' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-NIR' } },
      'England' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-ENG' } },
      # Serbian autonomous province
      'Vojvodina' => { 'iso-3166-1' => 'RS', 'attributes' => { 'iso-3166-2' => 'RS-VO' } },
      # Bosnia and Herzegovina province
      'Republic Srpska' => { 'iso-3166-1' => 'BA', 'attributes' => { 'iso-3166-2' => 'BA-SRP' } },
      # Belgian regions
      'Flemish Region' => { 'iso-3166-1' => 'BE', 'attributes' => { 'iso-3166-2' => 'BE-VLG' } },
      'Walloon Region' => { 'iso-3166-1' => 'BE', 'attributes' => { 'iso-3166-2' => 'BE-WAL' } },
      'Brussels Capital Region' => { 'iso-3166-1' => 'BE', 'attributes' => { 'iso-3166-2' => 'BE-BRU' } },
      # Portuguese autonomous province
      'Madeira' => { 'iso-3166-1' => 'PT', 'attributes' => { 'iso-3166-2' => 'PT-30' } },
      'Azores' => { 'iso-3166-1' => 'PT', 'attributes' => { 'iso-3166-2' => 'PT-20' } },
      # Autonomous region of Papua New Guinea
      'Bougainville' => { 'iso-3166-1' => 'PG', 'attributes' => { 'iso-3166-2' => 'PG-NSB' } },
      # At least, Gaza is officially recognized as Palestinian territory
      'Gaza' => { 'iso-3166-1' => 'PS', 'attributes' => { 'iso-3166-2' => 'PS-GZA' } },
      # Some regions have neither ISO-3166-1 nor ISO-3166-2 code so we need to forge one.
      'Antigua' => { 'iso-3166-1' => 'AG', 'attributes' => { 'iso-3166-2' => 'AG-ZZ' } },
      'Barbuda' => { 'iso-3166-1' => 'AG', 'attributes' => { 'iso-3166-2' => 'AG-10' } },
      'Zanzibar' => { 'iso-3166-1' => 'TZ', 'attributes' => { 'iso-3166-2' => 'TZ-ZZ' } },
      'Ashmore and Cartier Islands' => { 'iso-3166-1' => 'AU', 'attributes' => { 'iso-3166-2' => 'AU-ZZ' } }
    }
    RGeo::Shapefile::Reader.open(map_units_shp) do |shapefile|
      shapefile.each do |entry|
        next unless missing_code.call(entry['ISO_A2'])
        next if countries.key?(entry['GEOUNIT'])
        next if unrecognized_sovereign_states.include?(entry['GEOUNIT'])
        next if disputed_territories.include?(entry['GEOUNIT'])

        # Fail with an explicit message rather than a NoMethodError on nil when the release
        # contains a code-less map unit that is not listed above.
        subdivision = subdivisions[entry['GEOUNIT']]
        abort(format('Unknown map unit: %<name>s', name: entry['GEOUNIT'])) if subdivision.nil?

        iso_3166_1 = subdivision['iso-3166-1']
        iso_3166_2 = subdivision['attributes']['iso-3166-2']
        abort(format('Unknown country: %<iso_3166>s', iso_3166: iso_3166_1)) unless data.key?(iso_3166_1)

        data[iso_3166_1]['subdivisions'][iso_3166_2] = subdivision['attributes'].merge(
          'name' => entry['GEOUNIT'],
          'geometry' => RGeo::GeoJSON.encode(entry.geometry)
        )
      end
    end

    # Some countries are so small they are considered as subdivisions in Natural Earth.
    country_as_subdivisions = {
      'Gibraltar' => {
        'iso-3166-1' => { 'alpha-2' => 'GI', 'alpha-3' => 'GIB', 'numeric' => 292 },
        'continent' => data['ES']['continent'],
        'region' => data['ES']['region']
      },
      'Tuvalu' => {
        'iso-3166-1' => { 'alpha-2' => 'TV', 'alpha-3' => 'TUV', 'numeric' => 798 },
        'continent' => data['SB']['continent'],
        'region' => data['SB']['region']
      },
      'Bouvet Island' => {
        'iso-3166-1' => { 'alpha-2' => 'BV', 'alpha-3' => 'BVT', 'numeric' => 74 },
        'continent' => 'Antarctica',
        'region' => 'Antarctic and Subantarctic islands'
      }
    }

    # Some countries have non official subdivisions in Natural Earth.
    countries_without_subdivisions = %w[
      AI AQ AS AW AX BL CK CW EH FK FO GG GS GU HK HM IM IO JE KY MF MP NC NF NU PF PM PN TC TF VA VG VI
    ]

    # Some subdivisions have incorrect ISO-3166-2.
    iso_3166_2_fixes = {
      'AZ-X01~' => 'AZ-SUS',
      'BS-X01~' => 'BS-GC',
      'CO-X01~' => 'CO-DC',
      'LR-X01~' => 'LR-GP',
      'LR-X02~' => 'LR-RG',
      'TJ-X01~' => 'TJ-RA'
    }

    subdivisions_shp = File.join(dir, format(file_patterns[:subdivisions], ext: 'shp'))
    RGeo::Shapefile::Reader.open(subdivisions_shp) do |shapefile|
      shapefile.each do |entry|
        next if missing_code.call(entry['iso_3166_2'])
        next if missing_code.call(entry['iso_a2'])

        if country_as_subdivisions.key?(entry['name'])
          country = country_as_subdivisions[entry['name']]
          data[country['iso-3166-1']['alpha-2']] = country.merge(
            'name' => entry['name'],
            'subdivisions' => {},
            'geometry' => RGeo::GeoJSON.encode(entry.geometry)
          )
          next
        end

        next if countries_without_subdivisions.include?(entry['iso_a2'])

        abort(format('Unknown country: %<iso_3166>s', iso_3166: entry['iso_a2'])) unless data.key?(entry['iso_a2'])

        iso_3166_2 = iso_3166_2_fixes.fetch(entry['iso_3166_2'], entry['iso_3166_2'])

        # A trailing `~` that survived the fixes above is reported but the entry is still imported.
        if iso_3166_2.match?(/~\z/)
          puts format('Unknown subdivision: %<name>s (%<iso_3166>s)', name: entry['name'], iso_3166: iso_3166_2)
        end

        # Store the corrected code, both as key and as attribute, so the fixes reach the output.
        data[entry['iso_a2']]['subdivisions'][iso_3166_2] = {
          'name' => entry['name'],
          'iso-3166-2' => iso_3166_2,
          'geometry' => RGeo::GeoJSON.encode(entry.geometry)
        }
      end
    end

    # Now let's write all those data down
    Dir.mkdir(NaturalEarth::RESOURCES_DIR, 0o755)

    File.open(File.join(NaturalEarth::RESOURCES_DIR, 'countries.json'), 'wb') do |file|
      countries_json = data.each_with_object({}) do |(iso_3166_1, country), countries_to_json|
        countries_to_json[iso_3166_1] = country.slice('iso-3166-1', 'name', 'continent', 'region')
        countries_to_json[iso_3166_1]['subdivisions'] = country['subdivisions'].keys
      end

      file.puts MultiJson.dump(countries_json)
    end

    File.open(File.join(NaturalEarth::RESOURCES_DIR, 'subdivisions.json'), 'wb') do |file|
      subdivisions_json = data.each_with_object({}) do |(iso_3166_1, country), subdivisions_to_json|
        country['subdivisions'].each do |(iso_3166_2, subdivision)|
          subdivisions_to_json[iso_3166_2] = subdivision.slice('iso-3166-2', 'name').merge('country' => iso_3166_1)
        end
      end

      file.puts MultiJson.dump(subdivisions_json)
    end

    geometries_dir = File.join(NaturalEarth::RESOURCES_DIR, 'geometries')
    Dir.mkdir(geometries_dir, 0o755)

    data.each do |(iso_3166_1, country)|
      Dir.mkdir(File.join(geometries_dir, iso_3166_1), 0o755)

      File.open(File.join(geometries_dir, iso_3166_1, "#{iso_3166_1}.json"), 'wb') do |file|
        file.puts MultiJson.dump(country['geometry'])
      end

      next if country['subdivisions'].empty?

      country['subdivisions'].each do |(iso_3166_2, subdivision)|
        File.open(File.join(geometries_dir, iso_3166_1, "#{iso_3166_2}.json"), 'wb') do |file|
          file.puts MultiJson.dump(subdivision['geometry'])
        end
      end
    end
  end

  # Removes NaturalEarth::RESOURCES_DIR after an interactive confirmation. Does nothing when the
  # directory does not exist. Aborts (and therefore stops any dependent task) when the answer is
  # anything but `y`/`Y`, or when the directory is still present after the removal attempt.
  task :cleanup do
    dir = NaturalEarth::RESOURCES_DIR
    next unless File.exist?(dir)

    # The prompt is done in Ruby rather than through `system`: the shell used by `system` is not
    # guaranteed to support `read -p`/`[[ … ]]`, and the path would need shell quoting.
    # `$stdin.gets` is used because a bare `gets` would try to read from the files named in ARGV.
    print "Delete #{dir} (y/N)? "
    $stdout.flush
    answer = $stdin.gets.to_s.strip
    abort('Resources cleanup failed') unless answer.match?(/\A[Yy]\z/)

    # `rm_rf` ignores errors, so check the outcome explicitly.
    FileUtils.rm_rf(dir)
    abort('Resources cleanup failed') if File.exist?(dir)

    puts "#{dir} removed."
  end
end