# frozen_string_literal: true

require 'fileutils'
require 'multi_json'
require 'rgeo/shapefile'
require 'rgeo/geo_json'
require_relative '../../natural_earth'

# Natural Earth Data [1] files are available as triptychs of 2 ESRI shapefiles (`.shp` & `.shx`, to
# store geographical features) and a dBase file (`.dbf`, to store attributes) that can be parsed by
# `rgeo-shapefile`.
#
# However, we do not consider all the data included in these files essential (e.g., country name
# translations in many languages, population, income group, etc.), as they are already outdated or
# other projects already bundle them in a more accessible way (e.g., twitter-cldr, carmen). Moreover,
# `rgeo-shapefile` only supports sequential reads and we can take advantage of a simpler file format.
#
# These tasks help to transform Natural Earth Data triptychs into JSON and GeoJSON files.
#
# [1]: <www.naturalearthdata.com/> / <github.com/nvkelso/natural-earth-vector>
# [2]: <www.naturalearthdata.com/downloads/10m-cultural-vectors/>
namespace :import do

# Reads the Natural Earth release stored in `dir` (the task's only argument) and writes, under
# NaturalEarth::RESOURCES_DIR:
#
# - `countries.json`: ISO-3166-1 alpha-2 code => codes, name, continent, region and the list of
#   the country's subdivision codes;
# - `subdivisions.json`: ISO-3166-2 code => code, name and owning country;
# - `geometries/<alpha-2>/<alpha-2>.json` and `geometries/<alpha-2>/<iso-3166-2>.json`: the
#   GeoJSON-encoded geometry of each country and subdivision.
#
# The task aborts when `dir` is missing or unusable, when a required release file is missing or
# unreadable, and when a shapefile entry refers to a country or map unit that is not known here.
desc 'Build GeoJSON resources from Natural Earth full release files'
task :cultural, [:dir] => :cleanup do |_t, args|
  dir = args[:dir]

  # Without this guard a missing argument reaches File.exist?(nil) and raises a TypeError.
  abort('Missing release directory argument') if dir.nil? || dir.empty?
  abort('Path does not exist') unless File.exist?(dir)
  abort('Path is not a directory') unless File.directory?(dir)
  abort('Release directory is not readable') unless File.readable?(dir)

  extensions = %w[shp shx dbf]
  file_patterns = {
    countries: '50m_cultural/ne_50m_admin_0_countries.%<ext>s',
    map_units: '50m_cultural/ne_50m_admin_0_map_units.%<ext>s',
    subdivisions: '10m_cultural/ne_10m_admin_1_states_provinces.%<ext>s'
  }

  # Every member of every triptych must be present before anything is parsed.
  file_patterns.each_value do |pattern|
    extensions.each do |extension|
      file = File.join(dir, format(pattern, ext: extension))
      abort(format('Missing file in release: %<file>s', file: file)) unless File.exist?(file)
      abort(format('Unreadable file in release: %<file>s', file: file)) unless File.readable?(file)
    end
  end

  # Local helpers shared by the passes below.

  # Path of the `.shp` member of one of the triptychs declared in `file_patterns`.
  shapefile_path = ->(name) { File.join(dir, format(file_patterns.fetch(name), ext: 'shp')) }

  # An empty value or -99 is treated as "no code available" for the entry.
  missing_code = ->(code) { code.empty? || code.to_i == -99 }

  # ISO-3166-1 codes as carried by a country/map unit entry.
  iso_codes = lambda do |entry|
    {
      'alpha-2' => entry['ISO_A2'],
      'alpha-3' => entry['ISO_A3'],
      'numeric' => entry['ISO_N3'].to_i
    }
  end

  # Country record built from a country/map unit entry and the given ISO-3166-1 codes.
  build_country = lambda do |entry, iso_3166_1|
    {
      'iso-3166-1' => iso_3166_1,
      'name' => entry['GEOUNIT'],
      'continent' => entry['CONTINENT'],
      'region' => entry['SUBREGION'],
      'subdivisions' => {},
      'geometry' => RGeo::GeoJSON.encode(entry.geometry)
    }
  end

  # Serializes `payload` as JSON into `path`.
  write_json = lambda do |path, payload|
    File.open(path, 'wb') { |file| file.puts MultiJson.dump(payload) }
  end

  # Seeded by hand; note that this record has no 'geometry' unless a shapefile entry replaces it.
  data = {
    'UM' => {
      'iso-3166-1' => {
        'alpha-2' => 'UM',
        'alpha-3' => 'UMI',
        'numeric' => 581
      },
      'name' => 'United States Minor Outlying Islands',
      'continent' => 'Oceania',
      'region' => 'Pacific Islands',
      'subdivisions' => {}
    }
  }

  # First get most data about countries from the map unit files
  map_units_shp = shapefile_path.call(:map_units)
  RGeo::Shapefile::Reader.open(map_units_shp) do |shapefile|
    shapefile.each do |entry|
      next if missing_code.call(entry['ISO_A2'])

      data[entry['ISO_A2']] = build_country.call(entry, iso_codes.call(entry))
    end
  end

  # Get some additional countries from the country file (map units take precedence)
  countries_shp = shapefile_path.call(:countries)
  RGeo::Shapefile::Reader.open(countries_shp) do |shapefile|
    shapefile.each do |entry|
      next if missing_code.call(entry['ISO_A2'])
      next if data.key?(entry['ISO_A2'])

      data[entry['ISO_A2']] = build_country.call(entry, iso_codes.call(entry))
    end
  end

  # Then fill the holes manually…
  # …for countries whose map unit carries no ISO code…
  countries = {
    'Bosnia and Herzegovina' => { 'iso-3166-1' => { 'alpha-2' => 'BA', 'alpha-3' => 'BIH', 'numeric' => 70 } },
    'Georgia' => { 'iso-3166-1' => { 'alpha-2' => 'GE', 'alpha-3' => 'GEO', 'numeric' => 268 } },
    'Jan Mayen' => { 'iso-3166-1' => { 'alpha-2' => 'SJ', 'alpha-3' => 'SJM', 'numeric' => 744 } },
    'Norway' => { 'iso-3166-1' => { 'alpha-2' => 'NO', 'alpha-3' => 'NOR', 'numeric' => 578 } },
    'Papua New Guinea' => { 'iso-3166-1' => { 'alpha-2' => 'PG', 'alpha-3' => 'PNG', 'numeric' => 598 } },
    'Portugal' => { 'iso-3166-1' => { 'alpha-2' => 'PT', 'alpha-3' => 'PRT', 'numeric' => 620 } },
    'Serbia' => { 'iso-3166-1' => { 'alpha-2' => 'RS', 'alpha-3' => 'SRB', 'numeric' => 688 } }
  }
  RGeo::Shapefile::Reader.open(map_units_shp) do |shapefile|
    shapefile.each do |entry|
      next unless missing_code.call(entry['ISO_A2'])
      next unless countries.key?(entry['GEOUNIT'])

      iso_3166_1 = countries[entry['GEOUNIT']]['iso-3166-1']
      data[iso_3166_1['alpha-2']] = build_country.call(entry, iso_3166_1)
    end
  end

  # …and some subdivisions.
  unrecognized_sovereign_states = ['Somaliland', 'Northern Cyprus']
  nope = ['West Bank', 'Siachen Glacier'] # Deliberately skipped; just wikipedia them and you'll understand

  subdivisions = {
    # United Kingdom provinces and countries
    'Wales' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-WLS' } },
    'Scotland' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-SCT' } },
    'Northern Ireland' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-NIR' } },
    'England' => { 'iso-3166-1' => 'GB', 'attributes' => { 'iso-3166-2' => 'GB-ENG' } },

    # Serbian autonomous province
    'Vojvodina' => { 'iso-3166-1' => 'RS', 'attributes' => { 'iso-3166-2' => 'RS-VO' } },

    # Bosnia and Herzegovina province
    'Republic Srpska' => { 'iso-3166-1' => 'BA', 'attributes' => { 'iso-3166-2' => 'BA-SRP' } },

    # Belgian regions
    'Flemish Region' => { 'iso-3166-1' => 'BE', 'attributes' => { 'iso-3166-2' => 'BE-VLG' } },
    'Walloon Region' => { 'iso-3166-1' => 'BE', 'attributes' => { 'iso-3166-2' => 'BE-WAL' } },
    'Brussels Capital Region' => { 'iso-3166-1' => 'BE', 'attributes' => { 'iso-3166-2' => 'BE-BRU' } },

    # Portuguese autonomous province
    'Madeira' => { 'iso-3166-1' => 'PT', 'attributes' => { 'iso-3166-2' => 'PT-30' } },
    'Azores' => { 'iso-3166-1' => 'PT', 'attributes' => { 'iso-3166-2' => 'PT-20' } },

    # Autonomous region of Papua New Guinea
    'Bougainville' => { 'iso-3166-1' => 'PG', 'attributes' => { 'iso-3166-2' => 'PG-NSB' } },

    # At least, Gaza is officially recognized as Palestinian territory
    'Gaza' => { 'iso-3166-1' => 'PS', 'attributes' => { 'iso-3166-2' => 'PS-GZA' } },

    # Some regions have neither ISO-3166-1 nor ISO-3166-2 code so we need to forge one.
    'Antigua' => { 'iso-3166-1' => 'AG', 'attributes' => { 'iso-3166-2' => 'AG-ZZ' } },
    'Barbuda' => { 'iso-3166-1' => 'AG', 'attributes' => { 'iso-3166-2' => 'AG-10' } },
    'Zanzibar' => { 'iso-3166-1' => 'TZ', 'attributes' => { 'iso-3166-2' => 'TZ-ZZ' } },
    'Ashmore and Cartier Islands' => { 'iso-3166-1' => 'AU', 'attributes' => { 'iso-3166-2' => 'AU-ZZ' } }
  }
  RGeo::Shapefile::Reader.open(map_units_shp) do |shapefile|
    shapefile.each do |entry|
      name = entry['GEOUNIT']

      next unless missing_code.call(entry['ISO_A2'])
      next if countries.key?(name)
      next if unrecognized_sovereign_states.include?(name) || nope.include?(name)

      # Fail with an explicit message rather than a NoMethodError on nil when the release
      # contains a code-less map unit that is not listed above.
      subdivision = subdivisions[name]
      abort(format('Unknown map unit: %<name>s', name: name)) if subdivision.nil?

      iso_3166_1 = subdivision['iso-3166-1']
      iso_3166_2 = subdivision['attributes']['iso-3166-2']
      abort(format('Unknown country: %<iso_3166>s', iso_3166: iso_3166_1)) unless data.key?(iso_3166_1)

      data[iso_3166_1]['subdivisions'][iso_3166_2] = subdivision['attributes'].merge(
        'name' => name,
        'geometry' => RGeo::GeoJSON.encode(entry.geometry)
      )
    end
  end

  # Some countries are so small they are considered as subdivisions in Natural Earth.
  # `fetch` makes a missing reference country fail with the offending key in the message.
  country_as_subdivisions = {
    'Gibraltar' => {
      'iso-3166-1' => { 'alpha-2' => 'GI', 'alpha-3' => 'GIB', 'numeric' => 292 },
      'continent' => data.fetch('ES')['continent'],
      'region' => data.fetch('ES')['region']
    },
    'Tuvalu' => {
      'iso-3166-1' => { 'alpha-2' => 'TV', 'alpha-3' => 'TUV', 'numeric' => 798 },
      'continent' => data.fetch('SB')['continent'],
      'region' => data.fetch('SB')['region']
    },
    'Bouvet Island' => {
      'iso-3166-1' => { 'alpha-2' => 'BV', 'alpha-3' => 'BVT', 'numeric' => 74 },
      'continent' => 'Antarctica',
      'region' => 'Antarctic and Subantarctic islands'
    }
  }

  # Some countries have non official subdivisions in Natural Earth.
  countries_without_subdivisions = %w[
    AI AQ AS AW AX BL CK CW EH FK FO GG GS GU HK HM IM IO JE KY MF MP NC NF NU PF PM PN TC TF VA VG VI
  ]

  # Some subdivisions have incorrect ISO-3166-2.
  iso_3166_2_fixes = {
    'AZ-X01~' => 'AZ-SUS',
    'BS-X01~' => 'BS-GC',
    'CO-X01~' => 'CO-DC',
    'LR-X01~' => 'LR-GP',
    'LR-X02~' => 'LR-RG',
    'TJ-X01~' => 'TJ-RA'
  }

  subdivisions_shp = shapefile_path.call(:subdivisions)
  RGeo::Shapefile::Reader.open(subdivisions_shp) do |shapefile|
    shapefile.each do |entry|
      next if missing_code.call(entry['iso_3166_2'])
      next if missing_code.call(entry['iso_a2'])

      # Promote the "subdivisions" that are actually countries.
      if country_as_subdivisions.key?(entry['name'])
        country = country_as_subdivisions[entry['name']]

        data[country['iso-3166-1']['alpha-2']] = country.merge(
          'name' => entry['name'],
          'subdivisions' => {},
          'geometry' => RGeo::GeoJSON.encode(entry.geometry)
        )
        next
      end

      next if countries_without_subdivisions.include?(entry['iso_a2'])

      abort(format('Unknown country: %<iso_3166>s', iso_3166: entry['iso_a2'])) unless data.key?(entry['iso_a2'])

      # Use the corrected code when one is known, the release's code otherwise.
      iso_3166_2 = iso_3166_2_fixes.fetch(entry['iso_3166_2'], entry['iso_3166_2'])

      # A code still ending with `~` has no known fix: report it, but keep the subdivision.
      if iso_3166_2.end_with?('~')
        puts format('Unknown subdivision: %<name>s (%<iso_3166>s)', name: entry['name'], iso_3166: iso_3166_2)
      end

      # Store under the corrected code (it used to be computed and then ignored).
      data[entry['iso_a2']]['subdivisions'][iso_3166_2] = {
        'name' => entry['name'],
        'iso-3166-2' => iso_3166_2,
        'geometry' => RGeo::GeoJSON.encode(entry.geometry)
      }
    end
  end

  # Now let's write all those data down
  Dir.mkdir(NaturalEarth::RESOURCES_DIR, 0o755)

  countries_json = data.each_with_object({}) do |(iso_3166_1, country), countries_to_json|
    countries_to_json[iso_3166_1] = country.slice('iso-3166-1', 'name', 'continent', 'region')
    countries_to_json[iso_3166_1]['subdivisions'] = country['subdivisions'].keys
  end
  write_json.call(File.join(NaturalEarth::RESOURCES_DIR, 'countries.json'), countries_json)

  subdivisions_json = data.each_with_object({}) do |(iso_3166_1, country), subdivisions_to_json|
    country['subdivisions'].each do |iso_3166_2, subdivision|
      subdivisions_to_json[iso_3166_2] = subdivision.slice('iso-3166-2', 'name').merge('country' => iso_3166_1)
    end
  end
  write_json.call(File.join(NaturalEarth::RESOURCES_DIR, 'subdivisions.json'), subdivisions_json)

  # One directory per country, holding its own geometry and those of its subdivisions.
  geometries_dir = File.join(NaturalEarth::RESOURCES_DIR, 'geometries')
  Dir.mkdir(geometries_dir, 0o755)
  data.each do |iso_3166_1, country|
    country_dir = File.join(geometries_dir, iso_3166_1)
    Dir.mkdir(country_dir, 0o755)

    write_json.call(File.join(country_dir, "#{iso_3166_1}.json"), country['geometry'])

    country['subdivisions'].each do |iso_3166_2, subdivision|
      write_json.call(File.join(country_dir, "#{iso_3166_2}.json"), subdivision['geometry'])
    end
  end
end

# Asks for confirmation, then removes NaturalEarth::RESOURCES_DIR so resources can be rebuilt
# from scratch. Does nothing when the directory does not exist. Aborts when the user does not
# confirm or when the directory is still there after the removal attempt.
#
# The prompt and the removal are done in Ruby: the former shell one-liner relied on bash-only
# syntax while being run by `sh`, interpolated the path unquoted into `rm -rf`, and read
# `$CHILD_STATUS`, which is only defined once the `English` library is loaded.
task :cleanup do
  dir = NaturalEarth::RESOURCES_DIR
  next unless File.exist?(dir)

  print "Delete #{dir} (y/N)? "
  $stdout.flush

  # `$stdin.gets` returns nil on end-of-file; `to_s` turns that into a refusal.
  answer = $stdin.gets.to_s.strip
  abort('Resources cleanup failed') unless answer.match?(/\Ay(es)?\z/i)

  # rm_rf swallows errors, so check the outcome explicitly.
  FileUtils.rm_rf(dir)
  abort('Resources cleanup failed') if File.exist?(dir)

  puts "#{dir} removed."
end

end