#!/usr/bin/env ruby

# Generates the Ruby source of Reline::Unicode::EastAsianWidth on stdout.
#
# Usage: bin/generate_east_asian_width path-to-EastAsianWidth.txt
#
# The input is scanned for lines of the form "XXXX;T # ..." or
# "XXXX..YYYY;T # ..." (hex code points, width class T). Consecutive
# entries of the same class are merged, and one TYPE_<CLASS> regexp
# constant is emitted for each of the classes F, H, W, Na, A and N.
if ARGV.empty?
  puts 'bin/generate_east_asian_width path-to-EastAsianWidth.txt'
  exit 1
end

# [\u{D800}-\u{DFFF}] causes an error in a Ruby regexp, so surrogate code
# points are removed from every range before it is recorded.
surrogate_first = 0xD800
surrogate_last = 0xDFFF

# Width classes in output order, with the label used in the generated comment.
type_names = {
  F: 'Fullwidth',
  H: 'Halfwidth',
  W: 'Wide',
  Na: 'Narrow',
  A: 'Ambiguous',
  N: 'Neutral'
}

# File.open rather than Kernel#open: the path comes from the command line and
# Kernel#open would run a subprocess for an argument starting with "|".
File.open(ARGV.first, 'rt') do |f|
  # The first line is expected to name the data file version; an empty file
  # or an unexpected header only produces a warning.
  m = f.gets&.match(/^# EastAsianWidth-(\d+\.\d+\.\d+)\.txt/)
  unicode_version = m && m[1]
  warn 'Failed to get UNICODE_VERSION' unless unicode_version

  list = []
  f.each_line do |line|
    m = line.match(/^(\h+)(?:\.\.(\h+))?\s*;\s*(\w+)\s+#.+/)
    next unless m

    first = m[1].to_i(16)
    last = m[2]&.to_i(16) || first
    type = m[3].to_sym

    # Cut the surrogate block out of the range: this leaves the part below
    # U+D800 and/or the part above U+DFFF, or nothing at all.
    pieces =
      if last < surrogate_first || first > surrogate_last
        [[first, last]]
      else
        [[first, surrogate_first - 1], [surrogate_last + 1, last]].select { |lo, hi| lo <= hi }
      end

    pieces.each do |lo, hi|
      prev = list.last
      if prev && prev[:type] == type && prev[:range].last + 1 == lo
        # Directly follows the previous entry of the same class: extend it.
        prev[:range] = (prev[:range].first..hi)
      else
        list << { range: (lo..hi), type: type }
      end
    end
  end

  by_type = list.group_by { |item| item[:type] }

  # Build every section before printing anything so that a missing class
  # aborts with a clear message instead of leaving truncated output.
  sections = type_names.map { |type, name|
    items = by_type.fetch(type) { abort "No ranges of type #{type} found in #{ARGV.first}" }
    output = "  # %s\n" % name
    output += "  TYPE_%s = /^[\#{ %%W(\n" % type.upcase
    output += items.map { |item|
      range = item[:range]
      if range.first == range.last
        '    \u{%04X}' % range.first
      else
        '    \u{%04X}-\u{%04X}' % [range.first, range.last]
      end
    }.join("\n")
    output + "\n  ).join }]/\n"
  }

  puts <<EOH
class Reline::Unicode::EastAsianWidth
  # This is based on EastAsianWidth.txt
  # UNICODE_VERSION = #{unicode_version ? "'#{unicode_version}'" : 'nil'}

EOH
  puts sections.join("\n")
  puts 'end'
end
