Download Japanese names from Wikipedia

Home   »   Download Japanese names from Wikipedia

require "json"
require "uri"
require "http/client"
require "http/params"

# Base URL of the English Wikipedia API (`api.php`) that category queries are sent to.
API_ENDPOINT = URI.parse "https://en.wikipedia.org/w/api.php"

# Helpers for downloading the member titles of a Wikipedia category.
module Wiki
    # One page of a `list=categorymembers` query result.
    class Response
        include JSON::Serializable

        # Continuation token taken from `continue.cmcontinue`.
        # The API leaves the `continue` object out of the final page, so the
        # field must be nilable; a non-nilable String makes parsing the last
        # page raise "Missing JSON attribute".
        @[JSON::Field(key: "continue", root: "cmcontinue")]
        getter continue : String?

        # Category members found under `query.categorymembers`.
        @[JSON::Field(key: "query", root: "categorymembers")]
        getter members : Array(Member)
    end

    # A single category member; only its title is deserialized.
    class Member
        include JSON::Serializable
        @[JSON::Field(key: "title")]
        getter title : String
    end

    # Downloads every member of *category* and writes the titles to
    # *output_path* as `{"names": [ ... ]}`.
    #
    # The result is fetched page by page, following the `cmcontinue` token
    # until the API stops returning one, and each page is appended to the file
    # as soon as it arrives. Titles are de-duplicated across pages.
    #
    # *property* is sent as `cmprop` and *format* as `format`; the response is
    # always parsed as JSON, so *format* should stay `"json"`.
    #
    # Raises if a request does not answer with HTTP 200, or if the body does
    # not match `Response` (for example an API error document).
    #
    # NOTE: the method name keeps its original spelling ("catergory") so that
    # existing callers continue to work.
    def self.get_wiki_catergory(
            category : String,
            output_path : Path,
            property = "title",
            format = "json"
        )
        cmcontinue = ""
        # Work on a copy: URI is a reference type, so assigning `query` on the
        # constant itself would leak state into every other user of it.
        url = API_ENDPOINT.dup
        members = [] of String
        first_batch = true

        puts "starting download of #{category}"
        # Open the file once; the block form closes it even when a request fails.
        File.open(output_path, "w") do |f|
            f << %Q({"names": [\n)
            loop do
                query = {
                    "action" => "query",
                    "list" => "categorymembers",
                    "cmprop" => property,
                    "format" => format,
                    "cmtitle" => category,
                }
                # Only send a continuation token once the API has handed one out.
                query["cmcontinue"] = cmcontinue unless cmcontinue.empty?
                url.query = HTTP::Params.encode(query)

                response = HTTP::Client.get url
                unless response.status_code == 200
                    # Parsing an error page as a result would only fail later
                    # with a less helpful message, so stop here.
                    raise "query #{url} responded with #{response.status_code}"
                end
                page = Response.from_json response.body

                # Titles not seen on any earlier page (and unique within this one).
                fresh = (page.members.map(&.title) - members).uniq
                members.concat fresh
                unless fresh.empty?
                    # Separator goes *between* non-empty batches, so an empty
                    # page can never leave a dangling comma in the output.
                    f << ",\n" unless first_batch
                    # to_json gives proper JSON string escaping for every title.
                    f << fresh.map(&.to_json).join(", ")
                    first_batch = false
                end

                cmcontinue = page.continue || ""
                break if cmcontinue.empty?

                puts "continuing with #{cmcontinue}"
                # Be polite to the API between pages.
                sleep 1.second
            end
            f << "]}"
        end
        puts "written to #{output_path}"
    end
end



# Script entry point: fetch the masculine given-name category and store it
# next to the script.
category_title = "Category:Japanese_masculine_given_names"
destination = Path.posix("./japanesenames.json")
Wiki.get_wiki_catergory(category_title, destination)

Leave a Reply

Your email address will not be published.