--------Apache Version 2.0 license (AL2.0)----------------------------------------------------------------------------
/*
        Copyright 2021 Andreas Burgstaller

        Licensed under the Apache License, Version 2.0 (the "License");
        you may not use this file except in compliance with the License.
        You may obtain a copy of the License at

         http://www.apache.org/licenses/LICENSE-2.0

        Unless required by applicable law or agreed to in writing, software
        distributed under the License is distributed on an "AS IS" BASIS,
        WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
        See the License for the specific language governing permissions and
        limitations under the License.
*/
----------------------------------------------------------------------------------------------------------------------

-- Main program: scrape the statistic page below and store the resulting JSON
-- array in the file "output/temp_statistic.json" beneath the working directory.
url = "https://www.statistik.at/web_de/statistiken/menschen_und_gesellschaft/wohnen/wohnkosten/110836.html"

-- the routine hands back the JSON array holding the scraped entries
entryList = startWebscraping_Statistic(url)

System = bsf.import("java.lang.System")
current_folder = System~getProperty("user.dir")
-- use the separator of the running platform instead of a hard-coded "\",
-- which only yields a valid directory path on Windows
separator = System~getProperty("file.separator")

path = current_folder || separator || "output" || separator || "temp_statistic.json"
CALL writeJSONtoFile entryList~toString, path

----------------------------------------------------------------------------------------------------------------------
::REQUIRES "BSF.CLS"
----------------------------------------------------------------------------------------------------------------------

::ROUTINE writeJSONtoFile
  -- Writes a text to a file, replacing any content the file already has.
  --   input : the text to write (the main program passes a serialised JSON array)
  --   path  : name of the file to create or overwrite
  -- The text is encoded explicitly as UTF-8 so that the result does not depend
  -- on the default charset of the platform. If anything fails while writing,
  -- the file is closed before the error is passed on to the caller.
  PARSE ARG input, path

    outStream = .nil
    writer = .nil
    -- failures of the Java calls are expected to surface as SYNTAX conditions
    SIGNAL ON SYNTAX NAME writeFailed

    outStream = .bsf~new("java.io.FileOutputStream", path) -- truncates an existing file
    writer = .bsf~new("java.io.OutputStreamWriter", outStream, "UTF-8")
    writer~write(input)
    writer~close -- close() flushes pending output first
    RETURN

writeFailed:
    -- release the file handle, then re-raise the trapped condition in the caller
    IF writer \== .nil THEN writer~close
    ELSE IF outStream \== .nil THEN outStream~close
    RAISE PROPAGATE

----------------------------------------------------------------------------------------------------------------------

::ROUTINE startWebscraping_Statistic
    -- Loads the HTML page at the given URL, reads the rows of its statistic
    -- table and converts each row into a JSON entry.
    --   url     : address of the page to scrape
    --   returns : org.json.JSONArray with one JSONObject per processed row; each
    --             object carries the keys "data_source", "adress", "key_figures",
    --             "chronology" and "statistic"
    -- Every entry is additionally printed to the console.
    PARSE ARG url
        maxRows = 16 -- upper limit of rows to process (the years 2005 till 2020)

        Jsoup = bsf.import("org.jsoup.Jsoup") --import Jsoup resources
        entryList = .bsf~new("org.json.JSONArray") --create JSON Array

        --load the HTML Document of the statistic source
        SAY "Start Webscraping:" url
        mainpage = Jsoup~connect(url)~get()

        --Extract & Transform for  TABLE STATISTIC
        statistic = .statistic~NEW
        --COLUMN <NVARCHAR> DESCRIPTION
        statistic~description = mainpage~getElementsByClass("header")~first()~ownText

        --Extract the Table with the statistic values
        statTable = mainpage~getElementsByClass("body")~first()
        valueList = statTable~getElementsByTag("tr")

        --Extract & Transform for  TABLE DATA_SOURCE
        data_source = .data_source~NEW()
        data_source~name="Statistic Austria"
        data_source~internal_id="none"
        data_source~url=url

        --Extract & Transform for  TABLE ADRESS
        adress = .adress~NEW()
        adress~country = "AUT"
        adress~zip_code = ""
        adress~street = ""
        adress~floor = ""

        -- never read past the rows the page really delivers; a shorter table
        -- would otherwise abort the run with an index error
        lastRow = min(maxRows, valueList~size) - 1

        DO i = 0 TO lastRow
            statElement = valueList~get(i)--get current row

            --Extract & Transform for TABLE CHRONOLOGY
            chronology = .chronology~NEW()
            -- the row header holds the year; it is extended to a full timestamp
            chronology~creation_time = statElement~getElementsByTag("th")~first()~ownText || "-01-01 00:00:00.0000000"
            chronology~available_from = ""
            chronology~rental_period = ""

            --Extract & Transform for  TABLE KEY_FIGURES
            -- turn decimal commas into decimal points, then cut the cell values
            -- out of the row markup; cells that are not needed go to "."
            rowHtml = statElement~toString~changeStr(",",".")
            parse var rowHtml . "<td>" cell2 "</td>" "<td>" cell3 "</td>" "<td>" . "</td>" "<td>" . "</td>" "<td>" cell6 "</td>"

            -- living space is the quotient of the 2nd and 3rd cell; store "NULL"
            -- when a cell is not numeric or the divisor is zero instead of
            -- aborting the whole scrape with an arithmetic error
            living_space = "NULL"
            IF datatype(cell2, "N") & datatype(cell3, "N") THEN
                IF cell3 \= 0 THEN living_space = cell2/cell3

            key_figures = .key_figures~NEW
            key_figures~total_amount = cell2
            key_figures~living_space = living_space
            key_figures~operating_cost = cell6
            key_figures~heating_cost = "NULL"
            key_figures~deposit = "NULL"
            key_figures~commission = "NULL"

            --create a JSON Object for the Ad Entry
            jsonEntry = .bsf~new("org.json.JSONObject")
            -- add the Data to the JSONObject
            jsonEntry~~put("data_source", data_source~exportJSON)
            jsonEntry~~put("adress", adress~exportJSON)
            jsonEntry~~put("key_figures", key_figures~exportJSON)
            jsonEntry~~put("chronology", chronology~exportJSON)
            jsonEntry~~put("statistic", statistic~exportJSON)

            SAY jsonEntry~toString -- print JSON String in console
            entryList~~put(jsonEntry) -- add JSON Object to Array
        END

        RETURN entryList

----------------------------------------------------------------------------------------------------------------------

::CLASS data_source
    -- Record with the values for the table DATA_SOURCE: name, internal id and
    -- URL of the source an entry was taken from.
    ::METHOD name ATTRIBUTE
    ::METHOD internal_id ATTRIBUTE
    ::METHOD url ATTRIBUTE
    ::METHOD exportCSV
        -- prints the attributes as one semicolon-separated line
        say self~name";"self~internal_id";"self~url
    ::METHOD exportJSON
        -- returns the attributes as org.json.JSONObject with the keys
        -- "name", "internal_id" and "url"
        jsonData = .bsf~new("org.json.JSONObject")
        jsonData~put("name", self~name)
        jsonData~put("internal_id", self~internal_id)
        jsonData~put("url", self~url)
        RETURN jsonData

----------------------------------------------------------------------------------------------------------------------

::CLASS chronology
    -- Record with the values for the table CHRONOLOGY: creation time,
    -- availability date and rental period of an entry.
    ::METHOD  creation_time ATTRIBUTE
    ::METHOD  available_from ATTRIBUTE
    ::METHOD  rental_period ATTRIBUTE
    ::METHOD exportCSV
        -- prints the attributes as one semicolon-separated line
        say self~creation_time";"self~available_from";"self~rental_period
    ::METHOD exportJSON
        -- returns the attributes as org.json.JSONObject with the keys
        -- "creation_time", "available_from" and "rental_period"
        jsonData = .bsf~new("org.json.JSONObject")
        jsonData~put("creation_time", self~creation_time)
        jsonData~put("available_from", self~available_from)
        jsonData~put("rental_period", self~rental_period)
        RETURN jsonData

----------------------------------------------------------------------------------------------------------------------

::CLASS key_figures
    -- Record with the values for the table KEY_FIGURES: living space, total
    -- amount and the individual cost items of an entry.
    ::METHOD  living_space ATTRIBUTE
    ::METHOD  total_amount ATTRIBUTE
    ::METHOD  operating_cost ATTRIBUTE
    ::METHOD  heating_cost ATTRIBUTE
    ::METHOD  deposit ATTRIBUTE
    ::METHOD  commission ATTRIBUTE
    ::METHOD exportCSV
        -- prints the attributes as one semicolon-separated line
        say self~living_space";"self~total_amount";"self~operating_cost";"self~heating_cost";"self~deposit";"self~commission
    ::METHOD exportJSON
        -- returns the attributes as org.json.JSONObject with the keys
        -- "total_amount", "living_space", "operating_cost", "heating_cost",
        -- "deposit" and "commission"
        jsonData = .bsf~new("org.json.JSONObject")
        jsonData~put("total_amount", self~total_amount)
        jsonData~put("living_space", self~living_space)
        jsonData~put("operating_cost", self~operating_cost)
        jsonData~put("heating_cost", self~heating_cost)
        jsonData~put("deposit", self~deposit)
        jsonData~put("commission", self~commission)
        RETURN jsonData

----------------------------------------------------------------------------------------------------------------------

::CLASS statistic
    -- Record with the values for the table STATISTIC: the description text of
    -- the statistic an entry belongs to.
    ::METHOD  description ATTRIBUTE
    ::METHOD exportCSV
        -- prints the description on the console
        say self~description
    ::METHOD exportJSON
        -- returns the description as org.json.JSONObject with the key "description"
        jsonData = .bsf~new("org.json.JSONObject")
        jsonData~put("description", self~description)
        RETURN jsonData

----------------------------------------------------------------------------------------------------------------------

::CLASS adress
    -- Record with the values for the table ADRESS: country, zip code, street
    -- and floor of an entry.
    ::METHOD  country ATTRIBUTE
    ::METHOD  zip_code ATTRIBUTE
    ::METHOD  street ATTRIBUTE
    ::METHOD  floor ATTRIBUTE
    ::METHOD exportCSV
        -- prints the attributes as one semicolon-separated line
        say self~country";"self~zip_code";"self~street";"self~floor
    ::METHOD exportJSON
        -- returns the attributes as org.json.JSONObject with the keys
        -- "country", "zip_code", "street" and "floor"
        jsonData = .bsf~new("org.json.JSONObject")
        jsonData~put("country", self~country)
        jsonData~put("zip_code", self~zip_code)
        jsonData~put("street", self~street)
        jsonData~put("floor", self~floor)
        RETURN jsonData

----------------------------------------------------------------------------------------------------------------------
