const Xml2Js = require('xml2js');
const fs = require('fs');
const URL = require('url');
const https = require('https');
const zlib = require('zlib');
// Shared xml2js parser instance (no options passed, so library defaults apply);
// used by parseXMLFile below to turn sitemap XML into plain JS objects.
const XMLParser = new Xml2Js.Parser(/* options */);

/**
 * Public entry point: collects the URLs listed by the sitemap at `url`.
 *
 * Delegates to the module-private getDataFromXML with an empty ignore list.
 *
 * @param {string} url - Address of the sitemap to download.
 * @param {string} [path] - Directory prefix the downloaded file is written under.
 *   NOTE(review): the default relies on `__dir`, which is not defined in this
 *   file — presumably a project-level global; confirm.
 * @returns {Promise<Array>} Whatever the private getDataFromXML resolves with.
 */
module.exports.getDataFromXML = async function (url, path = __dir + "/source/xml/") {
    const noIgnorePatterns = [];
    return getDataFromXML(url, noIgnorePatterns, path);
}

/**
 * Downloads the sitemap at `url`, parses it and returns the URLs it lists,
 * following nested sitemaps (entries ending in `.xml` / `.gz`) recursively.
 *
 * @param {string} url - Address of the sitemap to process.
 * @param {Array<string|RegExp>} [siteMapIgnore=[]] - Patterns; a sitemap whose
 *   URL matches any of them is skipped and contributes no URLs.
 * @param {string} [path] - Directory prefix downloaded files are written under.
 *   NOTE(review): the default relies on `__dir`, which is not defined in this
 *   file — presumably a project-level global; confirm.
 * @returns {Promise<Array>} Flat list of the non-sitemap URLs found.
 */
async function getDataFromXML(url, siteMapIgnore = [], path = __dir + "/source/xml/") {
    if (validateUrl(url, siteMapIgnore) === false) {
        return [];
    }
    const filePath = await getXML(url, path);
    const data = await parseXMLFile(filePath);
    // Forward `path` so nested sitemaps are stored in the same directory as
    // their parent instead of silently falling back to the default one.
    return getUrl(data, siteMapIgnore, path);
}

/**
 * Tells whether `url` may be processed, i.e. it matches none of the ignore patterns.
 *
 * Each pattern is handed to String.prototype.match, so plain strings are
 * interpreted as regular-expression source.
 *
 * @param {string} url - URL to test.
 * @param {Array<string|RegExp>} siteMapIgnore - Ignore patterns; any non-array
 *   value means "nothing is ignored".
 * @returns {boolean} false when some pattern matches, true otherwise.
 */
function validateUrl(url, siteMapIgnore) {
    if (!siteMapIgnore || !Array.isArray(siteMapIgnore)) {
        return true;
    }
    for (const pattern of siteMapIgnore) {
        if (url.match(pattern)) {
            return false;
        }
    }
    return true;
}


/**
 * Public entry point: parses a sitemap file that is already on disk and
 * returns the URLs it lists (nested sitemaps are downloaded and expanded,
 * with no ignore patterns and the default download directory).
 *
 * @param {string} filePath - Location of the XML file to read.
 * @returns {Promise<Array>} Flat list of the URLs found.
 */
module.exports.parseXMLFile = async function (filePath) {
    const data = await parseXMLFile(filePath);
    return getUrl(data);
}



/**
 * Extracts the URLs from parsed sitemap entries, expanding nested sitemaps.
 *
 * Each entry's `loc` is read (first element when it is an array). Entries
 * whose file type is `gz` or `xml` are treated as sitemaps and resolved
 * through getDataFromXML, one after another; every other entry is returned as is.
 *
 * @param {Array<Object>} data - Entries as returned by parseXMLFile.
 * @param {Array<string|RegExp>} [siteMapIgnore] - Ignore patterns passed on to
 *   nested sitemap lookups.
 * @param {string} [path] - Download directory passed on to nested sitemap
 *   lookups; when omitted, getDataFromXML applies its own default.
 * @returns {Promise<Array>} Flat list of URLs.
 */
async function getUrl(data, siteMapIgnore, path) {
    let result = [];
    if (!data || !data.length) {
        return result;
    }
    for (const urlObj of data) {
        let url = urlObj.loc;
        if (Array.isArray(url)) {
            url = url[0];
        }
        if (!url) {
            continue;
        }
        if (['gz', 'xml'].includes(getFileType(url))) {
            // An undefined `path` / `siteMapIgnore` triggers the callee's defaults.
            const urls = await getDataFromXML(url, siteMapIgnore, path);
            if (urls) {
                result = result.concat(urls);
            }
        } else {
            result.push(url);
        }
    }
    return result;
}

/**
 * Derives a flat file name from a URL: host name followed by the path
 * (query string included), with every "/" replaced by "-".
 *
 * @param {string} url - URL to convert.
 * @returns {string} File name such as "example.com-a-b.xml".
 */
function buildFileName(url) {
    const { hostname, path } = URL.parse(url);
    const segments = (hostname + path).split('/');
    return segments.join('-');
}

/**
 * Downloads the sitemap at `url` and makes sure a plain XML file is available.
 *
 * When the downloaded file's type contains "gz" it is decompressed next to
 * the download, into a file with ".xml" appended to the name.
 *
 * @param {string} url - Address of the sitemap.
 * @param {string} path - Directory prefix the file is written under.
 * @returns {Promise<string>} Path of the file to parse.
 */
async function getXML(url, path) {
    const downloadedPath = await downloadXML(url, path);
    if (!getFileType(downloadedPath).includes('gz')) {
        return downloadedPath;
    }
    const extractedPath = downloadedPath + '.xml';
    await decompressFile(downloadedPath, extractedPath);
    return extractedPath;
}

/**
 * Downloads `url` over HTTPS into `path` + a file name derived from the URL
 * (see buildFileName; any query string is cut off the name).
 *
 * Every failure — request error, response error, write error or a
 * synchronous throw from https.get — rejects the promise and releases the
 * request and the file stream, instead of crashing the process with an
 * unhandled 'error' event or leaving the promise pending.
 *
 * NOTE(review): the HTTP status code is not checked, so the body of an error
 * or redirect response is written to disk like a regular sitemap.
 * NOTE(review): the request headers below, including what looks like a
 * captured browser session cookie, are hard-coded — consider moving them to
 * configuration.
 *
 * @param {string} url - HTTPS address to download.
 * @param {string} path - Directory prefix (concatenated as is with the file name).
 * @returns {Promise<string>} Resolves with the written file's path once the
 *   write stream has finished.
 */
function downloadXML(url, path) {
    return new Promise(function (resolve, reject) {
        let fileName = buildFileName(url);
        if (fileName && fileName.includes('?')) {
            fileName = fileName.split('?')[0];
        }
        const filePath = path + fileName;

        const file = fs.createWriteStream(filePath);
        let request = null;

        // Single failure path: stop the transfer, close the file, reject.
        // Safe to call more than once (destroy/reject are idempotent).
        const fail = function (e) {
            if (request) {
                request.destroy();
            }
            file.destroy();
            reject(e);
        };

        // Attached immediately: the stream can fail (e.g. missing directory)
        // before any response has arrived.
        file.on('error', fail);
        file.on('finish', function () {
            resolve(filePath);
        });

        try {
            request = https.get(url,{
                "headers": {
                    "accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8",
                    "accept-language": "en-US,en;q=0.9,vi;q=0.8",
                    "sec-ch-ua": "\" Not A;Brand\";v=\"99\", \"Chromium\";v=\"96\", \"Google Chrome\";v=\"96\"",
                    "sec-ch-ua-mobile": "?0",
                    "sec-ch-ua-platform": "\"macOS\"",
                    "sec-fetch-dest": "image",
                    "sec-fetch-mode": "no-cors",
                    "sec-fetch-site": "same-origin",
                    "cookie": "_sa=sa.1.4527671990.1641520410; sprdConsent=%7B%22active%22%3Afalse%2C%22necessary%22%3Atrue%2C%22functional%22%3Atrue%2C%22performance%22%3Atrue%2C%22remarketing%22%3Atrue%7D; sprdUserKey=5309C14C-97B8-4401-A390-A448B6A61CA9; AMCVS_68044180541804760A4C98A5%40AdobeOrg=1; direct_affiliate=2562; affiliate=2562; any_affiliate=|2562; _gcl_au=1.1.814314377.1641520415; s_ecid=MCMID%7C60853680950731782210459656097216172013; s_cc=true; _gid=GA1.2.1867351085.1641520415; _fbp=fb.1.1641520415809.2132082363; _hjSessionUser_1655720=eyJpZCI6ImM1NTE1YjMyLTI2ZjgtNWQyNS1hNmVkLTNlYzA2YzMzZWRkZiIsImNyZWF0ZWQiOjE2NDE1MjA0MTYxMDQsImV4aXN0aW5nIjp0cnVlfQ==; sprdCookieAccepted=true; AMCV_68044180541804760A4C98A5%40AdobeOrg=-2121179033%7CMCIDTS%7C19000%7CMCMID%7C60853680950731782210459656097216172013%7CMCAAMLH-1642132566%7C3%7CMCAAMB-1642132566%7CRKhpRz8krg2tLO6pguXWp5olkAcUniQYPHaMWWgdJ3xzPWQmdj0y%7CMCOPTOUT-1641534966s%7CNONE%7CMCAID%7CNONE%7CvVersion%7C5.3.0; PP=CYO%20%7C%20Designer%20%7C%20sketchomat; p_url=https%3A%2F%2Fwww.spreadshirt.com%2F; _ga=GA1.2.1788790964.1641520415; _ga_R9PB59NC4J=GS1.1.1641527748.2.1.1641528313.0",
                    "Referrer-Policy": "strict-origin-when-cross-origin",
                    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.93 Safari/537.36"
                },
            }, response => {
                // pipe() does not forward source errors to the destination,
                // so an interrupted response must be handled explicitly.
                response.on('error', fail);
                response.pipe(file);
            });
        } catch (e) {
            // https.get throws synchronously for e.g. an invalid URL or a
            // non-https protocol; make sure the file stream is released too.
            fail(e);
            return;
        }
        request.on('error', fail);
    });
}

/**
 * Returns the extension (without the dot) of a file path or URL.
 *
 * Only the last path segment is inspected, after any query string or
 * fragment has been removed, so "https://host/sitemap.xml?page=2" yields
 * "xml" and "https://example.com/products" yields "" rather than a piece of
 * the host name.
 *
 * @param {string} filePath - File path or URL; falsy values are accepted.
 * @returns {string} The extension, or "" when the input is falsy or the last
 *   segment contains no dot.
 */
function getFileType(filePath) {
    if(!filePath) {
        return '';
    }
    // Strip "?query" / "#fragment" first: they may contain dots and slashes.
    const withoutQuery = filePath.split(/[?#]/)[0];
    const baseName = withoutQuery.split(/[\\/]/).pop();
    const dotIndex = baseName.lastIndexOf('.');
    if (dotIndex === -1) {
        return '';
    }
    return baseName.slice(dotIndex + 1);
}

/**
 * Reads an XML sitemap from disk and returns its entries.
 *
 * The file is parsed with the shared xml2js parser. For a `<urlset>` document
 * the `url` entries are returned; for a `<sitemapindex>` document the
 * `sitemap` entries are returned. Anything else — including an empty file,
 * for which the parser yields no document — results in an empty array.
 *
 * @param {string} filePath - Location of the XML file.
 * @returns {Promise<Array>} Parsed entries, or [] when none are found.
 */
async function parseXMLFile(filePath) {
    // Asynchronous read so a large sitemap does not block the event loop.
    const fileData = await fs.promises.readFile(filePath);
    const jsonData = await XMLParser.parseStringPromise(fileData);
    // The parser resolves with null for empty / whitespace-only input.
    if (jsonData == null) {
        return [];
    }
    if (jsonData.urlset != null && jsonData.urlset.url != null) {
        return jsonData.urlset.url;
    }
    if (jsonData.sitemapindex != null && jsonData.sitemapindex.sitemap) {
        return jsonData.sitemapindex.sitemap;
    }
    return [];
}


/**
 * Decompresses a gzip/deflate file into a new file.
 *
 * @param {string} filePath - Compressed input file.
 * @param {string} newFilePath - Destination for the decompressed data.
 * @returns {Promise<void>} Resolves once the output has been fully written;
 *   rejects when reading, decompressing or writing fails.
 */
function decompressFile(filePath, newFilePath) {
    return new Promise(function (resolve, reject) {
        const input = fs.createReadStream(filePath);
        const unzip = zlib.createUnzip();
        const out = fs.createWriteStream(newFilePath);

        // pipe() does not propagate errors along the chain, so each stream
        // needs its own listener; on failure all three are torn down so no
        // file descriptor stays open.
        const fail = function (e) {
            input.destroy();
            unzip.destroy();
            out.destroy();
            reject(e);
        };
        input.on('error', fail);
        unzip.on('error', fail);
        out.on('error', fail);

        out.on('finish', function () {
            resolve();
        });

        input.pipe(unzip).pipe(out);
    });
}