// Project-local dependencies. All paths are resolved from `__dir`, which is
// not defined in this file — presumably a global holding the project root
// that is set by the application entry point (TODO confirm).
const Utils = require(__dir + '/src/helper/utils');
const TaskQueue = require(__dir + '/src/helper/queue');
const Service = require(__dir + '/src/helper/service');
const Teepublic = require(__dir + '/src/parser/teepublic');
// Single parser instance shared by every crawler method in this module.
const TeepublicParser = new Teepublic();
const CrawlProductData = require(__dir + '/src/models/crawl_product_data');
const CrawlProductUrl = require(__dir + '/src/models/crawl_product_url');
const CrawlProductHistory = require(__dir + '/src/models/crawl_product_history');
// One shared instance per model; the crawler methods below call these
// directly rather than constructing their own.
const CrawlProductDataObj = new CrawlProductData();
const CrawlProductUrlObj = new CrawlProductUrl();
const CrawlProductHistoryObj = new CrawlProductHistory();
// Largest nesting value validateSpider() accepts; anything greater is rejected.
const maxDepth = 15;

// Hostnames the spider is allowed to request; validateSpider() rejects any
// URL whose hostname is not listed here.
const enableHostName = ['www.teepublic.com', 'teepublic.com'];

// Module-wide set consulted by spider() to skip links. No code visible in
// this file adds entries to it.
const UrlLog = new Set();

// Used by isProductUrl(): matches text containing two slashes where the
// second one is directly followed by a run of digits and a hyphen.
const regexCheckProduct = new RegExp('.*\\/.*\\/([\\d]+)-.*');
/**
 * Crawler for teepublic.com.
 *
 * Work is scheduled on three TaskQueue instances (product downloads, spider
 * requests and URL inserts). Every queued task catches its own errors so that
 * a single failing URL does not reject the task handed to the queue.
 */
class TeePublic {
    constructor() {
        // NOTE(review): the numeric argument is presumably the queue's
        // concurrency limit and the string a label — confirm against TaskQueue.
        this.handleProductQueue = new TaskQueue(7, 'handle product');
        this.spiderQueue = new TaskQueue(10, 'spider product');
        this.insertProductUrlQueue = new TaskQueue(20, 'queue product');
    }

    /**
     * Fetch one URL, record it, store its body when it is a product page and
     * queue every discovered '/t-shirt/' link for insertion.
     *
     * @param {string} url - Page to request.
     * @param {number} nesting - Depth value checked against maxDepth.
     * @param {Set<string>} history - URLs already requested; `url` is added to it.
     * @returns {Promise<void>}
     */
    async spider(url, nesting, history) {
        this.log("request", url);
        if (!this.validateSpider(url, nesting, history)) {
            this.log('validateSpider ', url);
            return;
        }
        const response = await Service.sendRequest(url);
        history.add(url);
        if (!response) {
            this.log('response is empty ', url);
            return;
        }

        // The history insert stays fire-and-forget, but a rejection is now
        // logged instead of surfacing as an unhandled promise rejection.
        Promise.resolve(CrawlProductHistoryObj.Insert({url: url})).catch((e) => {
            this.log('err insert history ', url, e);
        });
        await CrawlProductUrlObj.InsertOrUpdate({
            url: url,
            is_crawl: 1
        });
        this.log("response ", response.length);
        if (this.isProductUrl(url)) {
            await CrawlProductDataObj.InsertOrUpdate({
                url: url,
                data: response
            });
        } else {
            this.log('not product ', url);
        }

        const parseData = TeepublicParser.parse(url, response);
        if (!parseData || !parseData.links) {
            this.log('parseData.links is empty ', url);
            return;
        }
        for (const rawLink of parseData.links) {
            const link = Utils.buildUrl(rawLink);
            // NOTE(review): nothing visible in this file ever adds to UrlLog,
            // so this check never skips anything — confirm whether queued
            // links were meant to be recorded here.
            if (UrlLog.has(link)) {
                continue;
            }
            if (link.includes('/t-shirt/')) {
                this.pushInsertUrl(link);
            }
        }
    }

    /**
     * Forward all arguments to Utils.log.
     *
     * @param {...*} args - Values to log.
     */
    log(...args) {
        Utils.log(...args);
    }

    /**
     * @param {string} url
     * @returns {boolean} true when `url` matches regexCheckProduct.
     */
    isProductUrl(url) {
        return regexCheckProduct.test(url);
    }

    /**
     * Queue a spider() call on the spider queue. Errors are logged and
     * swallowed so the queued task always resolves.
     *
     * @param {string} link - URL to crawl.
     * @param {number} nesting - Depth value passed through to spider().
     * @param {Set<string>} history - Shared set of already requested URLs.
     */
    pushSpider(link, nesting, history) {
        const task = () => {
            return this.spider(link, nesting, history).catch((e) => {
                console.log('err request:' + link, e);
            });
        };
        this.spiderQueue.pushTask(task);
    }

    /**
     * Queue an InsertOrUpdate of a single URL row. Errors are logged and
     * swallowed so the queued task always resolves.
     *
     * @param {string} url - URL to store.
     */
    pushInsertUrl(url) {
        const task = () => {
            return CrawlProductUrlObj.InsertOrUpdate({
                url: url
            }).catch((e) => {
                // The handler used to reference an undefined `link`, which
                // threw inside the handler and made the task reject.
                console.log('err pushInsertUrl:' + url, e);
            });
        };
        this.insertProductUrlQueue.pushTask(task);
    }

    /**
     * Queue a MultiInsert of several URL rows. Errors are logged and
     * swallowed so the queued task always resolves.
     *
     * @param {Array<Object>} items - Rows handed to CrawlProductUrlObj.MultiInsert.
     */
    pushMultiInsertUrl(items) {
        const task = () => {
            return CrawlProductUrlObj.MultiInsert(items).catch((e) => {
                // Same undefined `link` defect as pushInsertUrl had.
                console.log('err pushMultiInsertUrl:', e);
            });
        };
        this.insertProductUrlQueue.pushTask(task);
    }

    /**
     * Decide whether spider() should request `url`.
     *
     * @param {string} url
     * @param {number} nesting
     * @param {Set<string>} history
     * @returns {boolean} false when the URL is empty, nesting exceeds
     *     maxDepth, the URL is already in `history`, or its hostname is not
     *     listed in enableHostName.
     */
    validateSpider(url, nesting, history) {
        // Guard first so the hostname helper never receives an empty URL.
        if (!url || nesting > maxDepth || history.has(url)) {
            return false;
        }
        return enableHostName.includes(Utils.getHostnameFromUrl(url));
    }

    /**
     * Walk the sticker listing pages and queue every product link found.
     *
     * NOTE(review): the page count comes from getMaxId(), which reads the
     * '/t-shirts' listing, while the pages requested here are '/stickers' —
     * confirm this mismatch is intended.
     *
     * @returns {Promise<void>}
     */
    async handleCategory() {
        const maxPage = await this.getMaxId();
        for (let i = 1; i <= maxPage; i++) {
            const url = 'https://www.teepublic.com/stickers?page=' + i;
            const urls = await this.getProductUrl(url);
            if (urls) {
                this.handleProducts(urls);
            }
        }
    }

    /**
     * Queue a handleProduct() call for each URL. Errors are logged and
     * swallowed so the queued task always resolves.
     *
     * @param {Iterable<string>} urls - Product URLs; a falsy value is ignored.
     */
    handleProducts(urls) {
        if (!urls) {
            return;
        }
        for (const url of urls) {
            const task = () => {
                return this.handleProduct(url).catch((e) => {
                    console.log('err request:' + url, e);
                });
            };
            this.handleProductQueue.pushTask(task);
        }
    }

    /**
     * Download and store one product page unless a row for it already exists.
     *
     * @param {string} url - Product URL.
     * @returns {Promise<void>}
     */
    async handleProduct(url) {
        const isExists = await CrawlProductDataObj.getFirst({url: url});
        if (isExists) {
            console.log('exists url:' + url);
            return;
        }
        const response = await Service.sendRequest(url);
        if (response) {
            await CrawlProductDataObj.InsertOrUpdate({
                url: url,
                data: response
            });
        }
    }

    /**
     * Request a listing page and collect its t-shirt and sticker links.
     *
     * @param {string} url - Listing page URL.
     * @returns {Promise<string[]>} Links containing '/t-shirt/' or
     *     '/sticker/'; empty when the response or parse result is empty.
     */
    async getProductUrl(url) {
        const result = [];
        const response = await Service.sendRequest(url);
        if (!response) {
            return result;
        }
        const parseData = TeepublicParser.parse(url, response);
        if (!parseData || !parseData.links) {
            return result;
        }
        for (const link of parseData.links) {
            if (link.includes('/t-shirt/') || link.includes('/sticker/')) {
                result.push(link);
            }
        }
        return result;
    }

    /**
     * Find the highest page number linked from the t-shirts listing.
     *
     * @returns {Promise<number>} The largest `page` query value found among
     *     links containing 't-shirts?page', never less than 250.
     */
    async getMaxId() {
        let result = 250;
        const url = 'https://www.teepublic.com/t-shirts';
        const response = await Service.sendRequest(url);
        if (!response) {
            return result;
        }
        const parseData = TeepublicParser.parse(url, response);
        if (!parseData || !parseData.links) {
            return result;
        }
        for (const link of parseData.links) {
            if (!link.includes('t-shirts?page')) {
                continue;
            }
            let page;
            try {
                // Parse the discovered link (not the base listing URL) and
                // resolve relative links against the listing URL.
                page = Number.parseInt(new URL(link, url).searchParams.get('page'), 10);
            } catch (e) {
                continue; // malformed link: ignore it
            }
            // NaN (missing or non-numeric page) never compares greater.
            if (page > result) {
                result = page;
            }
        }
        return result;
    }

    /**
     * @param {number} ms - Delay in milliseconds.
     * @returns {Promise<void>} Resolves after the delay.
     */
    sleep(ms) {
        return new Promise((resolve) => {
            setTimeout(resolve, ms);
        });
    }

    /**
     * Queue a spider request for every stored 't-shirt' category URL that
     * buildHistories() does not already list.
     *
     * @returns {Promise<void>}
     */
    async crawlCategories() {
        const categories = ['t-shirt'];
        const histories = await this.buildHistories();
        // History set shared by every spider task queued from this call.
        const tmpSet = new Set();
        for (const category of categories) {
            const urls = await CrawlProductUrlObj.getByCat(category);
            let num = 0;
            for (const url of urls) {
                if (histories.has(url)) {
                    continue;
                }
                num++;
                histories.add(url);
                // maxDepth still passes validateSpider, which rejects only
                // nesting values greater than maxDepth.
                this.pushSpider(url, maxDepth, tmpSet);
            }
            console.log('CATEGORY ', category, ' ',  urls.length, ' pass: ' , num);
        }
    }

    /**
     * @returns {Promise<Set<string>>} Set built from
     *     CrawlProductDataObj.getAllUrl(); empty when that returns a falsy value.
     */
    async buildHistories() {
        const urls = await CrawlProductDataObj.getAllUrl();
        return new Set(urls || []);
    }

    /**
     * Queue a spider request (nesting 1) for every URL returned by
     * CrawlProductUrlObj.getTag('/t-shirts/').
     *
     * @returns {Promise<void>}
     */
    async crawlTag() {
        const crawlSet = new Set();
        const urls = await CrawlProductUrlObj.getTag('/t-shirts/');
        for (const url of urls) {
            this.pushSpider(url, 1, crawlSet);
        }
    }

    /**
     * Group the stored '/t-shirts/' tag URLs by path, find the highest `page`
     * query value seen for each path and, for paths with more than one page,
     * queue a bulk insert of every page URL from 1 up to that maximum.
     * Paths whose highest page is 1 are skipped.
     *
     * @returns {Promise<void>}
     */
    async buildTagUrl() {
        const items = new Map();
        const urls = await CrawlProductUrlObj.getTag('/t-shirts/');
        for (const url of urls) {
            const urlObj = new URL(url);
            const path = urlObj.pathname;
            const item = items.get(path) || {
                url: urlObj.origin + path,
                min: 1,
                max: 1,
            };
            // Compare as numbers; NaN (no page parameter) never wins.
            const page = Number.parseInt(urlObj.searchParams.get('page'), 10);
            if (page > item.max) {
                item.max = page;
            }
            items.set(path, item);
        }
        for (const item of items.values()) {
            if (item.max <= 1) {
                continue;
            }
            const allTag = [];
            for (let i = 1; i <= item.max; i++) {
                // Page 1 is the bare URL without a query string.
                const url = i > 1 ? item.url + '?page=' + i : item.url;
                allTag.push(CrawlProductUrlObj.buildData({url: url}));
            }
            this.pushMultiInsertUrl(allTag);
        }
    }
}

// CommonJS export: the module's value is the TeePublic class itself.
module.exports = TeePublic;