const Cheerio = require('cheerio');
const urlParse = require('url').parse;
const urlResolve = require('url').resolve;
// Matches URLs that end in a file extension the link collector should skip
// (case-insensitive). Written as a regex literal so the leading dot is a
// literal "." — inside a string literal '\.' collapses to '.', which would
// match ANY character before the extension (e.g. ".../mygif").
const ignoreFileRegex = /\.(gif|jpg|jpeg|tiff|png|php)$/i;

/**
 * Base helper for config-driven scraping: extracts values from jQuery-like
 * element objects (cheerio-style `find` / `attr` / `text` / `each` / `eq`)
 * and collects same-site links from a page.
 */
class Base {

    /**
     * @param {*} config - Stored unchanged on `this.config`; no method of this
     *     class reads it.
     */
    constructor(config) {
        // These fields are only initialised here; presumably they are filled
        // in by subclasses or callers — verify against their usage.
        this.className = '';
        this.page = '';
        this.browser = null;
        this.browserPriority = null;
        this.config = config;
    }

    /**
     * Extracts one value from `elmObj` as described by `domConfig`.
     *
     * A string config is a selector whose text is returned. An object config
     * supports these keys, applied in this order:
     *  - `elm` (+ optional `attr`): selector, or `'self'` to read `attr` from
     *    `elmObj` itself; without `attr` the selector's text is used.
     *  - `regex_advance`: pattern run (flags `mg`) against `pageDomText` after
     *    replacing the first `[[attr]]` with the value found so far; the last
     *    capture group of the last match becomes the value.
     *  - `elms` (+ optional `attr`, `encode`): collects the trimmed text/attr
     *    of every matching element.
     *  - neither `elm` nor `elms`: `elmObj` itself is the value.
     *  - `regex: {RegExp, index}`, `split: {separator, index}`,
     *    `replace: {pattern, replacement}` (or an array of them).
     *  - `or`: fallback config(s), tried in order while the value is falsy.
     *  - `toNumber`: convert the final value with `toNumber()`.
     *
     * @param {Object} elmObj - Element wrapper to read from.
     * @param {string|Object} domConfig - Extraction rule.
     * @param {string} [pageDomText=''] - Text searched by `regex_advance`.
     * @returns {*} The extracted value (trimmed when it has a `trim` method),
     *     a number when `toNumber` is set, or `''` when nothing was found.
     */
    getValueElm(elmObj, domConfig, pageDomText = '') {
        const self = this;
        let result = '';

        if (typeof domConfig === 'string') {
            result = self.getText(elmObj, domConfig);
        }

        // `typeof null` is also 'object'; a null config simply yields ''.
        if (domConfig !== null && typeof domConfig === 'object') {
            if (domConfig.elm) {
                const dom = domConfig.elm;
                if (dom === 'self') {
                    result = elmObj.attr(domConfig.attr);
                } else if (domConfig.attr) {
                    result = elmObj.find(dom).attr(domConfig.attr);
                } else {
                    result = self.getText(elmObj, dom);
                }

                if (domConfig.regex_advance) {
                    // The value comes from the scraped page, so escape regex
                    // metacharacters before splicing it into the pattern.
                    const attrLiteral = String(result == null ? '' : result)
                        .replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
                    // A function replacement keeps "$&", "$1", ... in the
                    // value from being treated as replacement patterns.
                    const pattern = domConfig.regex_advance.replace('[[attr]]', () => attrLiteral);
                    const regex = new RegExp(pattern, 'mg');
                    let m;
                    while ((m = regex.exec(pageDomText)) !== null) {
                        // Step past zero-length matches to avoid looping forever.
                        if (m.index === regex.lastIndex) {
                            regex.lastIndex++;
                        }
                        // Keep the last group of the current match; after the
                        // loop this is the last group of the last match.
                        result = m[m.length - 1];
                    }
                }
            } else if (domConfig.elms) {
                const resultItems = [];
                const items = elmObj.find(domConfig.elms);
                if (items && items.length > 0) {
                    items.each((index) => {
                        // `eq` wraps the element through the selection itself,
                        // so no module-level wrapper call is needed.
                        const item = items.eq(index);
                        let value = domConfig.attr ? item.attr(domConfig.attr) : item.text();
                        if (value) {
                            value = value.trim();
                        }
                        resultItems.push(value);
                    });
                }
                // NOTE(review): without `encode: 'json'` the collected items
                // are discarded and the value stays '' — confirm this is intended.
                if (domConfig.encode === 'json') {
                    result = JSON.stringify(resultItems);
                }
            } else {
                // No selector configured: the element object itself is the value.
                result = elmObj;
            }

            if (result && domConfig.regex) {
                const matches = result.match(domConfig.regex.RegExp);
                const picked = matches ? matches[domConfig.regex.index] : null;
                result = picked ? picked : '';
            }

            if (result && domConfig.split) {
                const parts = result.split(domConfig.split.separator);
                const index = domConfig.split.index;
                if (index === 'last') {
                    result = parts.pop();
                } else if (index === 'first') {
                    result = parts[0];
                } else {
                    result = parts[index] ? parts[index] : '';
                }
            }

            if (result && domConfig.replace) {
                const replaceItems = Array.isArray(domConfig.replace)
                    ? domConfig.replace
                    : [domConfig.replace];
                for (const replaceItem of replaceItems) {
                    result = result.replace(replaceItem.pattern, replaceItem.replacement);
                }
            }

            if (!result && domConfig.or) {
                const orElms = Array.isArray(domConfig.or) ? domConfig.or : [domConfig.or];
                for (const orElm of orElms) {
                    // Forward the page text so a fallback's `regex_advance`
                    // searches the same document as the primary rule.
                    result = self.getValueElm(elmObj, orElm, pageDomText);
                    if (result) {
                        break;
                    }
                }
            }
        }

        if (result && domConfig && domConfig.toNumber) {
            result = self.toNumber(result);
        } else if (result && typeof result.trim === 'function') {
            result = result.trim();
        }
        return result;
    }

    /**
     * Builds a record by applying `getValueElm` to every column rule.
     *
     * @param {Object} itemObj - Element wrapper the rules are evaluated against.
     * @param {Object} config - Map of column name to extraction rule; falsy
     *     rules are skipped.
     * @param {string} [pageDomText=''] - Passed through to `getValueElm`.
     * @returns {Object} Map of column name to extracted value.
     */
    getItem(itemObj, config, pageDomText = '') {
        const item = {};
        for (const col in config) {
            const domElm = config[col];
            if (!domElm) {
                continue;
            }
            item[col] = this.getValueElm(itemObj, domElm, pageDomText);
        }
        return item;
    }

    /**
     * @param {Object} elmObj - Element wrapper to search in.
     * @param {string} domConfig - Selector.
     * @returns {string} Text of the elements matching the selector.
     */
    getText(elmObj, domConfig) {
        return elmObj.find(domConfig).text();
    }

    /**
     * Collects the crawlable links of a page.
     *
     * @param {Function} pageObj - Selector function; called as `pageObj('a')`
     *     and expected to return an array-like of anchor elements.
     * @param {Object} urlInfo - Parsed URL of the page (see `getLinkUrl`).
     * @returns {string[]} Resolved URLs for which `getLinkUrl` returned a value.
     */
    getLinks(pageObj, urlInfo) {
        return Array.from(pageObj('a'))
            .map((element) => this.getLinkUrl(urlInfo, element))
            .filter(Boolean);
    }

    /**
     * Resolves an anchor's `href` against the page URL and returns it only if
     * it points to the same (or a containing/contained) hostname, has a
     * pathname, and does not match `ignoreFileRegex`.
     *
     * @param {Object} urlInfo - Parsed page URL; `href` and `hostname` are read.
     * @param {Object} childElm - Anchor element; `attribs.href` is read.
     * @returns {?string} The absolute URL, or `null` when the link is skipped.
     */
    getLinkUrl(urlInfo, childElm) {
        // Check the page info before dereferencing it below.
        if (!urlInfo) {
            return null;
        }
        const href = childElm.attribs.href;
        if (href === '#') {
            return null;
        }
        const childrenUrl = urlResolve(urlInfo.href, href || '');
        if (!childrenUrl) {
            return null;
        }

        const childrenUrlInfo = urlParse(childrenUrl);
        const childHost = childrenUrlInfo.hostname;
        const pageHost = urlInfo.hostname;
        // Hosts count as the same site when equal or when one contains the
        // other as a substring (a loose sub-domain check).
        const isSameSite = Boolean(childHost) && Boolean(pageHost)
            && (childHost === pageHost
                || pageHost.includes(childHost)
                || childHost.includes(pageHost));
        if (!isSameSite
            || !childrenUrlInfo.pathname
            || ignoreFileRegex.test(childrenUrl)
        ) {
            return null;
        }
        return childrenUrl;
    }

    /**
     * @param {string} url
     * @returns {string} `url` without a single trailing "/", if it had one.
     */
    trimLastElement(url) {
        const lastSlash = url.lastIndexOf('/');
        return lastSlash === url.length - 1 ? url.substring(0, lastSlash) : url;
    }

    /**
     * Strips every character except digits, "." and "-" and converts the
     * remainder with `Number` (which yields `NaN` for leftovers like "1.2.3").
     *
     * @param {*} str - Value to convert; non-strings are stringified first.
     * @returns {number} The parsed number, or 0 for a falsy input.
     */
    toNumber(str) {
        return str ? Number(String(str).replace(/[^0-9.-]+/g, '')) : 0;
    }

}

module.exports = Base;

