/**
 * Classification assigned to a segment of text by generateURLsFromText:
 * - 'text': the segment does not match URL_REGEX
 * - 'url': the segment matches URL_REGEX and isTLD() returns false for it
 * - 'tld': the segment matches URL_REGEX and isTLD() returns true for it
 */
type LinkType = 'text' | 'url' | 'tld';

/** One segment of the input text as produced by generateURLsFromText. */
type LinkFromURL = {
    /** Index of the segment within the array returned by splitByURLs. */
    id: number;
    /** How the segment was classified. */
    linkType: LinkType;
    /** The segment's text, exactly as it appeared in the input. */
    value: string;
    /** Link target; generateURLsFromText sets it for 'url' and 'tld' segments and omits it for 'text'. */
    url?: string;
};

/**
 * Detects a protocol-prefixed URL (`http://`, `https://`, `ftp://`) anywhere in a string.
 *
 * The scheme alternatives sit inside a non-capturing group so that `://` is required after
 * every one of them. Without the group the alternation splits the whole pattern in two
 * (`https?` OR `ftp?://...`), and any text merely containing `http` - 'httpbin.org' for
 * example - is reported as carrying a protocol.
 *
 * - `ftp?` keeps the optional 'p' (so 'ft://' is accepted) to stay in step with URL_REGEX.
 * - `[^\s]*` allows a bare 'http://' to count as protocol-bearing.
 * - Only the `i` flag is set; the pattern has no `^`/`$` anchors for `m` to act on.
 */
const URL_PROTOCOL_REGEX = /((?:https?|ftp?):\/\/[^\s]*)/i;
/*
 * Matches a URL-like run of text that either starts with a protocol or with a bare domain.
 *
 * NOTEs:
 * - This implementation deems [a-zA-Z0-9-] to be the valid constituents of a domain
 * - As there are currently no 'commercial' 1 character TLDs, the part after a dot must be 2-63 characters long
 * - The whole expression is one capturing group, so String.prototype.split keeps the matched text in its result
 * Known limitations:
 * - 'google.co?&.uk' will match in full: once the regex meets a non-'.', non-whitespace character ('?' in this
 *   case) the first tail alternative consumes everything up to the next '.' or whitespace
 * - `ftp?` makes the 'p' optional, so 'ft://' is accepted as a protocol as well
 *
 * RegEx breakdown:
 * (
 *   (?:
 *     https?:\/\/ - matches the http(s) protocol
 *     |
 *     ftp?:\/\/ - matches the ft(p) protocol
 *     |
 *     [a-zA-Z0-9-]+\.{1}[a-zA-Z0-9-]{2,63} - matches a single domain without a protocol prefix ('google.com' for example)
 *   )
 *   THEN
 *   (?:
 *     [^\s.]+ - matches any non-'.', non-whitespace characters exhaustively
 *     |
 *     \.{1}[a-zA-Z0-9-]{2,63} - matches a dot followed by domain syntax
 *   )* - repeat this group 0+ times, so the head on its own ('google.com' for example) is already a match
 * )
 */
const URL_REGEX =
    /((?:https?:\/\/|ftp?:\/\/|[a-zA-Z0-9-]+\.{1}[a-zA-Z0-9-]{2,63})(?:[^\s.]+|\.{1}[a-zA-Z0-9-]{2,63})*)/im;

/**
 * Cuts `text` into consecutive pieces at every URL_REGEX match.
 *
 * URL_REGEX wraps its whole pattern in a capturing group, so `split` keeps the matched
 * text in the result alongside the text between matches. Falsy entries (the empty strings
 * `split` emits around adjacent or leading/trailing matches) are dropped.
 *
 * @param text - The text to split.
 * @returns The non-empty pieces, in their original order.
 */
export const splitByURLs = (text: string): string[] => {
    const pieces = text.split(URL_REGEX);

    return pieces.filter((piece) => Boolean(piece));
};

/**
 * Reports whether `text` is free of any match for URL_PROTOCOL_REGEX.
 *
 * Despite the name, no top-level-domain validation happens here: the result is simply
 * the negation of the protocol test.
 *
 * @param text - The text to inspect.
 * @returns `true` when URL_PROTOCOL_REGEX finds no match in `text`, `false` otherwise.
 */
export const isTLD = (text: string): boolean => {
    const hasProtocol = URL_PROTOCOL_REGEX.test(text);

    return !hasProtocol;
};

/**
 * Splits `text` with splitByURLs and describes every resulting segment as a LinkFromURL.
 *
 * Each entry's `id` is the segment's index in the split result and `value` is the segment
 * itself. Classification:
 * - segment does not match URL_REGEX: 'text', no `url`
 * - segment matches and isTLD() is false: 'url', `url` is the segment unchanged
 * - segment matches and isTLD() is true: 'tld', `url` is 'http://' followed by the segment
 *   with its first '//' occurrence removed
 *
 * @param text - The text to analyse.
 * @returns One entry per segment, in order of appearance.
 */
export const generateURLsFromText = (text: string): LinkFromURL[] => {
    const describeSegment = (segment: string, index: number): LinkFromURL => {
        // Plain text between links carries no target.
        if (!URL_REGEX.test(segment)) {
            return {id: index, linkType: 'text', value: segment};
        }

        // isTLD() is false here, so the segment is used as the target verbatim.
        if (!isTLD(segment)) {
            return {id: index, linkType: 'url', value: segment, url: segment};
        }

        // The pattern has no `g` flag, so only the first '//' is stripped before prefixing.
        const stripped = segment.replace(/(\/\/)/m, '');

        return {
            id: index,
            linkType: 'tld',
            value: segment,
            url: `http://${stripped}`,
        };
    };

    return splitByURLs(text).map(describeSegment);
};
