/*
// Configuration settings
clearurls {
    strip-notices no; // default yes
    exempt-channels "#help,#staff*,#private";
}
*/

#include "unrealircd.h"

/* Declared URL length limit. NOTE(review): not referenced by any code visible in this file. */
#define MAX_URL_LEN 450
/* PCRE source used to detect whether a message contains a URL at all:
 * "http://" or "https://" followed by one or more characters that are
 * not whitespace, '<' or '>'. Compiled once in MOD_LOAD. */
#define URL_PATTERN "https?://[^\\s<>]+"

/* Config hooks for the clearurls { } block; registered with
 * HOOKTYPE_CONFIGTEST (MOD_TEST) and HOOKTYPE_CONFIGRUN (MOD_INIT). */
int queue_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs);
int queue_configrun(ConfigFile *cf, ConfigEntry *ce, int type);
/* Returns non-zero when chan_name matches an entry of conf.exempt_channels. */
static int is_channel_exempt(const char *chan_name);
/* Runtime settings read from the clearurls { } configuration block. */
struct configstruct {
    int strip_notices;     /* non-zero: NOTICEs are processed as well as PRIVMSGs */
    char *exempt_channels; /* heap copy of the comma-separated channel mask list, or NULL */
};
/* Single module-wide settings instance (zero-initialized static storage). */
static struct configstruct conf;

/* One rule: which URLs it applies to and which query parameters to drop. */
typedef struct {
    const char *name;            /* Friendly name for the pattern */
    const char *base_pattern;    /* PCRE regex for URL matching */
    const char *tracking_params; /* Comma-separated list of parameters to strip */
} URLPattern;

/*
 * Rule table, terminated by an entry whose name is NULL.
 *
 * Every pattern is anchored with '^' because it is matched against a single
 * URL that starts at its scheme; without the anchor a URL that merely
 * *contains* another URL in its query string (redirectors, share links)
 * would be treated as belonging to the embedded site.
 *
 * Entries that name complete domains end in "([/:?#]|$)" so that the host
 * must stop there: "t\.co" must not match "t.com", "fb\.com" must not match
 * "fb.community", and so on. Entries for sites that use many country TLDs
 * ("google\.", "amazon\.", "pinterest\.") intentionally stay open-ended.
 */
static const URLPattern url_patterns[] = {
    {
        .name = "Google",
        .base_pattern = "^https?://([^/]*\\.)?google\\.",
        .tracking_params = "gclid,gclsrc,dclid,gbraid,wbraid,_ga",
    },
    {
        .name = "Facebook",
        .base_pattern = "^https?://([^/]*\\.)?(facebook\\.com|fb\\.com|fbcdn\\.net)([/:?#]|$)",
        .tracking_params = "fbclid,fb_action_ids,fb_action_types,fb_source,fb_ref,action_object_map,action_type_map,action_ref_map",
    },
    {
        .name = "Amazon",
        .base_pattern = "^https?://([^/]*\\.)?amazon\\.",
        .tracking_params = "pd_rd_i,pd_rd_r,pd_rd_w,pd_rd_wg,pf_rd_i,pf_rd_m,pf_rd_p,pf_rd_r,pf_rd_s,pf_rd_t,psc,qid,ref_,tag",
    },
    {
        .name = "YouTube",
        .base_pattern = "^https?://([^/]*\\.)?(youtube\\.com|youtu\\.be)([/:?#]|$)",
        .tracking_params = "feature,gclid,kw,si,pp",
    },
    {
        .name = "Twitter/X",
        .base_pattern = "^https?://([^/]*\\.)?(twitter\\.com|x\\.com|t\\.co)([/:?#]|$)",
        .tracking_params = "s,t,cn,ref_src,ref_url,twclid",
    },
    {
        .name = "Instagram",
        .base_pattern = "^https?://([^/]*\\.)?instagram\\.com([/:?#]|$)",
        .tracking_params = "igshid,igsh",
    },
    {
        .name = "TikTok",
        .base_pattern = "^https?://([^/]*\\.)?(tiktok\\.com|vm\\.tiktok\\.com)([/:?#]|$)",
        .tracking_params = "is_copy_url,is_from_webapp,sender_device,sender_web_id",
    },
    {
        .name = "LinkedIn",
        .base_pattern = "^https?://([^/]*\\.)?linkedin\\.com([/:?#]|$)",
        .tracking_params = "trk,trkInfo,trackingId,refId,originalReferer",
    },
    {
        .name = "Reddit",
        .base_pattern = "^https?://([^/]*\\.)?reddit\\.com([/:?#]|$)",
        .tracking_params = "ref,ref_source,rdt_cid",
    },
    {
        .name = "Spotify",
        .base_pattern = "^https?://([^/]*\\.)?spotify\\.com([/:?#]|$)",
        .tracking_params = "si,context,dl_branch,nd",
    },
    {
        .name = "Pinterest",
        .base_pattern = "^https?://([^/]*\\.)?pinterest\\.",
        .tracking_params = "source,campaign",
    },
    {
        /* Applies to every http(s) URL. */
        .name = "Generic Tracking",
        .base_pattern = "^https?://",
        .tracking_params = "utm_source,utm_medium,utm_campaign,utm_term,utm_content,utm_id,utm_source_platform,utm_creative_format,utm_marketing_tactic,_hsenc,_hsmi,mc_cid,mc_eid,mkt_tok,oly_anon_id,oly_enc_id,rb_clickid,s_cid,vero_id,wickedid,yclid,msclkid",
    },

    { .name = NULL }  /* marks end of list */
};

/* Module metadata.
 * NOTE(review): the initializer is positional; the per-line labels below are
 * inferred from the values — confirm against the ModuleHeader definition in
 * the UnrealIRCd headers. */
ModuleHeader MOD_HEADER = {
    "third/clearurls",                     /* presumably the module name */
    "0.0.1",                               /* presumably the module version */
    "Strip tracking parameters from URLs", /* presumably the description */
    "roger",                               /* presumably the author */
    "unrealircd-6",                        /* presumably the targeted module API version */
};

/* Function prototypes */
/* Hook registered for HOOKTYPE_CAN_SEND_TO_CHANNEL; may replace *text. */
static int clearurls_chanmsg(Client *client, Channel *channel, Membership *member, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx);
/* Non-zero when `param` is listed as a tracking parameter for a rule matching `url`. */
static int should_strip_param(const char *url, const char *param);
/* Returns a heap copy of `text` with every URL passed through process_url(). */
static char *strip_tracking_params(const char *text);
/* Returns a heap copy of `url` without its tracking parameters. */
static char *process_url(const char *url);

/* Compiled regex patterns */
/* Parallel to url_patterns[]: slot i holds the compiled base_pattern of rule i.
 * Allocated and filled in MOD_LOAD, released in MOD_UNLOAD. */
static Match **compiled_patterns = NULL;
/* Compiled URL_PATTERN, used as a quick "does this message contain a URL" test. */
static Match *url_detect_pattern = NULL;

MOD_INIT() {
    HookAdd(modinfo->handle, HOOKTYPE_CAN_SEND_TO_CHANNEL, 0, clearurls_chanmsg);
    HookAdd(modinfo->handle, HOOKTYPE_CONFIGRUN, 0, queue_configrun);
    return MOD_SUCCESS;
}

/*
 * Module load: compile the URL detection regex and one regex per entry of
 * url_patterns[].
 *
 * Returns MOD_SUCCESS when everything compiled, MOD_FAILED otherwise. On
 * failure every regex compiled so far is released and both globals
 * (url_detect_pattern, compiled_patterns) are reset to NULL, so nothing is
 * leaked and no stale pointer is left behind.
 */
MOD_LOAD() {
    int i, count;

    /* Compile URL detection pattern */
    url_detect_pattern = unreal_create_match(MATCH_PCRE_REGEX, URL_PATTERN, NULL);
    if (!url_detect_pattern) {
        unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
                  "Failed to compile URL detection regex - module load aborted");
        return MOD_FAILED;
    }

    /* Count the rules (the table ends at the entry whose name is NULL) */
    for (count = 0; url_patterns[count].name != NULL; count++)
        ;

    /* One slot per rule plus a trailing NULL slot */
    compiled_patterns = safe_alloc((count + 1) * sizeof(*compiled_patterns));

    /* Compile all regex patterns once during load */
    for (i = 0; i < count; i++) {
        compiled_patterns[i] = unreal_create_match(MATCH_PCRE_REGEX, url_patterns[i].base_pattern, NULL);
        if (compiled_patterns[i])
            continue;

        unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
                  "Failed to compile regex for pattern: $pattern_name - module load aborted",
                  log_data_string("pattern_name", url_patterns[i].name));

        /* Release the patterns compiled before the failing one */
        while (i-- > 0)
            unreal_delete_match(compiled_patterns[i]);
        safe_free(compiled_patterns);
        compiled_patterns = NULL;

        /* The detection regex was compiled above; release it too */
        unreal_delete_match(url_detect_pattern);
        url_detect_pattern = NULL;

        return MOD_FAILED;
    }
    return MOD_SUCCESS;
}

/*
 * Module unload: release the configuration strings and every compiled regex,
 * leaving all module globals reset.
 */
MOD_UNLOAD() {
    int i;

    /* Free config strings (safe_free is already used on NULL elsewhere in this file) */
    safe_free(conf.exempt_channels);
    memset(&conf, 0, sizeof(conf));

    /*
     * Free URL detection pattern.
     * unreal_delete_match() releases the Match object itself (every other
     * call site in this file relies on that), so the pointer must NOT be
     * handed to safe_free() afterwards - doing so frees it twice.
     */
    if (url_detect_pattern) {
        unreal_delete_match(url_detect_pattern);
        url_detect_pattern = NULL;
    }

    /* Free all compiled regex patterns; the array is parallel to url_patterns[] */
    if (compiled_patterns) {
        for (i = 0; url_patterns[i].name != NULL; i++) {
            if (compiled_patterns[i])
                unreal_delete_match(compiled_patterns[i]);
        }
        safe_free(compiled_patterns);
        compiled_patterns = NULL;
    }
    return MOD_SUCCESS;
}

/*
 * Config test phase: attach queue_configtest so that the clearurls { }
 * block gets validated.
 */
MOD_TEST()
{
    HookAdd(modinfo->handle, HOOKTYPE_CONFIGTEST, 0, queue_configtest);

    return MOD_SUCCESS;
}

/*
 * HOOKTYPE_CAN_SEND_TO_CHANNEL handler.
 *
 * For PRIVMSGs (and NOTICEs when conf.strip_notices is set) sent by local
 * users to non-exempt channels, replaces *text with a version whose URLs have
 * their tracking parameters removed.
 *
 * Always returns 0: this hook never blocks a message and never sets *errmsg.
 *
 * The replacement is stored in a function-static buffer. Nothing visible in
 * this module ever gets a chance to free a replacement string after the hook
 * returns, so handing out a heap copy would leak on every rewritten message.
 * The buffer stays valid until the next call of this function.
 */
static int clearurls_chanmsg(Client *client, Channel *channel, Membership *member, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx) {
    static char cleaned_buf[1024];
    char *cleaned;
    size_t cleaned_len;

    /* Allow PRIVMSG. Allow NOTICE only if config enabled. */
    if (sendtype == SEND_TYPE_NOTICE) {
        if (!conf.strip_notices)
            return 0;
    } else if (sendtype != SEND_TYPE_PRIVMSG) {
        return 0;
    }

    /* Check if the channel is exempted */
    if (channel && is_channel_exempt(channel->name))
        return 0;

    /* Only rewrite messages from users connected to this server */
    if (!MyUser(client))
        return 0;

    /* "http://" alone is 7 characters, so anything that short cannot hold a URL */
    if (!text || !*text || strlen(*text) <= 7)
        return 0;

    /* Check if message contains a URL */
    if (!url_detect_pattern || !unreal_match(url_detect_pattern, *text))
        return 0;

    /* We have a URL in the message, process it */
    cleaned = strip_tracking_params(*text);
    if (!cleaned)
        return 0;

    if (strcmp(*text, cleaned) != 0) {
        cleaned_len = strlen(cleaned);
        /* Stripping only removes characters; if the result still does not
         * fit, leave the message untouched rather than truncate it. */
        if (cleaned_len < sizeof(cleaned_buf)) {
            memcpy(cleaned_buf, cleaned, cleaned_len + 1);
            *text = cleaned_buf;
        }
    }
    safe_free(cleaned);

    return 0;
}

/*
 * Return a heap-allocated copy of `text` in which every http:// or https://
 * URL has been passed through process_url().
 *
 * A URL runs from its scheme up to the next space, tab, CR, LF, '<', '>' or
 * the end of the string. URLs too long for the local work buffer are copied
 * through unchanged instead of being truncated, so no part of the message is
 * ever dropped. The result is never longer than `text`.
 *
 * The caller owns the returned buffer and must release it with safe_free().
 * Returns NULL when `text` is NULL.
 */
static char *strip_tracking_params(const char *text) {
    const char *p;
    char *result;
    char *out;

    if (!text)
        return NULL;

    p = text;
    result = safe_alloc(strlen(text) + 1);
    out = result;

    while (*p) {
        char url_buf[512];
        char *cleaned_url = NULL;
        const char *url_end;
        size_t url_len;

        /* Simple URL detection: look for http:// or https:// */
        if (strncmp(p, "http://", 7) != 0 && strncmp(p, "https://", 8) != 0) {
            /* Regular character, just copy it */
            *out++ = *p++;
            continue;
        }

        /* Find end of URL (tab included, as URL_PATTERN stops at any whitespace) */
        url_end = p + strcspn(p, " \t\n\r<>");
        url_len = (size_t)(url_end - p);

        /* Only URLs that fit the work buffer completely are processed */
        if (url_len < sizeof(url_buf)) {
            memcpy(url_buf, p, url_len);
            url_buf[url_len] = '\0';
            cleaned_url = process_url(url_buf);
        }

        if (cleaned_url) {
            size_t cleaned_len = strlen(cleaned_url);

            /* `result` is sized for the input, so never emit more than the
             * original URL occupied; fall back to the original otherwise. */
            if (cleaned_len <= url_len) {
                memcpy(out, cleaned_url, cleaned_len);
                out += cleaned_len;
            } else {
                memcpy(out, p, url_len);
                out += url_len;
            }
            safe_free(cleaned_url);
        } else {
            memcpy(out, p, url_len);
            out += url_len;
        }

        /* Move past the URL */
        p = url_end;
    }

    *out = '\0';
    return result;
}

/*
 * Process a single URL and strip tracking parameters.
 *
 * The query string is the part between the first '?' and the first '#' (or
 * the end of the URL). Each '&'-separated parameter whose name is reported
 * by should_strip_param() is removed; everything else, including the
 * "#fragment", is kept in its original order.
 *
 * If no parameter is removed the URL is returned byte-for-byte unchanged.
 * If all parameters are removed, the now-empty '?' is dropped as well.
 *
 * Returns a heap-allocated string, never longer than `url`, which the caller
 * must release with safe_free().
 */
static char *process_url(const char *url) {
    size_t url_len = strlen(url);
    char *result = safe_alloc(url_len + 1);
    /* A '#' starts the fragment; a '?' after it is not a query separator */
    const char *fragment = strchr(url, '#');
    const char *query_end = fragment ? fragment : url + url_len;
    const char *query_start = memchr(url, '?', (size_t)(query_end - url));
    const char *p;
    char *out;
    char *params_out;
    int first_param = 1;
    int stripped_any = 0;

    if (!query_start) {
        memcpy(result, url, url_len + 1);
        return result;
    }

    /* Copy URL up to and including the '?' */
    memcpy(result, url, (size_t)(query_start - url) + 1);
    params_out = result + (query_start - url) + 1;
    out = params_out;

    /* Process query parameters */
    p = query_start + 1;
    while (p < query_end) {
        char param_buf[128];
        const char *amp = memchr(p, '&', (size_t)(query_end - p));
        const char *param_end = amp ? amp : query_end;
        const char *equal = memchr(p, '=', (size_t)(param_end - p));
        size_t name_len = (size_t)((equal ? equal : param_end) - p);
        size_t param_len = (size_t)(param_end - p);
        int strip = 0;

        /* Names too long for the buffer cannot be a listed tracking
         * parameter, so they are kept without a lookup. */
        if (name_len > 0 && name_len < sizeof(param_buf)) {
            memcpy(param_buf, p, name_len);
            param_buf[name_len] = '\0';
            strip = should_strip_param(url, param_buf);
        }

        if (strip) {
            stripped_any = 1;
        } else if (param_len > 0) {
            /* Keep this parameter */
            if (!first_param)
                *out++ = '&';
            memcpy(out, p, param_len);
            out += param_len;
            first_param = 0;
        }

        p = param_end;
        if (p < query_end)
            p++; /* skip the '&' */
    }

    /* Nothing to strip: hand back the URL exactly as it was given */
    if (!stripped_any) {
        memcpy(result, url, url_len + 1);
        return result;
    }

    /* Every parameter was removed: drop the dangling '?' */
    if (out == params_out)
        out--;

    /* Re-attach the fragment, if any */
    if (fragment) {
        size_t fragment_len = strlen(fragment);

        memcpy(out, fragment, fragment_len);
        out += fragment_len;
    }

    *out = '\0';
    return result;
}

/*
 * Check if a parameter should be stripped based on URL patterns.
 *
 * Walks url_patterns[]; for each rule whose compiled regex matches `url`,
 * compares `param` case-insensitively against the rule's comma-separated
 * tracking_params list (leading blanks of a list entry are ignored).
 *
 * The list is scanned in place, so no memory is allocated per lookup.
 *
 * Returns 1 when the parameter should be stripped, 0 otherwise (including
 * when the patterns have not been compiled or an argument is NULL/empty).
 */
static int should_strip_param(const char *url, const char *param) {
    size_t param_len;
    int i;

    if (!compiled_patterns || !url || !param)
        return 0;

    param_len = strlen(param);
    if (param_len == 0)
        return 0;

    for (i = 0; url_patterns[i].name != NULL; i++) {
        const char *token;

        if (!compiled_patterns[i] || !unreal_match(compiled_patterns[i], url))
            continue;

        /* URL matches pattern, check if parameter should be stripped */
        token = url_patterns[i].tracking_params;
        while (*token) {
            size_t token_len;

            /* Skip separators and leading blanks */
            while (*token == ',' || *token == ' ')
                token++;

            token_len = strcspn(token, ",");
            if (token_len == param_len && strncasecmp(token, param, param_len) == 0)
                return 1; /* Strip this parameter */

            token += token_len;
        }
    }

    return 0; /* Don't strip */
}

/*
 * HOOKTYPE_CONFIGTEST handler: validate a clearurls { } block.
 *
 * Returns 0 when `ce` is not a main-config "clearurls" block (not ours),
 * 1 when the block is valid, and -1 when it contains errors. In the last two
 * cases *errs receives the number of errors found. Unknown directives only
 * produce a warning.
 */
int queue_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs) {
    ConfigEntry *cep;
    int errors = 0;

    if (type != CONFIG_MAIN || !ce || !ce->name || strcmp(ce->name, "clearurls"))
        return 0;

    for (cep = ce->items; cep; cep = cep->next) {
        if (!cep->name)
            continue;

        if (!strcmp(cep->name, "strip-notices")) {
            /* A directive written without a value has a NULL value; reject it
             * here instead of passing NULL on to config_checkval(). */
            if (!cep->value || config_checkval(cep->value, CFG_YESNO) == -1) {
                config_error("%s:%d: clearurls::strip-notices must be 'yes' or 'no'",
                             cep->file->filename, cep->line_number);
                errors++;
            }
            continue;
        }

        if (!strcmp(cep->name, "exempt-channels")) {
            if (!cep->value || (strlen(cep->value) == 0)) {
                config_error("%s:%d: clearurls::exempt-channels cannot be empty if defined",
                             cep->file->filename, cep->line_number);
                errors++;
            }
            continue;
        }

        /* Found an unknown variable inside our block */
        config_warn("%s:%d: unknown directive clearurls::%s",
                    cep->file->filename, cep->line_number, cep->name);
    }

    *errs = errors;
    return errors ? -1 : 1;
}

/*
 * HOOKTYPE_CONFIGRUN handler: load the settings of a clearurls { } block
 * into `conf`.
 *
 * Returns 0 when `ce` is not a main-config "clearurls" block, 1 once the
 * block has been applied. Previous settings are discarded first and
 * strip_notices starts out enabled.
 */
int queue_configrun(ConfigFile *cf, ConfigEntry *ce, int type) {
    ConfigEntry *item;

    if (type != CONFIG_MAIN)
        return 0;
    if (ce == NULL || ce->name == NULL)
        return 0;
    if (strcmp(ce->name, "clearurls") != 0)
        return 0;

    /* Start from a clean slate */
    safe_free(conf.exempt_channels);
    memset(&conf, 0, sizeof(conf));

    /* Default: Strip notices is ON */
    conf.strip_notices = 1;

    /* Walk the directives of the clearurls { ... } block */
    for (item = ce->items; item != NULL; item = item->next) {
        if (item->name == NULL)
            continue;

        if (strcmp(item->name, "strip-notices") == 0) {
            /* Parsed as a yes/no setting */
            conf.strip_notices = config_checkval(item->value, CFG_YESNO);
        } else if (strcmp(item->name, "exempt-channels") == 0) {
            /* Keep a private copy of the list */
            safe_free(conf.exempt_channels);
            safe_strdup(conf.exempt_channels, item->value);
        }
    }

    return 1;
}

/*
 * Tell whether a channel is listed in clearurls::exempt-channels.
 *
 * conf.exempt_channels is a comma-separated list of masks; each entry is
 * trimmed of surrounding blanks (so "#help, #staff*" works as expected) and
 * compared against `chan_name` with match_simple().
 *
 * Returns 1 when some entry matches, 0 otherwise (including when no list is
 * configured or `chan_name` is NULL).
 */
static int is_channel_exempt(const char *chan_name) {
    char *list = NULL, *save = NULL, *token;
    int exempt = 0;

    /* If no exemptions are configured, nothing can match */
    if (!chan_name || !conf.exempt_channels)
        return 0;

    /* Work on a temporary copy because strtok_r modifies the string */
    safe_strdup(list, conf.exempt_channels);
    if (!list)
        return 0;

    /* Iterate over comma-separated values */
    for (token = strtok_r(list, ",", &save); token; token = strtok_r(NULL, ",", &save)) {
        char *end;

        /* Trim blanks on both sides of the entry */
        while (*token == ' ' || *token == '\t')
            token++;
        end = token + strlen(token);
        while (end > token && (end[-1] == ' ' || end[-1] == '\t'))
            *--end = '\0';

        if (*token == '\0')
            continue;

        /* match_simple handles wildcards: * matches anything, ? matches one char */
        if (match_simple(token, chan_name)) {
            exempt = 1; /* Match found, channel is exempt */
            break;
        }
    }

    safe_free(list);
    return exempt;
}