/*
// Configuration settings
clearurls {
	strip-notices no; // default yes
	exempt-channels "#help,#staff*,#private";
}
*/

#include "unrealircd.h"

#define MAX_URL_LEN 450
#define URL_PATTERN "https?://[^\\s<>]+"

/* Size of the scratch buffer a single URL is copied into before processing.
 * URLs that do not fit are passed through untouched.
 */
#define URL_BUF_SIZE 512

/* Characters that terminate a URL inside a message (besides end of string). */
#define URL_TERMINATORS " \t\n\r<>"

int queue_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs);
int queue_configrun(ConfigFile *cf, ConfigEntry *ce, int type);
static int is_channel_exempt(const char *chan_name);

/* Runtime configuration of the module (the clearurls { } block). */
struct configstruct {
	int strip_notices;      /* non-zero: also rewrite NOTICEs, not just PRIVMSGs */
	char *exempt_channels;  /* comma-separated channel masks, or NULL if none */
};
static struct configstruct conf;

typedef struct {
	const char *name;            /* Friendly name for the pattern */
	const char *base_pattern;    /* PCRE regex for URL matching */
	const char *tracking_params; /* Comma-separated list of parameters to strip */
} URLPattern;

static URLPattern url_patterns[] = {
	{
		.name = "Google",
		.base_pattern = "https?://([^/]*\\.)?google\\.",
		.tracking_params = "gclid,gclsrc,dclid,gbraid,wbraid,_ga",
	},
	{
		.name = "Facebook",
		.base_pattern = "https?://([^/]*\\.)?(facebook\\.com|fb\\.com|fbcdn\\.net)",
		.tracking_params = "fbclid,fb_action_ids,fb_action_types,fb_source,fb_ref,action_object_map,action_type_map,action_ref_map",
	},
	{
		.name = "Amazon",
		.base_pattern = "https?://([^/]*\\.)?amazon\\.",
		.tracking_params = "pd_rd_i,pd_rd_r,pd_rd_w,pd_rd_wg,pf_rd_i,pf_rd_m,pf_rd_p,pf_rd_r,pf_rd_s,pf_rd_t,psc,qid,ref_,tag",
	},
	{
		.name = "YouTube",
		.base_pattern = "https?://([^/]*\\.)?(youtube\\.com|youtu\\.be)",
		.tracking_params = "feature,gclid,kw,si,pp",
	},
	{
		.name = "Twitter/X",
		.base_pattern = "https?://([^/]*\\.)?(twitter\\.com|x\\.com|t\\.co)",
		.tracking_params = "s,t,cn,ref_src,ref_url,twclid",
	},
	{
		.name = "Instagram",
		.base_pattern = "https?://([^/]*\\.)?instagram\\.com",
		.tracking_params = "igshid,igsh",
	},
	{
		.name = "TikTok",
		.base_pattern = "https?://([^/]*\\.)?(tiktok\\.com|vm\\.tiktok\\.com)",
		.tracking_params = "is_copy_url,is_from_webapp,sender_device,sender_web_id",
	},
	{
		.name = "LinkedIn",
		.base_pattern = "https?://([^/]*\\.)?linkedin\\.com",
		.tracking_params = "trk,trkInfo,trackingId,refId,originalReferer",
	},
	{
		.name = "Reddit",
		.base_pattern = "https?://([^/]*\\.)?reddit\\.com",
		.tracking_params = "ref,ref_source,rdt_cid",
	},
	{
		.name = "Spotify",
		.base_pattern = "https?://([^/]*\\.)?spotify\\.com",
		.tracking_params = "si,context,dl_branch,nd",
	},
	{
		.name = "Pinterest",
		.base_pattern = "https?://([^/]*\\.)?pinterest\\.",
		.tracking_params = "source,campaign",
	},
	{
		.name = "Generic Tracking",
		.base_pattern = "https?://",
		.tracking_params = "utm_source,utm_medium,utm_campaign,utm_term,utm_content,utm_id,utm_source_platform,utm_creative_format,utm_marketing_tactic,_hsenc,_hsmi,mc_cid,mc_eid,mkt_tok,oly_anon_id,oly_enc_id,rb_clickid,s_cid,vero_id,wickedid,yclid,msclkid",
	},
	{ .name = NULL } /* marks end of list */
};

ModuleHeader MOD_HEADER = {
	"third/clearurls",
	"0.0.1",
	"Strip tracking parameters from URLs",
	"roger",
	"unrealircd-6",
};

/* Function prototypes */
static int clearurls_chanmsg(Client *client, Channel *channel, Membership *member, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx);
static int should_strip_param(const char *url, const char *param);
static char *strip_tracking_params(const char *text);
static char *process_url(const char *url);
static void clearurls_reset_config(void);
static void free_compiled_patterns(int count);
static int param_in_list(const char *list, const char *param, size_t param_len);

/* Compiled regex patterns */
static Match **compiled_patterns = NULL; /* parallel to url_patterns[] */
static Match *url_detect_pattern = NULL;

/* Holds the most recently rewritten message. The hook hands this pointer back
 * through *text, so it must outlive the hook call; it is recycled on the next
 * rewrite and released on unload.
 */
static char *rewritten_text = NULL;

/** Drop any stored configuration and restore the documented defaults. */
static void clearurls_reset_config(void)
{
	safe_free(conf.exempt_channels);
	memset(&conf, 0, sizeof(conf));
	conf.strip_notices = 1; /* default: strip-notices yes */
}

/** Release the first 'count' compiled per-site patterns and the array itself. */
static void free_compiled_patterns(int count)
{
	int i;

	if (!compiled_patterns)
		return;

	for (i = 0; i < count; i++)
	{
		if (compiled_patterns[i])
			unreal_delete_match(compiled_patterns[i]);
	}
	safe_free(compiled_patterns);
	compiled_patterns = NULL;
}

MOD_INIT()
{
	/* Apply defaults here so they are in effect even when the configuration
	 * contains no clearurls { } block (queue_configrun is then never invoked
	 * for us).
	 */
	clearurls_reset_config();

	HookAdd(modinfo->handle, HOOKTYPE_CAN_SEND_TO_CHANNEL, 0, clearurls_chanmsg);
	HookAdd(modinfo->handle, HOOKTYPE_CONFIGRUN, 0, queue_configrun);
	return MOD_SUCCESS;
}

MOD_LOAD()
{
	int i, count;

	/* Compile URL detection pattern */
	url_detect_pattern = unreal_create_match(MATCH_PCRE_REGEX, URL_PATTERN, NULL);
	if (!url_detect_pattern)
	{
		unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
		           "Failed to compile URL detection regex - module load aborted");
		return MOD_FAILED;
	}

	/* Count number of patterns */
	for (count = 0; url_patterns[count].name != NULL; count++)
		;

	/* Allocate array for compiled patterns (one spare slot, kept as terminator) */
	compiled_patterns = safe_alloc((count + 1) * sizeof(Match *));

	/* Compile all regex patterns once during load */
	for (i = 0; i < count; i++)
	{
		compiled_patterns[i] = unreal_create_match(MATCH_PCRE_REGEX, url_patterns[i].base_pattern, NULL);
		if (!compiled_patterns[i])
		{
			unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
			           "Failed to compile regex for pattern: $pattern_name - module load aborted",
			           log_data_string("pattern_name", url_patterns[i].name));

			/* Undo everything compiled so far, including the detection regex */
			free_compiled_patterns(i);
			unreal_delete_match(url_detect_pattern);
			url_detect_pattern = NULL;
			return MOD_FAILED;
		}
	}

	return MOD_SUCCESS;
}

MOD_UNLOAD()
{
	int count;

	/* Free config strings */
	safe_free(conf.exempt_channels);
	memset(&conf, 0, sizeof(conf));

	/* Free the buffer last handed out through *text */
	safe_free(rewritten_text);

	/* Free URL detection pattern. unreal_delete_match() is the only release
	 * call used for Match objects here; calling safe_free() on the same
	 * pointer afterwards would free it a second time.
	 */
	if (url_detect_pattern)
	{
		unreal_delete_match(url_detect_pattern);
		url_detect_pattern = NULL;
	}

	/* Free all compiled regex patterns. A non-NULL array means MOD_LOAD
	 * completed, so every slot below 'count' holds a compiled pattern.
	 */
	for (count = 0; url_patterns[count].name != NULL; count++)
		;
	free_compiled_patterns(count);

	return MOD_SUCCESS;
}

MOD_TEST()
{
	HookAdd(modinfo->handle, HOOKTYPE_CONFIGTEST, 0, queue_configtest);
	return MOD_SUCCESS;
}

/** HOOKTYPE_CAN_SEND_TO_CHANNEL handler: rewrite tracked URLs in channel messages.
 * Only messages from local users are touched, and only PRIVMSG (plus NOTICE
 * when strip-notices is enabled) to channels that are not exempt.
 * @param text  In/out: on rewrite, *text is pointed at a module-owned buffer.
 * @returns Always 0; this hook never blocks a message.
 */
static int clearurls_chanmsg(Client *client, Channel *channel, Membership *member, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx)
{
	char *cleaned;

	/* Allow PRIVMSG. Allow NOTICE only if config enabled. */
	if (sendtype == SEND_TYPE_NOTICE)
	{
		if (!conf.strip_notices)
			return 0;
	}
	else if (sendtype != SEND_TYPE_PRIVMSG)
	{
		return 0;
	}

	if (!MyUser(client))
		return 0;

	/* Check if the channel is exempted */
	if (channel && is_channel_exempt(channel->name))
		return 0;

	/* Anything of 7 characters or fewer cannot hold "http://" plus a host */
	if (!text || !*text || !**text || strlen(*text) <= 7)
		return 0;

	/* Check if message contains a URL */
	if (!url_detect_pattern || !unreal_match(url_detect_pattern, *text))
		return 0;

	/* We have a URL in the message, let's process it */
	cleaned = strip_tracking_params(*text);
	if (!cleaned)
		return 0;

	if (strcmp(*text, cleaned) == 0)
	{
		/* Nothing was stripped: leave *text alone */
		safe_free(cleaned);
		return 0;
	}

	/* Take ownership of the cleaned copy. The old buffer is released only
	 * now, after 'cleaned' was built, in case *text still pointed at it.
	 */
	safe_free(rewritten_text);
	rewritten_text = cleaned;
	*text = rewritten_text;

	return 0;
}

/** Extract and process URLs from text.
 * @param text  NUL-terminated message text.
 * @returns Newly allocated copy of 'text' with tracking parameters removed
 *          from every http:// or https:// URL (caller frees), or NULL if
 *          'text' is NULL. The result is never longer than the input.
 */
static char *strip_tracking_params(const char *text)
{
	const char *p = text;
	char *result;
	char *out;

	if (!text)
		return NULL;

	result = safe_alloc(strlen(text) + 1);
	out = result;

	while (*p)
	{
		const char *url_end;
		size_t url_len;
		char url_buf[URL_BUF_SIZE];
		char *cleaned_url;

		/* Simple URL detection: look for http:// or https:// */
		if (strncmp(p, "http://", 7) != 0 && strncmp(p, "https://", 8) != 0)
		{
			/* Regular character, just copy it */
			*out++ = *p++;
			continue;
		}

		/* Find end of URL */
		url_end = p;
		while (*url_end && !strchr(URL_TERMINATORS, *url_end))
			url_end++;
		url_len = (size_t)(url_end - p);

		if (url_len >= sizeof(url_buf))
		{
			/* Too long for the scratch buffer: keep it verbatim rather
			 * than truncating the user's message.
			 */
			memcpy(out, p, url_len);
			out += url_len;
			p = url_end;
			continue;
		}

		memcpy(url_buf, p, url_len);
		url_buf[url_len] = '\0';

		/* Process the URL */
		cleaned_url = process_url(url_buf);
		if (cleaned_url)
		{
			size_t cleaned_len = strlen(cleaned_url);

			memcpy(out, cleaned_url, cleaned_len);
			out += cleaned_len;
			safe_free(cleaned_url);
		}
		else
		{
			memcpy(out, url_buf, url_len);
			out += url_len;
		}

		/* Move past the URL */
		p = url_end;
	}

	*out = '\0';
	return result;
}

/** Process a single URL and strip tracking parameters.
 * The query string is the part between the first '?' and the '#fragment'
 * (or end of string); the fragment is always preserved. When no parameter
 * needs stripping the URL is returned unchanged.
 * @param url  NUL-terminated URL.
 * @returns Newly allocated cleaned URL, never longer than 'url' (caller
 *          frees), or NULL if 'url' is NULL.
 */
static char *process_url(const char *url)
{
	size_t url_len;
	char *result;
	char *out;
	char *query_out;
	const char *query_end;   /* start of "#fragment", or the terminating NUL */
	const char *query_start;
	const char *p;
	char param_buf[128];
	int kept = 0;
	int stripped = 0;

	if (!url)
		return NULL;

	url_len = strlen(url);
	result = safe_alloc(url_len + 1);

	query_end = strchr(url, '#');
	if (!query_end)
		query_end = url + url_len;

	/* Only a '?' before the fragment starts a query string */
	query_start = memchr(url, '?', (size_t)(query_end - url));
	if (!query_start)
	{
		memcpy(result, url, url_len + 1);
		return result;
	}

	/* Copy URL up to and including the '?' */
	memcpy(result, url, (size_t)(query_start - url) + 1);
	out = result + (query_start - url) + 1;
	query_out = out;

	/* Process query parameters */
	p = query_start + 1;
	while (p < query_end)
	{
		const char *param_end = memchr(p, '&', (size_t)(query_end - p));
		const char *equal;
		size_t name_len;
		size_t copy_len;
		int strip = 0;

		if (!param_end)
			param_end = query_end;

		equal = memchr(p, '=', (size_t)(param_end - p));
		name_len = (size_t)((equal ? equal : param_end) - p);

		/* Names that are empty or too long for param_buf cannot be on any
		 * tracking list, so they are kept without a lookup.
		 */
		if (name_len > 0 && name_len < sizeof(param_buf))
		{
			memcpy(param_buf, p, name_len);
			param_buf[name_len] = '\0';
			strip = should_strip_param(url, param_buf);
		}

		if (strip)
		{
			stripped++;
		}
		else
		{
			/* Keep this parameter */
			if (kept)
				*out++ = '&';
			copy_len = (size_t)(param_end - p);
			memcpy(out, p, copy_len);
			out += copy_len;
			kept++;
		}

		p = param_end;
		if (p < query_end && *p == '&')
			p++;
	}

	if (!stripped)
	{
		/* Nothing to remove: hand back the URL exactly as it was */
		memcpy(result, url, url_len + 1);
		return result;
	}

	/* Remove the '?' if no query content remains after it */
	if (out == query_out)
		out--;

	/* Re-attach the fragment (or just the terminating NUL) */
	memcpy(out, query_end, strlen(query_end) + 1);

	return result;
}

/** Check whether 'param' occurs in a comma-separated list.
 * Comparison is case-insensitive on whole entries; leading spaces of an
 * entry are ignored.
 * @returns 1 if found, 0 otherwise.
 */
static int param_in_list(const char *list, const char *param, size_t param_len)
{
	const char *tok = list;

	while (*tok)
	{
		const char *end;

		/* Trim leading whitespace */
		while (*tok == ' ')
			tok++;

		end = strchr(tok, ',');
		if (!end)
			end = tok + strlen(tok);

		if ((size_t)(end - tok) == param_len && strncasecmp(tok, param, param_len) == 0)
			return 1;

		tok = *end ? end + 1 : end;
	}

	return 0;
}

/** Check if a parameter should be stripped based on URL patterns.
 * @param url    Full URL, matched against each entry's base_pattern.
 * @param param  Query parameter name (without '=' or value).
 * @returns 1 if any matching pattern lists 'param' as a tracking parameter,
 *          0 otherwise (also when the patterns are not compiled).
 */
static int should_strip_param(const char *url, const char *param)
{
	size_t param_len;
	int i;

	if (!compiled_patterns || !url || !param || !*param)
		return 0;

	param_len = strlen(param);

	for (i = 0; url_patterns[i].name != NULL; i++)
	{
		if (!unreal_match(compiled_patterns[i], url))
			continue;

		/* URL matches pattern, check if parameter should be stripped */
		if (param_in_list(url_patterns[i].tracking_params, param, param_len))
			return 1; /* Strip this parameter */
	}

	return 0; /* Don't strip */
}

/** HOOKTYPE_CONFIGTEST handler: validate the clearurls { } block.
 * @param errs  Out: number of errors found in the block.
 * @returns 0 if the entry is not ours, -1 on errors, 1 if the block is valid.
 */
int queue_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs)
{
	ConfigEntry *cep;
	int errors = 0;

	if (type != CONFIG_MAIN || !ce || !ce->name || strcmp(ce->name, "clearurls"))
		return 0;

	for (cep = ce->items; cep; cep = cep->next)
	{
		if (!cep->name)
			continue;

		if (!strcmp(cep->name, "strip-notices"))
		{
			/* A directive without a value must not reach config_checkval() */
			if (!cep->value || config_checkval(cep->value, CFG_YESNO) == -1)
			{
				config_error("%s:%d: clearurls::strip-notices must be 'yes' or 'no'", cep->file->filename, cep->line_number);
				errors++;
			}
			continue;
		}

		if (!strcmp(cep->name, "exempt-channels"))
		{
			if (!cep->value || (strlen(cep->value) == 0))
			{
				config_error("%s:%d: clearurls::exempt-channels cannot be empty if defined", cep->file->filename, cep->line_number);
				errors++;
			}
			continue;
		}

		/* found an unknown variable inside our block */
		config_warn("%s:%d: unknown directive clearurls::%s", cep->file->filename, cep->line_number, cep->name);
	}

	*errs = errors;
	return errors ? -1 : 1;
}

/** HOOKTYPE_CONFIGRUN handler: load the clearurls { } block into 'conf'.
 * @returns 0 if the entry is not ours, 1 once it has been consumed.
 */
int queue_configrun(ConfigFile *cf, ConfigEntry *ce, int type)
{
	ConfigEntry *cep;

	if (type != CONFIG_MAIN || !ce || !ce->name || strcmp(ce->name, "clearurls"))
		return 0;

	/* Start from defaults (strip-notices ON, no exemptions) */
	clearurls_reset_config();

	/* Check for clearurls { ... } block */
	for (cep = ce->items; cep; cep = cep->next)
	{
		if (!cep->name)
			continue;

		if (!strcmp(cep->name, "strip-notices"))
		{
			/* Reads values like "yes", "no", "1", "0", "true", "false" */
			if (cep->value)
				conf.strip_notices = config_checkval(cep->value, CFG_YESNO);
			continue;
		}

		if (!strcmp(cep->name, "exempt-channels"))
		{
			/* Make a safe copy of the string */
			safe_free(conf.exempt_channels);
			safe_strdup(conf.exempt_channels, cep->value);
			continue;
		}
	}

	return 1;
}

/** Tell whether a channel is covered by clearurls::exempt-channels.
 * @param chan_name  Channel name to test.
 * @returns 1 if any configured mask matches, 0 otherwise (including when no
 *          exemptions are configured).
 */
static int is_channel_exempt(const char *chan_name)
{
	char *list = NULL;
	char *saveptr = NULL;
	char *token;
	int exempt = 0;

	/* If no exemptions are configured, return 0 (false) */
	if (!conf.exempt_channels || !chan_name)
		return 0;

	/* Create a temporary copy because strtok_r modifies the string */
	safe_strdup(list, conf.exempt_channels);
	if (!list)
		return 0;

	/* Iterate over comma-separated values */
	for (token = strtok_r(list, ",", &saveptr); token; token = strtok_r(NULL, ",", &saveptr))
	{
		char *end;

		/* Tolerate "#a, #b" style lists: trim spaces around each mask */
		while (*token == ' ')
			token++;
		end = token + strlen(token);
		while (end > token && end[-1] == ' ')
			*--end = '\0';
		if (!*token)
			continue;

		/* match_simple handles wildcards: * matches anything, ? matches one char */
		if (match_simple(token, chan_name))
		{
			exempt = 1; /* Match found, channel is exempt */
			break;
		}
	}

	safe_free(list);
	return exempt;
}