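/*
// Example configuration (both settings are optional)
clearurls {
	strip-notices no; // also clean URLs in NOTICEs; default yes
	exempt-channels "#help,#staff*,#private"; // comma-separated channel masks (wildcards allowed) that are never rewritten
}
*/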
#include "unrealircd.h"
#define MAX_URL_LEN 450
#define URL_PATTERN "https?://[^\\s<>]+"
/* Forward declarations: config hooks and the channel exemption check. */
int queue_configtest(ConfigFile *cf,
                     ConfigEntry *ce,
                     int type,
                     int *errs);
int queue_configrun(ConfigFile *cf,
                    ConfigEntry *ce,
                    int type);
static int is_channel_exempt(const char *chan_name);
/* Runtime settings read from the clearurls { } configuration block. */
struct configstruct {
	/* Non-zero: NOTICE messages are cleaned as well as PRIVMSG. */
	int strip_notices;
	/* Heap-allocated comma-separated list of exempt channel masks, or NULL. */
	char *exempt_channels;
};

/* The single module-wide settings instance (zero-initialised as a static). */
static struct configstruct conf;
/*
 * One rule of the tracking-parameter table: query parameters listed in
 * tracking_params are stripped from URLs that match base_pattern.
 */
typedef struct {
	const char *name;            /* Friendly name, used in log messages */
	const char *base_pattern;    /* PCRE regex the URL has to match */
	const char *tracking_params; /* Comma-separated parameter names to strip */
} URLPattern;

/*
 * Built-in rules, in { name, base_pattern, tracking_params } order.
 * Every rule whose regex matches a URL is consulted, so the final
 * "Generic Tracking" rule (plain "https?://") applies to all URLs.
 * The list is terminated by an entry whose name is NULL.
 */
static URLPattern url_patterns[] = {
	{ "Google",
	  "https?://([^/]*\\.)?google\\.",
	  "gclid,gclsrc,dclid,gbraid,wbraid,_ga" },
	{ "Facebook",
	  "https?://([^/]*\\.)?(facebook\\.com|fb\\.com|fbcdn\\.net)",
	  "fbclid,fb_action_ids,fb_action_types,fb_source,fb_ref,action_object_map,action_type_map,action_ref_map" },
	{ "Amazon",
	  "https?://([^/]*\\.)?amazon\\.",
	  "pd_rd_i,pd_rd_r,pd_rd_w,pd_rd_wg,pf_rd_i,pf_rd_m,pf_rd_p,pf_rd_r,pf_rd_s,pf_rd_t,psc,qid,ref_,tag" },
	{ "YouTube",
	  "https?://([^/]*\\.)?(youtube\\.com|youtu\\.be)",
	  "feature,gclid,kw,si,pp" },
	{ "Twitter/X",
	  "https?://([^/]*\\.)?(twitter\\.com|x\\.com|t\\.co)",
	  "s,t,cn,ref_src,ref_url,twclid" },
	{ "Instagram",
	  "https?://([^/]*\\.)?instagram\\.com",
	  "igshid,igsh" },
	{ "TikTok",
	  "https?://([^/]*\\.)?(tiktok\\.com|vm\\.tiktok\\.com)",
	  "is_copy_url,is_from_webapp,sender_device,sender_web_id" },
	{ "LinkedIn",
	  "https?://([^/]*\\.)?linkedin\\.com",
	  "trk,trkInfo,trackingId,refId,originalReferer" },
	{ "Reddit",
	  "https?://([^/]*\\.)?reddit\\.com",
	  "ref,ref_source,rdt_cid" },
	{ "Spotify",
	  "https?://([^/]*\\.)?spotify\\.com",
	  "si,context,dl_branch,nd" },
	{ "Pinterest",
	  "https?://([^/]*\\.)?pinterest\\.",
	  "source,campaign" },
	{ "Generic Tracking",
	  "https?://",
	  "utm_source,utm_medium,utm_campaign,utm_term,utm_content,utm_id,utm_source_platform,utm_creative_format,utm_marketing_tactic,_hsenc,_hsmi,mc_cid,mc_eid,mkt_tok,oly_anon_id,oly_enc_id,rb_clickid,s_cid,vero_id,wickedid,yclid,msclkid" },
	{ NULL, NULL, NULL } /* marks end of list */
};
/* Module identification block. */
ModuleHeader MOD_HEADER = {
	"third/clearurls",                     /* module name */
	"0.0.1",                               /* module version */
	"Strip tracking parameters from URLs", /* short description */
	"roger",                               /* author */
	"unrealircd-6",                        /* presumably the targeted module API version - confirm */
};
/* Prototypes of the file-local helpers */
static int clearurls_chanmsg(Client *client,
                             Channel *channel,
                             Membership *member,
                             const char **text,
                             const char **errmsg,
                             SendType sendtype,
                             ClientContext *clictx);
static int should_strip_param(const char *url, const char *param);
static char *strip_tracking_params(const char *text);
static char *process_url(const char *url);

/*
 * Regexes compiled once in MOD_LOAD and released in MOD_UNLOAD:
 * compiled_patterns[i] belongs to url_patterns[i]; url_detect_pattern
 * is compiled from URL_PATTERN.
 */
static Match **compiled_patterns = NULL;
static Match *url_detect_pattern = NULL;
MOD_INIT() {
HookAdd(modinfo->handle, HOOKTYPE_CAN_SEND_TO_CHANNEL, 0, clearurls_chanmsg);
HookAdd(modinfo->handle, HOOKTYPE_CONFIGRUN, 0, queue_configrun);
return MOD_SUCCESS;
}
/*
 * Module load: compile the URL detection regex and one regex per
 * url_patterns entry. Returns MOD_FAILED, with everything compiled so
 * far released again, if any regex fails to compile.
 */
MOD_LOAD() {
	int count = 0;
	int i;

	/* Compile URL detection pattern */
	url_detect_pattern = unreal_create_match(MATCH_PCRE_REGEX, URL_PATTERN, NULL);
	if (!url_detect_pattern) {
		unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
		           "Failed to compile URL detection regex - module load aborted");
		return MOD_FAILED;
	}

	/* Count the table entries (the list ends at the NULL name) */
	while (url_patterns[count].name != NULL)
		count++;

	/* One slot per pattern plus a spare slot at the end */
	compiled_patterns = safe_alloc((count + 1) * sizeof(Match *));

	/* Compile all regex patterns once during load */
	for (i = 0; i < count; i++) {
		compiled_patterns[i] = unreal_create_match(MATCH_PCRE_REGEX, url_patterns[i].base_pattern, NULL);
		if (compiled_patterns[i])
			continue;

		unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
		           "Failed to compile regex for pattern: $pattern_name - module load aborted",
		           log_data_string("pattern_name", url_patterns[i].name));

		/* Release the patterns that did compile ... */
		while (--i >= 0)
			unreal_delete_match(compiled_patterns[i]);
		safe_free(compiled_patterns);
		compiled_patterns = NULL;

		/* ... and the detection regex, which was previously leaked here */
		unreal_delete_match(url_detect_pattern);
		url_detect_pattern = NULL;
		return MOD_FAILED;
	}

	return MOD_SUCCESS;
}
/*
 * Module unload: release the configuration strings and every compiled
 * regex, leaving all module globals reset to their empty state.
 */
MOD_UNLOAD() {
	int i;

	/* Free config strings */
	safe_free(conf.exempt_channels);
	memset(&conf, 0, sizeof(conf));

	/*
	 * Free URL detection pattern. unreal_delete_match() is the only
	 * release call needed (the array entries below are released the same
	 * way); an additional safe_free() on the same pointer would free the
	 * object a second time.
	 */
	if (url_detect_pattern) {
		unreal_delete_match(url_detect_pattern);
		url_detect_pattern = NULL;
	}

	/* Free all compiled regex patterns: one slot per url_patterns entry */
	if (compiled_patterns) {
		for (i = 0; url_patterns[i].name != NULL; i++) {
			if (compiled_patterns[i])
				unreal_delete_match(compiled_patterns[i]);
		}
		safe_free(compiled_patterns);
		compiled_patterns = NULL;
	}

	return MOD_SUCCESS;
}
/*
 * Module test phase: register the config-test hook so that the
 * clearurls { } block is validated by queue_configtest().
 */
MOD_TEST() {
	HookAdd(modinfo->handle, HOOKTYPE_CONFIGTEST, 0, queue_configtest);

	return MOD_SUCCESS;
}
/*
 * Channel message hook: rewrites *text with tracking parameters removed.
 *
 * Only PRIVMSG (and NOTICE when conf.strip_notices is set) from local
 * users to non-exempt channels is considered. Always returns 0, so the
 * hook never blocks a message itself.
 *
 * When the text changes, *text is pointed at a static buffer owned by
 * this function; it stays valid until the next call. The previous
 * version handed out a fresh heap copy for every rewritten message that
 * nothing ever freed.
 */
static int clearurls_chanmsg(Client *client, Channel *channel, Membership *member, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx) {
	static char cleaned_buf[512];
	char *cleaned;

	/* Allow PRIVMSG. Allow NOTICE only if config enabled. */
	if (sendtype == SEND_TYPE_NOTICE) {
		if (!conf.strip_notices)
			return 0;
	} else if (sendtype != SEND_TYPE_PRIVMSG) {
		return 0;
	}

	/* Only messages from our own users are rewritten */
	if (!MyUser(client))
		return 0;

	/* Check if the channel is exempted */
	if (channel && is_channel_exempt(channel->name))
		return 0;

	/* Anything this short cannot hold "http://" plus a host */
	if (!text || !*text || strlen(*text) <= 7)
		return 0;

	/* Check if message contains a URL */
	if (!url_detect_pattern || !unreal_match(url_detect_pattern, *text))
		return 0;

	/* We have a URL in the message, process it */
	cleaned = strip_tracking_params(*text);
	if (!cleaned)
		return 0;

	/*
	 * Swap the text only when something changed and the result fits the
	 * static buffer; otherwise the message goes out untouched rather
	 * than truncated.
	 */
	if (strcmp(*text, cleaned) != 0 && strlen(cleaned) < sizeof(cleaned_buf)) {
		snprintf(cleaned_buf, sizeof(cleaned_buf), "%s", cleaned);
		*text = cleaned_buf;
	}

	safe_free(cleaned);
	return 0;
}
/*
 * Return a newly allocated copy of text in which every http:// or
 * https:// URL has been passed through process_url().
 *
 * A URL ends at the first whitespace character, '<' or '>' (mirroring
 * URL_PATTERN). URLs too long for the local buffer are copied verbatim
 * instead of being truncated. The caller releases the result with
 * safe_free(). Returns NULL when text is NULL.
 */
static char *strip_tracking_params(const char *text) {
	const char *p = text;
	size_t len;
	char *result;
	char *out;

	if (!text)
		return NULL;

	/*
	 * A cleaned URL is never written longer than the original (see the
	 * clamp below), so the input length is enough for the output.
	 */
	len = strlen(text);
	result = safe_alloc(len + 1);
	out = result;

	while (*p) {
		/* Regular character, just copy it */
		if (strncmp(p, "http://", 7) != 0 && strncmp(p, "https://", 8) != 0) {
			*out++ = *p++;
			continue;
		}

		const char *url_start = p;
		const char *url_end = p;
		char url_buf[512];
		char *cleaned_url = NULL;
		size_t url_len;

		/* Find end of URL */
		while (*url_end && !strchr(" \t\n\r<>", *url_end))
			url_end++;
		url_len = (size_t)(url_end - url_start);

		/* Process the URL if it fits the scratch buffer */
		if (url_len < sizeof(url_buf)) {
			memcpy(url_buf, url_start, url_len);
			url_buf[url_len] = '\0';
			cleaned_url = process_url(url_buf);
		}

		if (cleaned_url) {
			/* Emit the cleaned URL, never more bytes than the original had */
			size_t cleaned_len = strlen(cleaned_url);

			if (cleaned_len > url_len)
				cleaned_len = url_len;
			memcpy(out, cleaned_url, cleaned_len);
			out += cleaned_len;
			safe_free(cleaned_url);
		} else {
			/* Not processed: keep the URL exactly as it was written */
			memcpy(out, url_start, url_len);
			out += url_len;
		}

		/* Move past the URL */
		p = url_end;
	}

	*out = '\0';
	return result;
}
/*
 * Return a newly allocated copy of url with every query parameter that
 * should_strip_param() flags removed.
 *
 * - A URL without a query string, or one in which nothing is stripped,
 *   is returned unchanged.
 * - A "#fragment" is not part of the query and is kept as-is.
 * - If every parameter is stripped, the now-dangling '?' is dropped too.
 *
 * The result is never longer than url. The caller releases it with
 * safe_free().
 */
static char *process_url(const char *url) {
	size_t url_len = strlen(url);
	char *result = safe_alloc(url_len + 1);
	char *out = result;
	const char *fragment = strchr(url, '#');
	const char *query_end = fragment ? fragment : url + url_len;
	/* Only a '?' in front of the fragment starts a query string */
	const char *query_start = memchr(url, '?', (size_t)(query_end - url));
	const char *p;
	char param_buf[128];
	int first_param = 1;
	int stripped = 0;

	if (!query_start) {
		memcpy(result, url, url_len + 1);
		return result;
	}

	/* Copy URL up to and including the '?' */
	memcpy(out, url, (size_t)(query_start - url) + 1);
	out += (query_start - url) + 1;

	/* Process query parameters */
	p = query_start + 1;
	while (p < query_end) {
		const char *param_end = memchr(p, '&', (size_t)(query_end - p));
		const char *name_end;
		size_t name_len;
		int strip = 0;

		if (!param_end)
			param_end = query_end;

		/* The name runs up to '=', or to the end for a value-less parameter */
		name_end = memchr(p, '=', (size_t)(param_end - p));
		if (!name_end)
			name_end = param_end;
		name_len = (size_t)(name_end - p);

		/* Names too long for the buffer are kept rather than matched truncated */
		if (name_len < sizeof(param_buf)) {
			memcpy(param_buf, p, name_len);
			param_buf[name_len] = '\0';
			strip = should_strip_param(url, param_buf);
		}

		if (strip) {
			stripped = 1;
		} else {
			/* Keep this parameter */
			size_t copy_len = (size_t)(param_end - p);

			if (!first_param)
				*out++ = '&';
			memcpy(out, p, copy_len);
			out += copy_len;
			first_param = 0;
		}

		p = param_end;
		if (p < query_end)
			p++; /* skip the '&' */
	}

	/* Nothing removed: hand back the URL exactly as it was written */
	if (!stripped) {
		memcpy(result, url, url_len + 1);
		return result;
	}

	/* Remove trailing '?' */
	if (out > result && *(out - 1) == '?')
		out--;

	/* Re-attach the fragment, if any */
	if (fragment) {
		size_t fragment_len = strlen(fragment);

		memcpy(out, fragment, fragment_len);
		out += fragment_len;
	}

	*out = '\0';
	return result;
}
/*
 * Decide whether query parameter `param` has to be removed from `url`.
 *
 * Every url_patterns entry whose compiled regex matches the URL is
 * consulted; the parameter is stripped when its name equals (ignoring
 * case) one of the names in that entry's comma-separated list.
 * Returns 1 to strip, 0 to keep (also when no regexes are compiled).
 */
static int should_strip_param(const char *url, const char *param) {
	int idx;

	if (compiled_patterns == NULL)
		return 0;

	for (idx = 0; url_patterns[idx].name != NULL; idx++) {
		const char *cursor;

		if (!unreal_match(compiled_patterns[idx], url))
			continue;

		/* Walk the comma-separated list in place, one segment at a time */
		cursor = url_patterns[idx].tracking_params;
		while (*cursor != '\0') {
			size_t segment_len = strcspn(cursor, ",");
			const char *candidate = cursor;
			size_t candidate_len = segment_len;

			/* Trim leading spaces of the list entry */
			while (candidate_len > 0 && *candidate == ' ') {
				candidate++;
				candidate_len--;
			}

			/* Empty segments (",,") are skipped entirely */
			if (segment_len > 0
			    && candidate_len == strlen(param)
			    && strncasecmp(candidate, param, candidate_len) == 0)
				return 1; /* Strip this parameter */

			cursor += segment_len;
			if (*cursor == ',')
				cursor++;
		}
	}

	return 0; /* Don't strip */
}
/*
 * Config-test hook: validates the top-level clearurls { } block.
 *
 * Returns 0 for blocks that are not ours, 1 when the block is valid and
 * -1 when it contains errors; the number of errors is stored in *errs.
 * Unknown directives only produce a warning.
 */
int queue_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs) {
	ConfigEntry *cep;
	int errors = 0;

	if (type != CONFIG_MAIN || !ce || !ce->name || strcmp(ce->name, "clearurls") != 0)
		return 0;

	for (cep = ce->items; cep; cep = cep->next) {
		if (!cep->name)
			continue;

		if (!strcmp(cep->name, "strip-notices")) {
			/*
			 * A directive written without a value ("strip-notices;")
			 * has a NULL value, which must be rejected here before it
			 * reaches config_checkval().
			 */
			if (!cep->value || !*cep->value
			    || config_checkval(cep->value, CFG_YESNO) == -1) {
				config_error("%s:%d: clearurls::strip-notices must be 'yes' or 'no'",
				             cep->file->filename, cep->line_number);
				errors++;
			}
			continue;
		}

		if (!strcmp(cep->name, "exempt-channels")) {
			if (!cep->value || !*cep->value) {
				config_error("%s:%d: clearurls::exempt-channels cannot be empty if defined",
				             cep->file->filename, cep->line_number);
				errors++;
			}
			continue;
		}

		/* found an unknown variable inside our block */
		config_warn("%s:%d: unknown directive clearurls::%s",
		            cep->file->filename, cep->line_number, cep->name);
	}

	*errs = errors;
	return errors ? -1 : 1;
}
/*
 * Config-run hook: loads the top-level clearurls { } block into conf.
 *
 * Returns 0 for blocks that are not ours and 1 once the block has been
 * applied. Settings missing from the block fall back to their defaults
 * (strip-notices on, no exempt channels).
 */
int queue_configrun(ConfigFile *cf, ConfigEntry *ce, int type) {
	ConfigEntry *cep;

	if (type != CONFIG_MAIN || !ce || !ce->name || strcmp(ce->name, "clearurls") != 0)
		return 0;

	/* Drop whatever an earlier block stored, then apply the defaults */
	safe_free(conf.exempt_channels);
	memset(&conf, 0, sizeof(conf));

	/* Default: Strip notices is ON */
	conf.strip_notices = 1;

	for (cep = ce->items; cep; cep = cep->next) {
		if (!cep->name)
			continue;

		if (!strcmp(cep->name, "strip-notices")) {
			/*
			 * Reads values like "yes", "no", "1", "0", "true", "false".
			 * A directive without a value keeps the default instead of
			 * passing NULL on to config_checkval().
			 */
			if (cep->value)
				conf.strip_notices = config_checkval(cep->value, CFG_YESNO);
			continue;
		}

		if (!strcmp(cep->name, "exempt-channels")) {
			/* Keep a private copy; a repeated directive replaces the earlier one */
			safe_strdup(conf.exempt_channels, cep->value);
			continue;
		}
	}

	return 1;
}
/*
 * Check whether chan_name matches one of the masks in the
 * comma-separated conf.exempt_channels list.
 *
 * Spaces and tabs around each list entry are ignored, so
 * "#help, #staff*" behaves the same as "#help,#staff*".
 * Returns 1 when the channel is exempt, 0 otherwise (including when no
 * list is configured or chan_name is NULL).
 */
static int is_channel_exempt(const char *chan_name) {
	char *list = NULL;
	char *saveptr = NULL;
	char *token;
	int exempt = 0;

	/* Nothing to compare, or no exemptions configured */
	if (!chan_name || !conf.exempt_channels)
		return 0;

	/* Work on a temporary copy because strtok_r modifies the string */
	safe_strdup(list, conf.exempt_channels);
	if (!list)
		return 0;

	/* Iterate over comma-separated values */
	for (token = strtok_r(list, ",", &saveptr); token; token = strtok_r(NULL, ",", &saveptr)) {
		char *end;

		/* Trim surrounding blanks of the entry */
		while (*token == ' ' || *token == '\t')
			token++;
		end = token + strlen(token);
		while (end > token && (end[-1] == ' ' || end[-1] == '\t'))
			end--;
		*end = '\0';

		/* match_simple handles wildcards: * matches anything, ? matches one char */
		if (*token && match_simple(token, chan_name)) {
			exempt = 1; /* Match found, channel is exempt */
			break;
		}
	}

	safe_free(list);
	return exempt;
}