Clearurls.c

C Guest 58 Views Size: 14.85 KB Posted on: Jan 10, 26 @ 6:15 AM
  1. /*
  2. // Configuration settings
  3. clearurls {
  4.     strip-notices no; // default yes
  5.     exempt-channels "#help,#staff*,#private";
  6. }
  7. */
  8.  
  9. #include "unrealircd.h"
  10.  
  11. #define MAX_URL_LEN 450
  12. #define URL_PATTERN "https?://[^\\s<>]+"
  13.  
  14. int queue_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs);
  15. int queue_configrun(ConfigFile *cf, ConfigEntry *ce, int type);
  16. static int is_channel_exempt(const char *chan_name);
  17. struct configstruct {
  18.     int strip_notices;
  19.     char *exempt_channels;
  20. };
  21. static struct configstruct conf;
  22.  
  23. typedef struct {
  24.     const char *name;            /* Friendly name for the pattern */
  25.     const char *base_pattern;    /* PCRE regex for URL matching */
  26.     const char *tracking_params; /* Comma-separated list of parameters to strip */
  27. } URLPattern;
  28.  
  29. static URLPattern url_patterns[] = {
  30.     {
  31.         .name = "Google",
  32.         .base_pattern = "https?://([^/]*\\.)?google\\.",
  33.         .tracking_params = "gclid,gclsrc,dclid,gbraid,wbraid,_ga",
  34.     },
  35.     {
  36.         .name = "Facebook",
  37.         .base_pattern = "https?://([^/]*\\.)?(facebook\\.com|fb\\.com|fbcdn\\.net)",
  38.         .tracking_params = "fbclid,fb_action_ids,fb_action_types,fb_source,fb_ref,action_object_map,action_type_map,action_ref_map",
  39.     },
  40.     {
  41.         .name = "Amazon",
  42.         .base_pattern = "https?://([^/]*\\.)?amazon\\.",
  43.         .tracking_params = "pd_rd_i,pd_rd_r,pd_rd_w,pd_rd_wg,pf_rd_i,pf_rd_m,pf_rd_p,pf_rd_r,pf_rd_s,pf_rd_t,psc,qid,ref_,tag",
  44.     },
  45.     {
  46.         .name = "YouTube",
  47.         .base_pattern = "https?://([^/]*\\.)?(youtube\\.com|youtu\\.be)",
  48.         .tracking_params = "feature,gclid,kw,si,pp",
  49.     },
  50.     {
  51.         .name = "Twitter/X",
  52.         .base_pattern = "https?://([^/]*\\.)?(twitter\\.com|x\\.com|t\\.co)",
  53.         .tracking_params = "s,t,cn,ref_src,ref_url,twclid",
  54.     },
  55.     {
  56.         .name = "Instagram",
  57.         .base_pattern = "https?://([^/]*\\.)?instagram\\.com",
  58.         .tracking_params = "igshid,igsh",
  59.     },
  60.     {
  61.         .name = "TikTok",
  62.         .base_pattern = "https?://([^/]*\\.)?(tiktok\\.com|vm\\.tiktok\\.com)",
  63.         .tracking_params = "is_copy_url,is_from_webapp,sender_device,sender_web_id",
  64.     },
  65.     {
  66.         .name = "LinkedIn",
  67.         .base_pattern = "https?://([^/]*\\.)?linkedin\\.com",
  68.         .tracking_params = "trk,trkInfo,trackingId,refId,originalReferer",
  69.     },
  70.     {
  71.         .name = "Reddit",
  72.         .base_pattern = "https?://([^/]*\\.)?reddit\\.com",
  73.         .tracking_params = "ref,ref_source,rdt_cid",
  74.     },
  75.     {
  76.         .name = "Spotify",
  77.         .base_pattern = "https?://([^/]*\\.)?spotify\\.com",
  78.         .tracking_params = "si,context,dl_branch,nd",
  79.     },
  80.     {
  81.         .name = "Pinterest",
  82.         .base_pattern = "https?://([^/]*\\.)?pinterest\\.",
  83.         .tracking_params = "source,campaign",
  84.     },
  85.     {
  86.         .name = "Generic Tracking",
  87.         .base_pattern = "https?://",
  88.         .tracking_params = "utm_source,utm_medium,utm_campaign,utm_term,utm_content,utm_id,utm_source_platform,utm_creative_format,utm_marketing_tactic,_hsenc,_hsmi,mc_cid,mc_eid,mkt_tok,oly_anon_id,oly_enc_id,rb_clickid,s_cid,vero_id,wickedid,yclid,msclkid",
  89.     },
  90.  
  91.     { .name = NULL }  /* marks end of list */
  92. };
  93.  
  94. ModuleHeader MOD_HEADER = {
  95.     "third/clearurls",
  96.     "0.0.1",
  97.     "Strip tracking parameters from URLs",
  98.     "roger",
  99.     "unrealircd-6",
  100. };
  101.  
  102. /* Function prototypes */
  103. static int clearurls_chanmsg(Client *client, Channel *channel, Membership *member, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx);
  104. static int should_strip_param(const char *url, const char *param);
  105. static char *strip_tracking_params(const char *text);
  106. static char *process_url(const char *url);
  107.  
  108. /* Compiled regex patterns */
  109. static Match **compiled_patterns = NULL;
  110. static Match *url_detect_pattern = NULL;
  111.  
  112. MOD_INIT() {
  113.     HookAdd(modinfo->handle, HOOKTYPE_CAN_SEND_TO_CHANNEL, 0, clearurls_chanmsg);
  114.     HookAdd(modinfo->handle, HOOKTYPE_CONFIGRUN, 0, queue_configrun);
  115.     return MOD_SUCCESS;
  116. }
  117.  
  118. MOD_LOAD() {
  119.     int i, count;
  120.  
  121.     /* Compile URL detection pattern */
  122.     url_detect_pattern = unreal_create_match(MATCH_PCRE_REGEX, URL_PATTERN, NULL);
  123.     if (!url_detect_pattern) {
  124.         unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
  125.                   "Failed to compile URL detection regex - module load aborted");
  126.         return MOD_FAILED;
  127.     }
  128.  
  129.     /* Count number of patterns */
  130.     for (count = 0; url_patterns[count].name != NULL; count++);
  131.  
  132.     /* Allocate array for compiled patterns */
  133.     compiled_patterns = safe_alloc((count + 1) * sizeof(Match *));
  134.  
  135.     /* Compile all regex patterns once during load */
  136.     for (i = 0; url_patterns[i].name != NULL; i++) {
  137.         compiled_patterns[i] = unreal_create_match(MATCH_PCRE_REGEX, url_patterns[i].base_pattern, NULL);
  138.  
  139.         if (!compiled_patterns[i]) {
  140.             unreal_log(ULOG_ERROR, "clearurls", "REGEX_COMPILE_FAILED", NULL,
  141.                       "Failed to compile regex for pattern: $pattern_name - module load aborted",
  142.                       log_data_string("pattern_name", url_patterns[i].name));
  143.  
  144.             /* Clean up any patterns that were successfully compiled */
  145.             for (int j = 0; j < i; j++) {
  146.                 if (compiled_patterns[j]) {
  147.                     unreal_delete_match(compiled_patterns[j]);
  148.                 }
  149.             }
  150.             safe_free(compiled_patterns);
  151.             compiled_patterns = NULL;
  152.  
  153.             return MOD_FAILED;
  154.         }
  155.     }
  156.     return MOD_SUCCESS;
  157. }
  158.  
  159. MOD_UNLOAD() {
  160.     int i;
  161.  
  162.     /* Free config strings */
  163.     if (conf.exempt_channels) {
  164.         safe_free(conf.exempt_channels);
  165.     }
  166.     memset(&conf, 0, sizeof(conf));
  167.  
  168.     /* Free URL detection pattern */
  169.     if (url_detect_pattern) {
  170.         unreal_delete_match(url_detect_pattern);
  171.         safe_free(url_detect_pattern);
  172.         url_detect_pattern = NULL;
  173.     }
  174.  
  175.     /* Free all compiled regex patterns */
  176.     if (compiled_patterns) {
  177.         for (i = 0; compiled_patterns[i] != NULL || url_patterns[i].name != NULL; i++) {
  178.             if (compiled_patterns[i]) {
  179.                 unreal_delete_match(compiled_patterns[i]);
  180.             }
  181.         }
  182.         safe_free(compiled_patterns);
  183.         compiled_patterns = NULL;
  184.     }
  185.     return MOD_SUCCESS;
  186. }
  187.  
  188. MOD_TEST() {
  189.     HookAdd(modinfo->handle, HOOKTYPE_CONFIGTEST, 0, queue_configtest);
  190.     return MOD_SUCCESS;
  191. }
  192.  
  193. static int clearurls_chanmsg(Client *client, Channel *channel, Membership *member, const char **text, const char **errmsg, SendType sendtype, ClientContext *clictx) {
  194.     /* Allow PRIVMSG. Allow NOTICE only if config enabled. */
  195.     if (sendtype == SEND_TYPE_NOTICE) {
  196.         if (!conf.strip_notices)
  197.             return 0;
  198.     } else if (sendtype != SEND_TYPE_PRIVMSG) {
  199.         return 0;
  200.     }
  201.  
  202.     /* Check if the channel is exempted */
  203.     if (channel && is_channel_exempt(channel->name))
  204.         return 0;
  205.  
  206.     if(!MyUser(client))
  207.         return 0;
  208.  
  209.     if (!*text || !**text || strlen(*text) <= 7)
  210.         return 0;
  211.  
  212.     /* Check if message contains a URL */
  213.     if (!unreal_match(url_detect_pattern, *text))
  214.         return 0;
  215.  
  216.     char *cleaned;
  217.     char *dup = NULL;
  218.  
  219.     // we have a url in the message, lets process it
  220.     cleaned = strip_tracking_params(*text);
  221.     if (!cleaned)
  222.         return 0;
  223.  
  224.     if (strcmp(*text, cleaned) != 0) {
  225.         /* duplicate the cleaned text since *text expects const char* */
  226.         safe_strdup(dup, cleaned);
  227.         *text = dup;
  228.     }
  229.     safe_free(cleaned);
  230.  
  231.     return 0;
  232. }
  233.  
  234. /* Extract and process URLs from text */
  235. static char *strip_tracking_params(const char *text) {
  236.     const char *p = text;
  237.     size_t len;
  238.     len = strlen(text);
  239.     char *result = safe_alloc(len + 1);
  240.     char *out = result;
  241.  
  242.     while (*p) {
  243.         /* Simple URL detection: look for http:// or https:// */
  244.         if (strncmp(p, "http://", 7) == 0 || strncmp(p, "https://", 8) == 0) {
  245.             const char *url_start = p;
  246.             const char *url_end = p;
  247.             char url_buf[512];
  248.             char *cleaned_url;
  249.             size_t url_len;
  250.  
  251.             /* Find end of URL */
  252.             while (*url_end && !strchr(" \n\r<>", *url_end)) {
  253.                 url_end++;
  254.             }
  255.  
  256.             url_len = url_end - url_start;
  257.             if (url_len >= sizeof(url_buf))
  258.                 url_len = sizeof(url_buf) - 1;
  259.  
  260.             strncpy(url_buf, url_start, url_len);
  261.             url_buf[url_len] = '\0';
  262.  
  263.             /* Process the URL */
  264.             cleaned_url = process_url(url_buf);
  265.             if (cleaned_url) {
  266.                 strcpy(out, cleaned_url);
  267.                 out += strlen(cleaned_url);
  268.                 safe_free(cleaned_url);
  269.             } else {
  270.                 strcpy(out, url_buf);
  271.                 out += strlen(url_buf);
  272.             }
  273.             /* Move past the URL */
  274.             p = url_end;
  275.         } else {
  276.             /* Regular character, just copy it */
  277.             *out++ = *p++;
  278.         }
  279.     }
  280.  
  281.     *out = '\0';
  282.     return result;
  283. }
  284.  
  285. /* Process a single URL and strip tracking parameters */
  286. static char *process_url(const char *url) {
  287.     char *result = safe_alloc(strlen(url) + 1);
  288.     const char *query_start = strchr(url, '?');
  289.     char *out = result;
  290.  
  291.     if (!query_start) {
  292.         strcpy(result, url);
  293.         return result;
  294.     }
  295.  
  296.     /* Copy URL up to the query string */
  297.     size_t base_len = query_start - url;
  298.     strncpy(result, url, base_len);
  299.     out = result + base_len;
  300.  
  301.     /* Process query parameters */
  302.     const char *p = query_start + 1;
  303.     char param_buf[128];
  304.     int first_param = 1;
  305.  
  306.     *out++ = '?';
  307.  
  308.     while (*p) {
  309.         const char *param_start = p;
  310.         const char *equal = strchr(p, '=');
  311.         const char *amp = strchr(p, '&');
  312.         const char *param_end = amp ? amp : (p + strlen(p));
  313.  
  314.         if (!equal || equal > param_end)
  315.             equal = param_end;
  316.  
  317.         size_t param_len = equal - param_start;
  318.         if (param_len >= sizeof(param_buf))
  319.             param_len = sizeof(param_buf) - 1;
  320.  
  321.         strncpy(param_buf, param_start, param_len);
  322.         param_buf[param_len] = '\0';
  323.  
  324.         /* Check if this parameter should be stripped */
  325.         if (!should_strip_param(url, param_buf)) {
  326.             /* Keep this parameter */
  327.             if (!first_param)
  328.                 *out++ = '&';
  329.  
  330.             size_t copy_len = param_end - param_start;
  331.             strncpy(out, param_start, copy_len);
  332.             out += copy_len;
  333.             first_param = 0;
  334.         }
  335.  
  336.         p = param_end;
  337.         if (*p == '&')
  338.             p++;
  339.     }
  340.  
  341.     /* Remove trailing '?' */
  342.     if (out > result && *(out - 1) == '?')
  343.         out--;
  344.  
  345.     *out = '\0';
  346.     return result;
  347. }
  348.  
  349. /* Check if a parameter should be stripped based on URL patterns */
  350. static int should_strip_param(const char *url, const char *param) {
  351.     char *p = NULL, *token = NULL, *params_copy = NULL;
  352.     int i;
  353.  
  354.     if (!compiled_patterns)
  355.         return 0;
  356.  
  357.     for (i = 0; url_patterns[i].name != NULL; i++) {
  358.         if (unreal_match(compiled_patterns[i], url)) {
  359.             /* URL matches pattern, check if parameter should be stripped */
  360.             safe_strdup(params_copy, url_patterns[i].tracking_params);
  361.             token = strtok_r(params_copy, ",", &p);
  362.  
  363.             while (token) {
  364.                 /* Trim whitespace */
  365.                 while (*token == ' ')
  366.                     token++;
  367.  
  368.                 if (strcasecmp(token, param) == 0) {
  369.                     safe_free(params_copy);
  370.                     return 1;  /* Strip this parameter */
  371.                 }
  372.                 token = strtok_r(NULL, ",", &p);
  373.             }
  374.             safe_free(params_copy);
  375.         }
  376.     }
  377.  
  378.     return 0; /* Don't strip */
  379. }
  380.  
  381. int queue_configtest(ConfigFile *cf, ConfigEntry *ce, int type, int *errs) {
  382.     ConfigEntry *cep;
  383.     int errors = 0;
  384.  
  385.     if (type != CONFIG_MAIN || !ce || !ce->name || strcmp(ce->name, "clearurls"))
  386.         return 0;
  387.  
  388.     for (cep = ce->items; cep; cep = cep->next) {
  389.         if (!cep->name)
  390.             continue;
  391.  
  392.         if (!strcmp(cep->name, "strip-notices")) {
  393.             if (config_checkval(cep->value, CFG_YESNO) == -1) {
  394.                 config_error("%s:%d: clearurls::strip-notices must be 'yes' or 'no'",
  395.                              cep->file->filename, cep->line_number);
  396.                 errors++;
  397.             }
  398.             continue;
  399.         }
  400.  
  401.         if (!strcmp(cep->name, "exempt-channels")) {
  402.             if (!cep->value || (strlen(cep->value) == 0)) {
  403.                 config_error("%s:%d: clearurls::exempt-channels cannot be empty if defined",
  404.                              cep->file->filename, cep->line_number);
  405.                 errors++;
  406.             }
  407.             continue;
  408.         }
  409.  
  410.         /*  found an unknown variable inside our block */
  411.         config_warn("%s:%d: unknown directive clearurls::%s",
  412.                     cep->file->filename, cep->line_number, cep->name);
  413.     }
  414.  
  415.     *errs = errors;
  416.     return errors ? -1 : 1;
  417. }
  418.  
  419. int queue_configrun(ConfigFile *cf, ConfigEntry *ce, int type) {
  420.     ConfigEntry *cep;
  421.  
  422.     if (type != CONFIG_MAIN || !ce || !ce->name || strcmp(ce->name, "clearurls"))
  423.         return 0;
  424.  
  425.     if (conf.exempt_channels) {
  426.         safe_free(conf.exempt_channels);
  427.     }
  428.     memset(&conf, 0, sizeof(conf));
  429.  
  430.     /* Default: Strip notices is ON */
  431.     conf.strip_notices = 1;
  432.  
  433.     /* Check for clearurls { ... } block */
  434.     for(cep = ce->items; cep; cep = cep->next) {
  435.         if(!cep->name)
  436.             continue;
  437.  
  438.         if(!strcmp(cep->name, "strip-notices")) {
  439.             /* Reads values like "yes", "no", "1", "0", "true", "false" */
  440.             conf.strip_notices = config_checkval(cep->value, CFG_YESNO);
  441.             continue;
  442.         }
  443.         if (!strcmp(cep->name, "exempt-channels")) {
  444.             /* Make a safe copy of the string */
  445.             safe_free(conf.exempt_channels);
  446.             safe_strdup(conf.exempt_channels, cep->value);
  447.             continue;
  448.         }
  449.     }
  450.     return 1;
  451. }
  452.  
  453. static int is_channel_exempt(const char *chan_name) {
  454.     char *list = NULL, *p = NULL, *token = NULL;
  455.     int result = 0;
  456.  
  457.     /* If no exemptions are configured, return 0 (false) */
  458.     if (!conf.exempt_channels)
  459.         return 0;
  460.  
  461.     /* Create a temporary copy because strtok modifies the string */
  462.     safe_strdup(list, conf.exempt_channels);
  463.     if (!list)
  464.         return 0;
  465.  
  466.     /* Iterate over comma-separated values */
  467.     for (token = strtok_r(list, ",", &p); token; token = strtok_r(NULL, ",", &p)) {
  468.         /* match_simple handles wildcards: * matches anything, ? matches one char */
  469.         if (match_simple(token, chan_name)) {
  470.             result = 1; /* Match found, channel is exempt */
  471.             break;
  472.         }
  473.     }
  474.  
  475.     safe_free(list);
  476.     return result; /* No match found */
  477. }

Raw Paste

Comments 0
Login to post a comment.
  • No comments yet. Be the first.
Login to post a comment. Login or Register
We use cookies. To comply with GDPR in the EU and the UK we have to show you these.

We use cookies and similar technologies to keep this website functional (including spam protection via Google reCAPTCHA or Cloudflare Turnstile), and — with your consent — to measure usage and show ads. See Privacy.