// Source: https://docs.datadoghq.com/real_user_monitoring/guide/identify-bots-in-the-ui/
/**
 * Regex source listing name fragments of bots/crawlers as a single
 * alternation group, taken from the guide linked above.
 *
 * Notes for maintainers:
 * - Entries are regex source, not literal strings: an unescaped `.` (e.g. in
 *   `grub.org`) matches any character.
 * - The bare `bot` alternative matches any input containing "bot", which
 *   already covers most of the more specific `...bot` entries.
 * - Presumably meant to be tested against `navigator.userAgent` — confirm at
 *   the call site.
 */
const botNavigatorPattern =
    '(googlebot/|bot|Googlebot-Mobile|Googlebot-Image|Google favicon|Mediapartners-Google|bingbot|slurp|java|wget|curl|Commons-HttpClient|Python-urllib|libwww|httpunit|nutch|phpcrawl|msnbot|jyxobot|FAST-WebCrawler|FAST Enterprise Crawler|biglotron|teoma|convera|seekbot|gigablast|exabot|ngbot|ia_archiver|GingerCrawler|webmon |httrack|webcrawler|grub.org|UsineNouvelleCrawler|antibot|netresearchserver|speedy|fluffy|bibnum.bnf|findlink|msrbot|panscient|yacybot|AISearchBot|IOI|ips-agent|tagoobot|MJ12bot|dotbot|woriobot|yanga|buzzbot|mlbot|yandexbot|purebot|Linguee Bot|Voyager|CyberPatrol|voilabot|baiduspider|citeseerxbot|spbot|twengabot|postrank|turnitinbot|scribdbot|page2rss|sitebot|linkdex|Adidxbot|blekkobot|ezooms|dotbot|Mail.RU_Bot|discobot|heritrix|findthatfile|europarchive.org|NerdByNature.Bot|sistrix crawler|ahrefsbot|Aboundex|domaincrawler|wbsearchbot|summify|ccbot|edisterbot|seznambot|ec2linkfinder|gslfbot|aihitbot|intelium_bot|facebookexternalhit|yeti|RetrevoPageAnalyzer|lb-spider|sogou|lssbot|careerbot|wotbox|wocbot|ichiro|DuckDuckBot|lssrocketcrawler|drupact|webcompanycrawler|acoonbot|openindexspider|gnam gnam spider|web-archive-net.com.bot|backlinkcrawler|coccoc|integromedb|content crawler spider|toplistbot|seokicks-robot|it2media-domain-crawler|ip-web-crawler.com|siteexplorer.info|elisabot|proximic|changedetection|blexbot|arabot|WeSEE:Search|niki-bot|CrystalSemanticsBot|rogerbot|360Spider|psbot|InterfaxScanBot|Lipperhey SEO Service|CC Metadata Scaper|g00g1e.net|GrapeshotCrawler|urlappendbot|brainobot|fr-crawler|binlar|SimpleCrawler|Livelapbot|Twitterbot|cXensebot|smtbot|bnf.fr_bot|A6-Indexer|ADmantX|Facebot|Twitterbot|OrangeBot|memorybot|AdvBot|MegaIndex|SemanticScholarBot|ltx71|nerdybot|xovibot|BUbiNG|Qwantify|archive.org_bot|Applebot|TweetmemeBot|crawler4j|findxbot|SemrushBot|yoozBot|lipperhey|y!j-asr|Domain Re-Animator Bot|AddThis)'

/**
 * Case-insensitive (`i` flag), unanchored regex built from
 * `botNavigatorPattern`; it matches when any listed fragment occurs anywhere
 * in the tested string.
 */
export const botNavigatorRegex = new RegExp(botNavigatorPattern, 'i')

// Analytics / error-reporting endpoints
const analyticsUrls = [
    'browser-intake-datadoghq.eu',
    'o350260.ingest.sentry.io',
    'analytics.google.com',
    'region1.analytics.google.com'
]

// Third party services (e.g. marketing, user support, ads)
const thirdPartyServicesUrls = [
    // Google Ads
    'stats.g.doubleclick.net',
    'td.doubleclick.net',
    // Hubspot
    'track.hubspot.com',
    'app.hubspot.com',
    'perf-na1.hsforms.com',
    'cta-service-cms2.hubspot.com',
    'forms.hubspot.com',
    'api.hubspot.com',
    'js.hs-scripts.com',
    'js.hsleadflows.net',
    'js.hs-analytics.net',
    'js.hs-banner.com',
    'js.hubspot.com',
    'js.hubspotfeedback.com',
    'js.usemessages.com',
    // Video consultation
    'ipp01.patientus.de/hlg.tokbox.com/prod/logging',
    'hlg.tokbox.com/prod/logging',
    // Plugin vendors
    'memedcaroagcyp.dataplane.rudderstack.com',
    'api.rudderlabs.com',
    // Salesforce
    'd.la1-core1.sfdc-cehfhs.salesforceliveagent.com',
    'd.la2-c1-cdg.salesforceliveagent.com',
    'docplanner-experience.force.com/docplannersurveys',
    // Others
    'backend.getbeamer.com',
    'www.promoter.ninja/api',
    'cdn.promoter.ninja',
    'st.getsitecontrol.com',
    'widgets.getsitecontrol.com',
    'dev.visualwebsiteoptimizer.com'
]

// Third party URLs that cannot be expressed as a fixed host/path prefix
// (here: any Google country TLD), so they are written as regexes directly.
const thirdPartyServicesUrlsRegexps = [/https?:\/\/www\.google\..*\/ads\/ga-audiences/]

// Unknown
const unknownUsageUrls = ['ipv4.icanhazip.com']

// Comet techniques (https://docs.datadoghq.com/real_user_monitoring/browser/monitoring_page_performance/#how-page-activity-is-calculated)
const cometUrls: string[] = []

/**
 * Escapes every regex metacharacter in `text` so it can be embedded in a
 * `RegExp` source and match only itself (e.g. `.` no longer matches any
 * character).
 */
const escapeRegExp = (text: string): string => text.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')

// NOTE: the provided urls are not domains, as they may include part of the path.
// Each entry is escaped before interpolation so that it is matched literally;
// only the `https?://` scheme prefix is treated as regex syntax. The resulting
// patterns are intentionally unanchored, like the hand-written regexes above.
const regexpFromUrls = [...analyticsUrls, ...thirdPartyServicesUrls, ...unknownUsageUrls, ...cometUrls].map(
    url => new RegExp(`https?://${escapeRegExp(url)}`)
)

/**
 * Regexes for request URLs to be excluded: one per listed URL fragment
 * (http or https), followed by the hand-written third party regexes.
 * Presumably passed to the Datadog RUM SDK's `excludedActivityUrls` option —
 * confirm at the call site.
 */
export const excludedActivityUrls: RegExp[] = [...regexpFromUrls, ...thirdPartyServicesUrlsRegexps]
