# robots.txt for allecijfers.nl
#
# Layout: one directive per line, one group per crawler class. Groups are
# separated by a blank line; no blank lines are used inside a group so that
# parsers which treat a blank line as a group terminator read the same rules.
#
# There is exactly ONE "User-agent: *" group (Group 2). A second wildcard
# group with an empty "Disallow:" would be merged away by RFC 9309 parsers,
# but first-match parsers would use it alone and ignore Group 2's rules.
#
# "Allow: /" at the end of a group is the explicit default; the longer, more
# specific Disallow paths in the same group still take precedence over it.

# Group 1: Major search engines - allow everything except geojson files
User-agent: Googlebot
User-agent: Googlebot-Image
User-agent: Googlebot-Video
User-agent: Googlebot-News
User-agent: Google-Extended
User-agent: Bingbot
User-agent: MSNBot
User-agent: DuckDuckBot
User-agent: Qwantify
User-agent: Applebot
User-agent: Applebot-Extended
User-agent: YandexBot
User-agent: YandexImages
Disallow: /geodata_postcode/
Disallow: /geodata_gebieden/
Disallow: /geo_onderwijs/
Allow: /

# Group 2: Generic bots (every crawler not named in another group) - allow
# everything except geojson, images, maps, and URLs of interactive maps
# with query parameters
User-agent: *
Crawl-delay: 5
Disallow: /geodata_postcode/
Disallow: /geodata_gebieden/
Disallow: /geo_onderwijs/
Disallow: /afbeeldingen/
Disallow: /images/
Disallow: /kaarten/
Disallow: /maps/
Disallow: /kaart?
Disallow: /postcodekaart?
Disallow: /onderwijskaart?
Disallow: /kinderopvangkaart?
# Block non-existent or CMS-related paths (preventive)
Disallow: /api/
Disallow: /admin/
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-content/
Disallow: /xmlrpc.php
Disallow: /cgi-bin/
Disallow: /download/
Disallow: /search
Disallow: /feed
Disallow: /login
Allow: /

# Group 3: AI and data crawlers - restricted access
# Same rules as Group 2, plus /postcode/ is blocked.
# NOTE(review): "CognitiveSEO Bot" contains a space, which is outside the
# RFC 9309 product-token grammar (letters, "-" and "_" only); parsers may
# match on "CognitiveSEO" alone - confirm the token this crawler honours.
User-agent: AI2Bot
User-agent: AhrefsBot
User-agent: Amazonbot
User-agent: Anthropic-AI
User-agent: Arquivo-web-crawler
User-agent: Archive.org_bot
User-agent: Baiduspider
User-agent: Baiduspider-image
User-agent: BLEXBot
User-agent: Bytespider
User-agent: CCBot
User-agent: ChatGPT-User
User-agent: Claude-Web
User-agent: ClaudeBot
User-agent: Cohere-AI
User-agent: Cohere-Training-Data-Crawler
User-agent: CognitiveSEO Bot
User-agent: DataForSEO
User-agent: Diffbot
User-agent: DotBot
User-agent: DuckAssistBot
User-agent: Exabot
User-agent: FacebookBot
User-agent: FriendlyCrawler
User-agent: GPTBot
User-agent: GPTBot-Image
User-agent: GPTBot-Video
User-agent: GPTCrawler
User-agent: Google-CloudVertexBot
User-agent: IA_Archiver
User-agent: IA_Archiver-Web.Archive.Org
User-agent: ImagesiftBot
User-agent: Img2Dataset
User-agent: Kangaroo-LLM
User-agent: LinkpadBot
User-agent: Lipperhey
User-agent: LRTBot
User-agent: Magpie-Crawler
User-agent: Meta-ExternalAgent
User-agent: Meta-ExternalFetcher
User-agent: MJ12bot
User-agent: NetpeakSpiderBot
User-agent: Nibbler
User-agent: OAI-SearchBot
User-agent: OnCrawl
User-agent: OpenAI-SearchBot
User-agent: Omgili
User-agent: OmgiliBot
User-agent: Operator
User-agent: PanguBot
User-agent: Peer39_Crawler
User-agent: Perplexity-User
User-agent: PerplexityBot
User-agent: PetalBot
User-agent: Proximic
User-agent: ResearchBot
User-agent: rogerbot
User-agent: SEOkicks-Robot
User-agent: SemrushBot
User-agent: SemrushBot-OCOB
User-agent: SemrushBot-SWA
User-agent: SilktideBot
User-agent: spbot
User-agent: Timpibot
User-agent: Webzio-Extended
User-agent: YouBot
User-agent: YandexAdditional
User-agent: YandexAdditionalBot
Crawl-delay: 2
Disallow: /geodata_postcode/
Disallow: /geodata_gebieden/
Disallow: /geo_onderwijs/
Disallow: /afbeeldingen/
Disallow: /images/
Disallow: /kaarten/
Disallow: /maps/
Disallow: /postcode/
Disallow: /kaart?
Disallow: /postcodekaart?
Disallow: /onderwijskaart?
Disallow: /kinderopvangkaart?
# Block non-existent or CMS-related paths (preventive)
Disallow: /api/
Disallow: /admin/
Disallow: /wp-admin/
Disallow: /wp-login.php
Disallow: /wp-content/
Disallow: /xmlrpc.php
Disallow: /cgi-bin/
Disallow: /download/
Disallow: /search
Disallow: /feed
Disallow: /login
Allow: /

# Group 4: Total scraping protection - block completely
# NOTE(review): "Offline Explorer", "Download Ninja" and "Brightbot 1.0"
# contain spaces / a version number, which are outside the RFC 9309
# product-token grammar; kept verbatim - confirm they match as intended.
User-agent: Sitecheck.InternetSeer.com
User-agent: Zealbot
User-agent: MSIECrawler
User-agent: SiteSnagger
User-agent: WebStripper
User-agent: WebCopier
User-agent: Fetch
User-agent: Offline Explorer
User-agent: Teleport
User-agent: TeleportPro
User-agent: WebZIP
User-agent: Linko
User-agent: HTTrack
User-agent: Microsoft.URL.Control
User-agent: Xenu
User-agent: Larbin
User-agent: Libwww
User-agent: ZyBORG
User-agent: Download Ninja
User-agent: Brightbot 1.0
User-agent: SiteSucker
User-agent: Python-Requests
User-agent: curl
User-agent: wget
User-agent: Go-http-client
User-agent: Java
User-agent: Node-fetch
User-agent: python-httpx
User-agent: aiohttp
User-agent: okhttp
User-agent: scrapy
User-agent: mechanize
User-agent: feedfetcher-google
User-agent: serpstatbot
User-agent: trendictionbot
Disallow: /

# Sitemap (applies to all crawlers; independent of the groups above)
Sitemap: https://allecijfers.nl/sitemap.xml