# robots.txt — Combat.ma
# https://www.combat.ma/robots.txt
# Updated: 2025-04-30
#
# Format notes:
#   * One directive per line; everything after "#" on a line is a comment.
#   * A crawler obeys only the single most specific User-agent group that
#     matches it. Crawlers matched by a named group below therefore do NOT
#     inherit the rules of the "*" group.

# Default group: allow all well-behaved crawlers, but keep them out of
# admin, cart, checkout, account, internal search, and API/JSON endpoints.
# (Disallow prevents crawling; it does not by itself guarantee de-indexing.)
User-agent: *
Allow: /
Disallow: /admin/
Disallow: /panier/
Disallow: /checkout/
Disallow: /compte/
Disallow: /recherche?
Disallow: /api/
Disallow: /*.json$
Disallow: /cdn-cgi/
# Be kind to the server. Crawl-delay is a non-standard directive: it must sit
# inside a group to take effect, and some crawlers (e.g. Googlebot) ignore it.
Crawl-delay: 2

# Allow Google to crawl images for image search.
# NOTE(review): this named group replaces the "*" group for Googlebot-Image,
# so none of the Disallow rules above apply to it — confirm that is intended.
User-agent: Googlebot-Image
Allow: /images/

# Allow Google AdsBot (it must be named explicitly; it ignores the "*" group).
User-agent: AdsBot-Google
Allow: /

# Throttle SEO crawlers. These are rate-limited, not blocked.
# NOTE(review): these groups carry no Disallow rules, so the bots may crawl
# every path, including those disallowed for "*" — confirm that is intended.
User-agent: SemrushBot
Crawl-delay: 10

User-agent: AhrefsBot
Crawl-delay: 10

# Block common scrapers and bad bots entirely.
User-agent: MJ12bot
Disallow: /

User-agent: DotBot
Disallow: /

# Sitemap location (independent of any User-agent group).
Sitemap: https://www.combat.ma/sitemap.xml