# Robots.txt for QuickAnalytics - Data Analytics Solutions UAE
# This file provides directives for search engine crawlers
# Learn more: https://developers.google.com/search/docs/crawling-indexing/robots/robots_txt

# Default rules for all web crawlers
User-agent: *
Allow: /

# Sitemap locations - help search engines discover all pages
Sitemap: https://quickanalytics.ae/sitemap.xml
Sitemap: https://quickanalytics.ae/sitemap-images.xml
Sitemap: https://quickanalytics.ae/sitemap-news.xml

# Crawl delay to be respectful of server resources (1 second)
# Note: Crawl-delay is not part of the robots.txt standard; Google ignores it,
# while some other crawlers (e.g. Bing) respect it
Crawl-delay: 1

# Disallow private and administrative areas
Disallow: /admin/
Disallow: /api/
Disallow: /crm/
Disallow: /_*
Disallow: /*.json$
Disallow: /404
Disallow: /500
Disallow: /auth/
Disallow: /callback
Disallow: /login
Disallow: /logout

# Disallow URL parameters that create duplicate content
# e.g. /services/?utm_source=newsletter is blocked, while /services/ itself stays crawlable
Disallow: /*?*utm_source=*
Disallow: /*?*utm_medium=*
Disallow: /*?*utm_campaign=*
Disallow: /*?*utm_content=*
Disallow: /*?*utm_term=*
Disallow: /*?*ref=*
Disallow: /*?*source=*
Disallow: /*?*fbclid=*
Disallow: /*?*gclid=*

# Explicitly allow important public directories and pages
Allow: /services/
Allow: /industries/
Allow: /resources/
Allow: /portfolio/
Allow: /about
Allow: /contact
Allow: /privacy
Allow: /terms
Allow: /cookies
Allow: /sitemap

# Allow CSS, JS, and media files for proper rendering
Allow: /*.css$
Allow: /*.js$
Allow: /*.png$
Allow: /*.jpg$
Allow: /*.jpeg$
Allow: /*.gif$
Allow: /*.webp$
Allow: /*.svg$
Allow: /*.ico$
Allow: /*.pdf$

# Specific directives for major search engines
# Note: a crawler that matches one of the named groups below follows only that
# group; the default rules above are not merged into it
User-agent: Googlebot
Allow: /
Crawl-delay: 1

User-agent: Bingbot
Allow: /
Crawl-delay: 2

User-agent: Slurp
Allow: /
Crawl-delay: 2

User-agent: DuckDuckBot
Allow: /
Crawl-delay: 1

User-agent: Baiduspider
Allow: /
Crawl-delay: 3

User-agent: YandexBot
Allow: /
Crawl-delay: 2

# Social media crawlers for link previews
User-agent: facebookexternalhit
Allow: /

User-agent: Twitterbot
Allow: /

User-agent: LinkedInBot
Allow: /

User-agent: WhatsApp
Allow: /

# SEO and analysis tools
User-agent: SemrushBot
Allow: /
Crawl-delay: 5

User-agent: AhrefsBot
Allow: /
Crawl-delay: 10

User-agent: MJ12bot
Allow: /
Crawl-delay: 10

# Block aggressive or unwanted crawlers
User-agent: SemrushBot-SA
Disallow: /

User-agent: MegaIndex
Disallow: /

User-agent: DotBot
Disallow: /

User-agent: CCBot
Disallow: /

User-agent: ChatGPT-User
Disallow: /

User-agent: GPTBot
Disallow: /

User-agent: Google-Extended
Disallow: /

# Archive crawlers
User-agent: ia_archiver
Allow: /

User-agent: archive.org_bot
Allow: /

# Additional notes:
# - This robots.txt follows best practices for SEO and crawler management
# - All important content pages are explicitly allowed for search engine indexing
# - Private areas and duplicate-content URLs are blocked in the default crawler rules
# - Crawl delays are set to balance SEO discovery with server performance
# - Major search engines and social media crawlers are accommodated
# - AI training bots are blocked to protect content
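
# Optional example (commented out): other AI / LLM data crawlers that are
# sometimes blocked in the same way as GPTBot and CCBot above. These are
# widely used user-agent tokens, but they change over time - verify each
# vendor's current crawler documentation before uncommenting.
#
# User-agent: ClaudeBot
# Disallow: /
#
# User-agent: PerplexityBot
# Disallow: /
#
# User-agent: Bytespider
# Disallow: /
#
# User-agent: Applebot-Extended
# Disallow: /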