diff --git a/CHANGELOG.md b/CHANGELOG.md index 59f2931125..dcfe326e3d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,9 @@ ## DMPTool Releases +### v5.5.9 +- Updated config/robots.txt (added crawl delays, `/rails/active_storage/` and `/narratives/*.pdf` disallow rules, and an explicitly named bot group) and removed public/robots.txt + ### v5.58 - Added custom override for `idsc.miami.edu` and `miami.edu` diff --git a/config/robots.txt b/config/robots.txt index 16a87f98c2..df6326adc3 100644 --- a/config/robots.txt +++ b/config/robots.txt @@ -1,9 +1,27 @@ -# See http://www.robotstxt.org/wc/norobots.html for documentation on how to use the robots.txt file +# See https://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file # -# To ban all spiders from the entire site uncomment the next two lines: -# User-Agent: * -# Disallow: / +# This file is copied into place during the Capistrano deploy process. # Prevent spiders from downloading PDF files. User-agent: * -Disallow: /pdf/ +Crawl-delay: 5 +Disallow: /rails/active_storage/ +Disallow: /narratives/*.pdf + +# Explicitly name certain bots to ensure they are blocked from downloading PDF files. +User-agent: Amazonbot +User-agent: Amzn-SearchBot +User-agent: AmazonProductDiscoverybot +User-agent: Bytespider +User-agent: GPTBot +User-agent: OAI-SearchBot +User-agent: ClaudeBot +User-agent: CCBot +User-agent: Googlebot +User-agent: Meta-ExternalAgent +User-agent: PetalBot +User-agent: Turnitin +User-agent: TurnitinBot +Crawl-delay: 10 +Disallow: /rails/active_storage/ +Disallow: /narratives/*.pdf diff --git a/public/robots.txt b/public/robots.txt deleted file mode 100644 index 60cf8dfe18..0000000000 --- a/public/robots.txt +++ /dev/null @@ -1,29 +0,0 @@ -# See https://www.robotstxt.org/robotstxt.html for documentation on how to use the robots.txt file -# -# To ban all spiders from the entire site uncomment the next two lines: -# User-Agent: * -# Disallow: / - -# Prevent spiders from downloading PDF files. 
- -User-agent: * -Crawl-delay: 5 -Disallow: /pdf/ -Disallow: /rails/active_storage/ - -User-agent: Amazonbot -User-agent: Amzn-SearchBot -User-agent: AmazonProductDiscoverybot -User-agent: Bytespider -User-agent: GPTBot -User-agent: OAI-SearchBot -User-agent: ClaudeBot -User-agent: CCBot -User-agent: Googlebot -User-agent: Meta-ExternalAgent -User-agent: Turnitin -User-agent: TurnitinBot -Crawl-delay: 10 -Disallow: /pdf/ -Disallow: /rails/active_storage/ -