diff --git a/assets/js/phoenix/socket.js b/assets/js/phoenix/socket.js index 204a5626a9..8aa4773921 100644 --- a/assets/js/phoenix/socket.js +++ b/assets/js/phoenix/socket.js @@ -36,6 +36,11 @@ import Timer from "./timer" * @param {number} [opts.longPollFallbackMs] - The millisecond time to attempt the primary transport * before falling back to the LongPoll transport. Disabled by default. * + * Clients without WebSocket support, including search engine crawlers whose renderers + * do not open WebSockets, fall back to LongPoll. Such crawlers then repeatedly request + * the LongPoll endpoint (e.g. `/live/longpoll` for LiveView), which serves no indexable + * content. Disallow that path in `robots.txt` to avoid spending crawl budget on it. + * * @param {boolean} [opts.debug] - When true, enables debug logging. Default false. * * @param {Function} [opts.encode] - The function to encode outgoing messages. diff --git a/installer/templates/phx_static/robots.txt b/installer/templates/phx_static/robots.txt index 26e06b5f19..eb674ddd65 100644 --- a/installer/templates/phx_static/robots.txt +++ b/installer/templates/phx_static/robots.txt @@ -3,3 +3,9 @@ # To ban all spiders from the entire site uncomment the next two lines: # User-agent: * # Disallow: / + +# /live/longpoll is LiveView's fallback transport for clients that can't open a +# WebSocket, which includes search engine crawlers. It carries no indexable +# content, so disallow it to keep crawlers from spending crawl budget there. 
+User-agent: * +Disallow: /live/longpoll diff --git a/installer/test/phx_new_test.exs b/installer/test/phx_new_test.exs index 4314855c29..57bcfaa677 100644 --- a/installer/test/phx_new_test.exs +++ b/installer/test/phx_new_test.exs @@ -141,6 +141,7 @@ defmodule Mix.Tasks.Phx.NewTest do # assets assert_file("phx_blog/priv/static/images/logo.svg") + assert_file("phx_blog/priv/static/robots.txt", ~r"Disallow: /live/longpoll") assert_file("phx_blog/.gitignore", fn file -> assert file =~ "/priv/static/assets/"