From a07699c403d9d8acfb463261748b2e298c650fa8 Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Sat, 6 Jun 2026 22:57:03 +0700 Subject: [PATCH 1/2] Disallow /live/longpoll in generated robots.txt Clients that can't open a WebSocket fall back to the LongPoll transport, and search engine crawlers are in that group: their renderers don't open WebSockets. With longPollFallbackMs set (the default in generated apps), they fall back and repeatedly fetch /live/longpoll while rendering each page. That endpoint serves no indexable content, so the requests are wasted crawl budget. Add the rule to the scaffolded robots.txt and assert it in the installer test so a future template edit can't silently drop it. --- installer/templates/phx_static/robots.txt | 6 ++++++ installer/test/phx_new_test.exs | 1 + 2 files changed, 7 insertions(+) diff --git a/installer/templates/phx_static/robots.txt b/installer/templates/phx_static/robots.txt index 26e06b5f19..eb674ddd65 100644 --- a/installer/templates/phx_static/robots.txt +++ b/installer/templates/phx_static/robots.txt @@ -3,3 +3,9 @@ # To ban all spiders from the entire site uncomment the next two lines: # User-agent: * # Disallow: / + +# /live/longpoll is LiveView's fallback transport for clients that can't open a +# WebSocket, which includes search engine crawlers. It carries no indexable +# content, so disallow it to keep crawlers from spending crawl budget there. 
+User-agent: * +Disallow: /live/longpoll diff --git a/installer/test/phx_new_test.exs b/installer/test/phx_new_test.exs index 4314855c29..57bcfaa677 100644 --- a/installer/test/phx_new_test.exs +++ b/installer/test/phx_new_test.exs @@ -141,6 +141,7 @@ defmodule Mix.Tasks.Phx.NewTest do # assets assert_file("phx_blog/priv/static/images/logo.svg") + assert_file("phx_blog/priv/static/robots.txt", ~r"Disallow: /live/longpoll") assert_file("phx_blog/.gitignore", fn file -> assert file =~ "/priv/static/assets/" From 72e9fcd348838935e6a899d0b9bdc8c461128ad2 Mon Sep 17 00:00:00 2001 From: Patrick Olsen Date: Sat, 6 Jun 2026 22:57:03 +0700 Subject: [PATCH 2/2] Document the crawler LongPoll fallback on longPollFallbackMs Clients without WebSocket support fall back to LongPoll, and search engine crawlers are the common case: their renderers don't open WebSockets and repeatedly request the LongPoll endpoint (/live/longpoll for LiveView), which serves no indexable content. Note this next to the option that enables the fallback so the robots.txt advice has a home. --- assets/js/phoenix/socket.js | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/assets/js/phoenix/socket.js b/assets/js/phoenix/socket.js index 204a5626a9..8aa4773921 100644 --- a/assets/js/phoenix/socket.js +++ b/assets/js/phoenix/socket.js @@ -36,6 +36,11 @@ import Timer from "./timer" * @param {number} [opts.longPollFallbackMs] - The millisecond time to attempt the primary transport * before falling back to the LongPoll transport. Disabled by default. * + * When set, clients that cannot open a WebSocket fall back to LongPoll. This includes + * search engine crawlers, whose renderers do not open WebSockets; they then repeatedly + * request the LongPoll endpoint (e.g. `/live/longpoll` for LiveView), which serves no + * indexable content. Disallow that path in `robots.txt` to avoid wasting crawl budget. + * * @param {boolean} [opts.debug] - When true, enables debug logging. Default false. 
* * @param {Function} [opts.encode] - The function to encode outgoing messages.