fix(esp_http_server): prevent silent message drop in httpd_queue_work

Closes https://github.com/espressif/esp-idf/issues/18563
This commit is contained in:
Ashish Sharma
2026-05-14 17:16:28 +08:00
parent 17a9a5cf21
commit c911c781ae
4 changed files with 116 additions and 36 deletions

View File

@@ -53,9 +53,18 @@ menu "HTTP Server"
config HTTPD_QUEUE_WORK_BLOCKING
bool "httpd_queue_work as blocking API"
help
This makes httpd_queue_work() API to wait until a message space is available on UDP control socket.
It internally uses a counting semaphore with count set to `LWIP_UDP_RECVMBOX_SIZE` to achieve this.
This config will slightly change API behavior to block until message gets delivered on control socket.
Selects the wait policy for httpd_queue_work() when the UDP control-socket
mbox is at capacity. The HTTP server always uses a counting semaphore
(sized to LWIP_UDP_RECVMBOX_SIZE) to prevent silent mbox overflow drops
that would leak the caller's async-send callback context.
When disabled (default): httpd_queue_work() returns ESP_FAIL immediately
if the mbox is full, so the caller can free its callback context. This
preserves the non-blocking semantics of httpd_ws_send_data_async().
When enabled: httpd_queue_work() blocks until a slot is available. Use
this only if your application can tolerate the call blocking and wants
guaranteed delivery instead of a fast-fail.
config HTTPD_ENABLE_EVENTS
bool "Enable ESP_HTTP_SERVER_EVENT"

View File

@@ -17,6 +17,7 @@
#include <esp_http_server.h>
#include "osal.h"
#include "freertos/semphr.h"
#include "sdkconfig.h"
#ifdef __cplusplus
@@ -130,9 +131,7 @@ struct httpd_data {
httpd_config_t config; /*!< HTTPD server configuration */
int listen_fd; /*!< Server listener FD */
int ctrl_fd; /*!< Ctrl message receiver FD */
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
SemaphoreHandle_t ctrl_sock_semaphore; /*!< Ctrl socket semaphore */
#endif
SemaphoreHandle_t ctrl_sock_semaphore; /*!< Ctrl mbox slot reservation (sized to LWIP_UDP_RECVMBOX_SIZE) */
int msg_fd; /*!< Ctrl message sender FD */
struct thread_data hd_td; /*!< Information for the HTTPD thread */
struct sock_db *hd_sd; /*!< The socket database */

View File

@@ -154,24 +154,27 @@ esp_err_t httpd_queue_work(httpd_handle_t handle, httpd_work_fn_t work, void *ar
.hc_work = work,
.hc_work_arg = arg,
};
/* Reserve a slot in the control mbox before sending. In blocking mode
* the caller waits for a slot; in the default non-blocking mode we
* fail fast so the caller knows the work was not queued. */
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
// Semaphore is acquired here and released after work function is executed.
if (xSemaphoreTake(hd->ctrl_sock_semaphore, portMAX_DELAY) == pdTRUE) {
const TickType_t wait = portMAX_DELAY;
#else
const TickType_t wait = 0;
#endif
int ret = cs_send_to_ctrl_sock(hd->msg_fd, hd->config.ctrl_port, &msg, sizeof(msg));
if (ret < 0) {
ESP_LOGW(TAG, LOG_FMT("failed to queue work"));
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
xSemaphoreGive(hd->ctrl_sock_semaphore);
#endif
return ESP_FAIL;
}
return ESP_OK;
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
if (xSemaphoreTake(hd->ctrl_sock_semaphore, wait) != pdTRUE) {
ESP_LOGW(TAG, LOG_FMT("ctrl socket queue full, work not queued"));
return ESP_FAIL;
}
ESP_LOGE(TAG, "Unable to acquire semaphore");
return ESP_FAIL;
#endif
int ret = cs_send_to_ctrl_sock(hd->msg_fd, hd->config.ctrl_port, &msg, sizeof(msg));
if (ret < 0) {
ESP_LOGW(TAG, LOG_FMT("failed to queue work"));
xSemaphoreGive(hd->ctrl_sock_semaphore);
return ESP_FAIL;
}
return ESP_OK;
}
esp_err_t httpd_get_client_list(httpd_handle_t handle, size_t *fds, int *client_fds)
@@ -211,16 +214,16 @@ static void httpd_process_ctrl_msg(struct httpd_data *hd)
int ret = recv(hd->ctrl_fd, &msg, sizeof(msg), 0);
if (ret <= 0) {
ESP_LOGW(TAG, LOG_FMT("error in recv (%d)"), errno);
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
/* Returning the mbox slot to the producer side. xSemaphoreGive is a
* no-op once the counting semaphore is at its max, which keeps the
* accounting safe under producers that don't take (e.g. async
* handler wakeups, shutdown). */
xSemaphoreGive(hd->ctrl_sock_semaphore);
#endif
return;
}
if (ret != sizeof(msg)) {
ESP_LOGW(TAG, LOG_FMT("incomplete msg"));
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
xSemaphoreGive(hd->ctrl_sock_semaphore);
#endif
return;
}
@@ -238,9 +241,7 @@ static void httpd_process_ctrl_msg(struct httpd_data *hd)
default:
break;
}
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
xSemaphoreGive(hd->ctrl_sock_semaphore);
#endif
}
// Called for each session from httpd_server
@@ -493,6 +494,10 @@ static void httpd_delete(struct httpd_data *hd)
free(hd->err_handler_fns);
free(ra->resp_hdrs);
free(hd->hd_sd);
if (hd->ctrl_sock_semaphore) {
vSemaphoreDelete(hd->ctrl_sock_semaphore);
hd->ctrl_sock_semaphore = NULL;
}
/* Free registered URI handlers */
httpd_unregister_all_uri_handlers(hd);
@@ -528,18 +533,18 @@ esp_err_t httpd_start(httpd_handle_t *handle, const httpd_config_t *config)
/* Failed to allocate memory */
return ESP_ERR_HTTPD_ALLOC_MEM;
}
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
/* Using a Counting Semaphore with count equals CONFIG_LWIP_UDP_RECVMBOX_SIZE
* as the number of UDP messages which can be stored is equal to UDP mailbox size.
* Using this, we can make sure that the work function is always received by the ctrl socket.
*/
/* Counting semaphore sized to the UDP control-socket recv mbox. Each
* httpd_queue_work() take reserves one mbox slot; httpd_process_ctrl_msg()
* gives one back per drain. This bounds the producer to the mbox capacity
* and prevents silent lwIP-mbox overflow drops that would otherwise leak
* the caller's async-send context. Always created so the default
* (non-blocking) httpd_queue_work() path can also rely on it. */
hd->ctrl_sock_semaphore = xSemaphoreCreateCounting(CONFIG_LWIP_UDP_RECVMBOX_SIZE, CONFIG_LWIP_UDP_RECVMBOX_SIZE);
if (hd->ctrl_sock_semaphore == NULL) {
ESP_LOGE(TAG, "Failed to create Semaphore");
httpd_delete(hd);
return ESP_ERR_HTTPD_ALLOC_MEM;
}
#endif
if (httpd_server_init(hd) != ESP_OK) {
httpd_delete(hd);
@@ -553,6 +558,12 @@ esp_err_t httpd_start(httpd_handle_t *handle, const httpd_config_t *config)
httpd_thread, hd,
hd->config.core_id,
hd->config.task_caps) != ESP_OK) {
/* Close the open socket */
close(hd->listen_fd);
/* Close the control socket */
cs_free_ctrl_sock(hd->ctrl_fd);
/* Close the message socket */
close(hd->msg_fd);
/* Failed to launch task */
httpd_delete(hd);
return ESP_ERR_HTTPD_TASK;
@@ -607,9 +618,6 @@ esp_err_t httpd_stop(httpd_handle_t handle)
}
ESP_LOGD(TAG, LOG_FMT("server stopped"));
#if CONFIG_HTTPD_QUEUE_WORK_BLOCKING
vSemaphoreDelete(hd->ctrl_sock_semaphore);
#endif
httpd_delete(hd);
esp_http_server_dispatch_event(HTTP_SERVER_EVENT_STOP, NULL, 0);
return ESP_OK;

View File

@@ -11,6 +11,8 @@
#include <esp_heap_caps.h>
#include <net/if.h>
#include "esp_log.h"
#include "freertos/FreeRTOS.h"
#include "freertos/semphr.h"
#ifdef CONFIG_HTTPD_WS_SUPPORT
#include "../../src/esp_httpd_priv.h"
@@ -425,6 +427,68 @@ TEST_CASE("httpd_resp_set_type rejects CRLF in content type", "[HTTP SERVER][sec
httpd_resp_set_type(&fake_req, "text/html\nX-Injected: pwned"));
}
/* ---- httpd_queue_work backpressure ---- */
static SemaphoreHandle_t s_qw_gate;
static volatile int s_qw_work_runs;
static void qw_blocking_work(void *arg)
{
/* Hold the httpd thread inside this work fn so the ctrl-socket mbox
* stops draining. Auto-release after 2 s as a safety net in case the
* test asserts mid-way and never reaches the explicit give. */
xSemaphoreTake((SemaphoreHandle_t)arg, pdMS_TO_TICKS(2000));
}
static void qw_counting_work(void *arg)
{
(void)arg;
s_qw_work_runs++;
}
TEST_CASE("httpd_queue_work fast-fails on ctrl mbox saturation", "[HTTP SERVER]")
{
test_case_uses_tcpip();
httpd_handle_t hd = NULL;
httpd_config_t config = HTTPD_DEFAULT_CONFIG();
TEST_ASSERT_EQUAL(ESP_OK, httpd_start(&hd, &config));
s_qw_gate = xSemaphoreCreateBinary();
TEST_ASSERT_NOT_NULL(s_qw_gate);
s_qw_work_runs = 0;
/* Park the httpd thread in a blocked work item so the mbox can fill. */
TEST_ASSERT_EQUAL(ESP_OK, httpd_queue_work(hd, qw_blocking_work, s_qw_gate));
vTaskDelay(pdMS_TO_TICKS(100));
/* Spam queue_work past the mbox cap; first ones succeed, rest must
* return ESP_FAIL synchronously (default non-blocking behavior). */
int ok_count = 0;
int fail_count = 0;
for (int i = 0; i < CONFIG_LWIP_UDP_RECVMBOX_SIZE * 2 + 4; i++) {
esp_err_t err = httpd_queue_work(hd, qw_counting_work, NULL);
if (err == ESP_OK) {
ok_count++;
} else {
TEST_ASSERT_EQUAL(ESP_FAIL, err);
fail_count++;
}
}
TEST_ASSERT_GREATER_THAN(0, ok_count);
TEST_ASSERT_LESS_OR_EQUAL(CONFIG_LWIP_UDP_RECVMBOX_SIZE, ok_count);
TEST_ASSERT_GREATER_THAN(0, fail_count);
/* Release the parked work; every accepted item must now actually run. */
xSemaphoreGive(s_qw_gate);
vTaskDelay(pdMS_TO_TICKS(300));
TEST_ASSERT_EQUAL(ok_count, s_qw_work_runs);
vSemaphoreDelete(s_qw_gate);
s_qw_gate = NULL;
TEST_ASSERT_EQUAL(ESP_OK, httpd_stop(hd));
}
#ifdef CONFIG_HTTPD_WS_SUPPORT
TEST_CASE("WS recv failure marks close without dispatching handler", "[HTTP SERVER][websocket]")
{