From aa1b1676cb6e2ef663e8357b7a4fe18a976176b1 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 5 Feb 2026 11:51:00 +0000 Subject: [PATCH 1/2] Fix SFTP server hang on WS_WANT_WRITE with non-blocking sockets When wolfSSH_SFTP_buffer_send() called wolfSSH_stream_send(), the data would be consumed into the SSH output buffer even if the underlying socket returned EWOULDBLOCK/EAGAIN. SendChannelData() returns the positive dataSz on WS_WANT_WRITE, causing the SFTP layer to advance its buffer index as if the data was sent. The SSH output buffer still had pending data that was never flushed, leading to an indefinite hang. Fix: At the start of wolfSSH_SFTP_buffer_send(), check if there's pending data in ssh->outputBuffer from a previous WS_WANT_WRITE. If so, attempt to flush it first and return WS_WANT_WRITE if the flush fails. This ensures the caller retries until all pending data is sent. Also expose WS_SFTP_BUFFER and wolfSSH_SFTP_buffer_send() as WOLFSSH_LOCAL for unit testing, and add regression test that verifies the fix catches the bug. Fixes ZD 21157 --- .github/workflows/paramiko-sftp-test.yml | 68 +++++++++++++++--- src/wolfsftp.c | 30 ++++++-- tests/regress.c | 90 ++++++++++++++++++++++++ wolfssh/wolfsftp.h | 10 +++ 4 files changed, 183 insertions(+), 15 deletions(-) diff --git a/.github/workflows/paramiko-sftp-test.yml b/.github/workflows/paramiko-sftp-test.yml index 542eba102..d27469823 100644 --- a/.github/workflows/paramiko-sftp-test.yml +++ b/.github/workflows/paramiko-sftp-test.yml @@ -132,12 +132,29 @@ jobs: import os import time import sys - + import hashlib + import signal + + # Timeout handler for detecting hangs + class TimeoutError(Exception): + pass + + def timeout_handler(signum, frame): + raise TimeoutError("SFTP operation timed out - possible hang detected!") + + def get_file_hash(filepath): + """Calculate MD5 hash of a file.""" + hash_md5 = hashlib.md5() + with open(filepath, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + def run_sftp_test(): # Create SSH client ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - + # Connect to server using password authentication with testuser print("Connecting to wolfSSHd server...") try: @@ -145,32 +162,67 @@ jobs: except Exception as e: print(f"Connection error: {e}") raise - + # Open SFTP session print("Opening SFTP session...") sftp = ssh.open_sftp() - + # Upload test print("Uploading 20MB test file...") start_time = time.time() sftp.put('/tmp/sftp_upload/test_upload.dat', '/tmp/test_upload.dat') upload_time = time.time() - start_time print(f"Upload completed in {upload_time:.2f} seconds") - + # Download test print("Downloading 20MB test file...") start_time = time.time() sftp.get('/tmp/test_upload.dat', '/tmp/sftp_download/test_download.dat') download_time = time.time() - start_time print(f"Download completed in {download_time:.2f} seconds") - + + # Stress test: Repeated GET operations with prefetch + # This tests the WS_WANT_WRITE handling during repeated transfers + # which was the original bug trigger (SFTP hang on non-blocking sockets) + print("\n=== Starting repeated GET stress test (prefetch enabled) ===") + num_iterations = 10 + timeout_seconds = 30 # Per-operation timeout to detect hangs + + for i in range(num_iterations): + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(timeout_seconds) + try: + download_path = f'/tmp/sftp_download/stress_test_{i}.dat' + start_time = time.time() + # Paramiko uses prefetch by default for get() + sftp.get('/tmp/test_upload.dat', download_path) + elapsed = time.time() - start_time + signal.alarm(0) # Cancel alarm + + # Verify integrity + orig_hash = get_file_hash('/tmp/sftp_upload/test_upload.dat') + down_hash = get_file_hash(download_path) + if orig_hash != down_hash: + print(f" Iteration {i+1}: FAILED - hash mismatch!") + return False + + print(f" Iteration {i+1}/{num_iterations}: OK ({elapsed:.2f}s)") + os.remove(download_path) # Cleanup + + except TimeoutError as e: + print(f" Iteration {i+1}: FAILED - {e}") + print(" This may indicate the WS_WANT_WRITE hang bug!") + return False + + print(f"=== Stress test completed: {num_iterations} iterations OK ===\n") + # Close connections sftp.close() ssh.close() - + print("SFTP session closed") return True - + if __name__ == "__main__": try: success = run_sftp_test() diff --git a/src/wolfsftp.c b/src/wolfsftp.c index 9386d157e..8c9f2b916 100644 --- a/src/wolfsftp.c +++ b/src/wolfsftp.c @@ -145,13 +145,8 @@ enum WS_SFTP_LSTAT_STATE_ID { STATE_LSTAT_CLEANUP }; -/* This structure is to help with nonblocking and keeping track of state. +/* WS_SFTP_BUFFER is defined in wolfsftp.h for use with nonblocking state. * If adding any read/writes use the wolfSSH_SFTP_buffer_read/send functions */ -typedef struct WS_SFTP_BUFFER { - byte* data; - word32 sz; - word32 idx; -} WS_SFTP_BUFFER; typedef struct WS_SFTP_CHMOD_STATE { enum WS_SFTP_CHMOD_STATE_ID state; @@ -513,7 +508,7 @@ static void wolfSSH_SFTP_buffer_rewind(WS_SFTP_BUFFER* buffer) /* try to send the rest of the buffer (buffer.sz - buffer.idx) * increments idx with amount sent */ -static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) +WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) { int ret = WS_SUCCESS; int err; @@ -526,6 +521,19 @@ static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) return WS_BUFFER_E; } + /* Flush any pending data in SSH output buffer first. + * Handles case where previous send returned WS_WANT_WRITE + * and data is still buffered at the SSH layer. */ + if (ssh->outputBuffer.length > ssh->outputBuffer.idx) { + ret = wolfSSH_SendPacket(ssh); + if (ret == WS_WANT_WRITE) { + return ret; + } + if (ret < 0) { + return ret; + } + } + /* Call wolfSSH worker if rekeying or adjusting window size */ err = wolfSSH_get_error(ssh); if (err == WS_WINDOW_FULL || err == WS_REKEYING) { @@ -1603,6 +1611,14 @@ int wolfSSH_SFTP_read(WOLFSSH* ssh) ssh->error = WS_WANT_WRITE; return WS_FATAL_ERROR; } + /* Check if SSH layer still has pending data from WS_WANT_WRITE. + * Even if SFTP buffer is fully consumed, the data may still be + * sitting in the SSH output buffer waiting to be sent. */ + if (ssh->error == WS_WANT_WRITE || + ssh->outputBuffer.length > ssh->outputBuffer.idx) { + ssh->error = WS_WANT_WRITE; + return WS_FATAL_ERROR; + } ret = WS_SUCCESS; state->toSend = 0; } diff --git a/tests/regress.c b/tests/regress.c index 5f6bcf170..69c24b627 100644 --- a/tests/regress.c +++ b/tests/regress.c @@ -40,6 +40,9 @@ #include #include #include +#ifdef WOLFSSH_SFTP + #include +#endif #include "apps/wolfssh/common.h" #ifndef WOLFSSH_NO_ABORT @@ -482,6 +485,89 @@ static void TestWorkerReadsWhenSendWouldBlock(void) #endif +#ifdef WOLFSSH_SFTP +/* Test that wolfSSH_SFTP_buffer_send() properly handles WS_WANT_WRITE when + * SSH output buffer has pending data. This is a regression test for + * the SFTP hang issue with non-blocking sockets. + * + * The fix checks for pending data in ssh->outputBuffer at the start of + * wolfSSH_SFTP_buffer_send() and returns WS_WANT_WRITE if the flush fails. */ +static int sftpWantWriteCallCount = 0; + +static int SftpWantWriteSendCb(WOLFSSH* ssh, void* buf, word32 sz, void* ctx) +{ + (void)ssh; (void)buf; (void)ctx; + sftpWantWriteCallCount++; + /* First call returns WANT_WRITE, subsequent calls succeed */ + if (sftpWantWriteCallCount == 1) { + return WS_CBIO_ERR_WANT_WRITE; + } + return (int)sz; +} + +static int SftpDummyRecv(WOLFSSH* ssh, void* buf, word32 sz, void* ctx) +{ + (void)ssh; (void)buf; (void)sz; (void)ctx; + return WS_CBIO_ERR_WANT_READ; +} + +static void TestSftpBufferSendPendingOutput(void) +{ + WOLFSSH_CTX* ctx; + WOLFSSH* ssh; + WS_SFTP_BUFFER sftpBuf; + byte testData[16]; + int ret; + + ctx = wolfSSH_CTX_new(WOLFSSH_ENDPOINT_SERVER, NULL); + AssertNotNull(ctx); + + wolfSSH_SetIOSend(ctx, SftpWantWriteSendCb); + wolfSSH_SetIORecv(ctx, SftpDummyRecv); + + ssh = wolfSSH_new(ctx); + AssertNotNull(ssh); + + /* Set up SFTP buffer with some data to send */ + WMEMSET(testData, 0x42, sizeof(testData)); + WMEMSET(&sftpBuf, 0, sizeof(sftpBuf)); + sftpBuf.data = testData; + sftpBuf.sz = sizeof(testData); + sftpBuf.idx = 0; + + /* Simulate pending data in SSH output buffer (as if previous send + * returned WS_WANT_WRITE and data was buffered). + * Note: outputBuffer is initialized by BufferInit() with bufferSz set + * to at least STATIC_BUFFER_LEN (16 bytes), so we use a smaller value. */ + ssh->outputBuffer.length = 8; /* 8 bytes pending */ + ssh->outputBuffer.idx = 0; /* none sent yet */ + + sftpWantWriteCallCount = 0; + + /* Call wolfSSH_SFTP_buffer_send - should return WS_WANT_WRITE because + * the fix detects pending data in outputBuffer and tries to flush it, + * which fails with WS_WANT_WRITE from our callback. + * + * Before the fix, the function would ignore the pending SSH output buffer + * data and proceed to send new SFTP data, leading to a hang because the + * pending data was never flushed. */ + ret = wolfSSH_SFTP_buffer_send(ssh, &sftpBuf); + AssertIntEQ(ret, WS_WANT_WRITE); + + /* Verify the SFTP buffer was NOT consumed (idx should still be 0). + * This is critical - the SFTP layer must not advance its buffer index + * until the SSH output buffer is flushed. */ + AssertIntEQ(sftpBuf.idx, 0); + + /* Verify the SSH output buffer still has pending data */ + AssertTrue(ssh->outputBuffer.length > ssh->outputBuffer.idx); + + wolfSSH_free(ssh); + wolfSSH_CTX_free(ctx); +} +#endif /* WOLFSSH_SFTP */ + + int main(int argc, char** argv) { WOLFSSH_CTX* ctx; @@ -515,6 +601,10 @@ int main(int argc, char** argv) TestWorkerReadsWhenSendWouldBlock(); #endif +#ifdef WOLFSSH_SFTP + TestSftpBufferSendPendingOutput(); +#endif + /* TODO: add app-level regressions that simulate stdin EOF/password * prompts and mid-session socket closes once the test harness can * drive the wolfssh client without real sockets/tty. */ diff --git a/wolfssh/wolfsftp.h b/wolfssh/wolfsftp.h index f2084524e..1e8b9a40d 100644 --- a/wolfssh/wolfsftp.h +++ b/wolfssh/wolfsftp.h @@ -173,6 +173,13 @@ struct WS_SFTPNAME { #define WOLFSSH_MAX_SFTP_RECV 32768 #endif +/* SFTP buffer for nonblocking state tracking */ +typedef struct WS_SFTP_BUFFER { + byte* data; + word32 sz; + word32 idx; +} WS_SFTP_BUFFER; + /* functions for establishing a connection */ WOLFSSH_API int wolfSSH_SFTP_accept(WOLFSSH* ssh); WOLFSSH_API int wolfSSH_SFTP_connect(WOLFSSH* ssh); @@ -282,6 +289,9 @@ WOLFSSL_LOCAL int SFTP_RemoveHandleNode(WOLFSSH* ssh, byte* handle, WOLFSSH_LOCAL void wolfSSH_SFTP_ShowSizes(void); +/* SFTP buffer send - exposed for testing */ +WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer); + #ifdef __cplusplus } #endif From 67c7b9cce97a868513f6cc8952845dc34b4dd706 Mon Sep 17 00:00:00 2001 From: Andrew Hutchings Date: Thu, 12 Feb 2026 06:40:29 +0000 Subject: [PATCH 2/2] Address items in the review --- .github/workflows/paramiko-sftp-test.yml | 6 +++-- src/internal.c | 6 +++++ src/wolfsftp.c | 28 +++++++++++++++++++----- tests/regress.c | 15 ++----------- wolfssh/internal.h | 1 + wolfssh/wolfsftp.h | 13 ++++------- 6 files changed, 40 insertions(+), 29 deletions(-) diff --git a/.github/workflows/paramiko-sftp-test.yml b/.github/workflows/paramiko-sftp-test.yml index d27469823..d33ed8a8f 100644 --- a/.github/workflows/paramiko-sftp-test.yml +++ b/.github/workflows/paramiko-sftp-test.yml @@ -188,6 +188,8 @@ jobs: num_iterations = 10 timeout_seconds = 30 # Per-operation timeout to detect hangs + orig_hash = get_file_hash('/tmp/sftp_upload/test_upload.dat') + for i in range(num_iterations): signal.signal(signal.SIGALRM, timeout_handler) signal.alarm(timeout_seconds) @@ -197,10 +199,8 @@ jobs: # Paramiko uses prefetch by default for get() sftp.get('/tmp/test_upload.dat', download_path) elapsed = time.time() - start_time - signal.alarm(0) # Cancel alarm # Verify integrity - orig_hash = get_file_hash('/tmp/sftp_upload/test_upload.dat') down_hash = get_file_hash(download_path) if orig_hash != down_hash: print(f" Iteration {i+1}: FAILED - hash mismatch!") @@ -213,6 +213,8 @@ jobs: print(f" Iteration {i+1}: FAILED - {e}") print(" This may indicate the WS_WANT_WRITE hang bug!") return False + finally: + signal.alarm(0) # Always cancel alarm print(f"=== Stress test completed: {num_iterations} iterations OK ===\n") diff --git a/src/internal.c b/src/internal.c index 72e909f45..a9ad38e25 100644 --- a/src/internal.c +++ b/src/internal.c @@ -3462,6 +3462,12 @@ int wolfSSH_SendPacket(WOLFSSH* ssh) } +int wolfSSH_OutputPending(WOLFSSH* ssh) +{ + return (ssh != NULL && ssh->outputBuffer.length > ssh->outputBuffer.idx); +} + + static int GetInputData(WOLFSSH* ssh, word32 size) { int in; diff --git a/src/wolfsftp.c b/src/wolfsftp.c index 8c9f2b916..90b31c944 100644 --- a/src/wolfsftp.c +++ b/src/wolfsftp.c @@ -145,8 +145,13 @@ enum WS_SFTP_LSTAT_STATE_ID { STATE_LSTAT_CLEANUP }; -/* WS_SFTP_BUFFER is defined in wolfsftp.h for use with nonblocking state. +/* SFTP buffer for nonblocking state tracking. * If adding any read/writes use the wolfSSH_SFTP_buffer_read/send functions */ +typedef struct WS_SFTP_BUFFER { + byte* data; + word32 sz; + word32 idx; +} WS_SFTP_BUFFER; typedef struct WS_SFTP_CHMOD_STATE { enum WS_SFTP_CHMOD_STATE_ID state; @@ -508,7 +513,7 @@ static void wolfSSH_SFTP_buffer_rewind(WS_SFTP_BUFFER* buffer) /* try to send the rest of the buffer (buffer.sz - buffer.idx) * increments idx with amount sent */ -WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) +static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) { int ret = WS_SUCCESS; int err; @@ -524,7 +529,7 @@ WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) /* Flush any pending data in SSH output buffer first. * Handles case where previous send returned WS_WANT_WRITE * and data is still buffered at the SSH layer. */ - if (ssh->outputBuffer.length > ssh->outputBuffer.idx) { + if (wolfSSH_OutputPending(ssh)) { ret = wolfSSH_SendPacket(ssh); if (ret == WS_WANT_WRITE) { return ret; @@ -554,6 +559,20 @@ WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) } +#ifdef WOLFSSH_TEST_INTERNAL +int wolfSSH_TestSftpBufferSend(WOLFSSH* ssh, + byte* data, word32 sz, word32 idx) +{ + WS_SFTP_BUFFER buffer; + + buffer.data = data; + buffer.sz = sz; + buffer.idx = idx; + return wolfSSH_SFTP_buffer_send(ssh, &buffer); +} +#endif + + /* returns the amount read on success */ static int wolfSSH_SFTP_buffer_read(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer, int readSz) @@ -1614,8 +1633,7 @@ int wolfSSH_SFTP_read(WOLFSSH* ssh) /* Check if SSH layer still has pending data from WS_WANT_WRITE. * Even if SFTP buffer is fully consumed, the data may still be * sitting in the SSH output buffer waiting to be sent. */ - if (ssh->error == WS_WANT_WRITE || - ssh->outputBuffer.length > ssh->outputBuffer.idx) { + if (wolfSSH_OutputPending(ssh)) { ssh->error = WS_WANT_WRITE; return WS_FATAL_ERROR; } diff --git a/tests/regress.c b/tests/regress.c index 69c24b627..bc406a63b 100644 --- a/tests/regress.c +++ b/tests/regress.c @@ -515,7 +515,6 @@ static void TestSftpBufferSendPendingOutput(void) { WOLFSSH_CTX* ctx; WOLFSSH* ssh; - WS_SFTP_BUFFER sftpBuf; byte testData[16]; int ret; @@ -528,12 +527,7 @@ static void TestSftpBufferSendPendingOutput(void) ssh = wolfSSH_new(ctx); AssertNotNull(ssh); - /* Set up SFTP buffer with some data to send */ WMEMSET(testData, 0x42, sizeof(testData)); - WMEMSET(&sftpBuf, 0, sizeof(sftpBuf)); - sftpBuf.data = testData; - sftpBuf.sz = sizeof(testData); - sftpBuf.idx = 0; /* Simulate pending data in SSH output buffer (as if previous send * returned WS_WANT_WRITE and data was buffered). @@ -544,21 +538,16 @@ static void TestSftpBufferSendPendingOutput(void) sftpWantWriteCallCount = 0; - /* Call wolfSSH_SFTP_buffer_send - should return WS_WANT_WRITE because + /* Call wolfSSH_TestSftpBufferSend - should return WS_WANT_WRITE because * the fix detects pending data in outputBuffer and tries to flush it, * which fails with WS_WANT_WRITE from our callback. * * Before the fix, the function would ignore the pending SSH output buffer * data and proceed to send new SFTP data, leading to a hang because the * pending data was never flushed. */ - ret = wolfSSH_SFTP_buffer_send(ssh, &sftpBuf); + ret = wolfSSH_TestSftpBufferSend(ssh, testData, sizeof(testData), 0); AssertIntEQ(ret, WS_WANT_WRITE); - /* Verify the SFTP buffer was NOT consumed (idx should still be 0). - * This is critical - the SFTP layer must not advance its buffer index - * until the SSH output buffer is flushed. */ - AssertIntEQ(sftpBuf.idx, 0); - /* Verify the SSH output buffer still has pending data */ AssertTrue(ssh->outputBuffer.length > ssh->outputBuffer.idx); diff --git a/wolfssh/internal.h b/wolfssh/internal.h index 4af8bf6c2..dd074c3da 100644 --- a/wolfssh/internal.h +++ b/wolfssh/internal.h @@ -1072,6 +1072,7 @@ enum ChannelOpenFailReasons { WOLFSSH_LOCAL int DoReceive(WOLFSSH*); WOLFSSH_LOCAL int DoProtoId(WOLFSSH*); WOLFSSH_LOCAL int wolfSSH_SendPacket(WOLFSSH*); +WOLFSSH_LOCAL int wolfSSH_OutputPending(WOLFSSH*); WOLFSSH_LOCAL int SendProtoId(WOLFSSH*); WOLFSSH_LOCAL int SendKexInit(WOLFSSH*); WOLFSSH_LOCAL int SendKexDhInit(WOLFSSH*); diff --git a/wolfssh/wolfsftp.h b/wolfssh/wolfsftp.h index 1e8b9a40d..f6c1add09 100644 --- a/wolfssh/wolfsftp.h +++ b/wolfssh/wolfsftp.h @@ -173,13 +173,6 @@ struct WS_SFTPNAME { #define WOLFSSH_MAX_SFTP_RECV 32768 #endif -/* SFTP buffer for nonblocking state tracking */ -typedef struct WS_SFTP_BUFFER { - byte* data; - word32 sz; - word32 idx; -} WS_SFTP_BUFFER; - /* functions for establishing a connection */ WOLFSSH_API int wolfSSH_SFTP_accept(WOLFSSH* ssh); WOLFSSH_API int wolfSSH_SFTP_connect(WOLFSSH* ssh); @@ -289,8 +282,10 @@ WOLFSSL_LOCAL int SFTP_RemoveHandleNode(WOLFSSH* ssh, byte* handle, WOLFSSH_LOCAL void wolfSSH_SFTP_ShowSizes(void); -/* SFTP buffer send - exposed for testing */ -WOLFSSH_LOCAL int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer); +#ifdef WOLFSSH_TEST_INTERNAL + WOLFSSH_API int wolfSSH_TestSftpBufferSend(WOLFSSH* ssh, + byte* data, word32 sz, word32 idx); +#endif #ifdef __cplusplus }