diff --git a/.github/workflows/paramiko-sftp-test.yml b/.github/workflows/paramiko-sftp-test.yml index 542eba102..d33ed8a8f 100644 --- a/.github/workflows/paramiko-sftp-test.yml +++ b/.github/workflows/paramiko-sftp-test.yml @@ -132,12 +132,29 @@ jobs: import os import time import sys - + import hashlib + import signal + + # Timeout handler for detecting hangs + class TimeoutError(Exception): + pass + + def timeout_handler(signum, frame): + raise TimeoutError("SFTP operation timed out - possible hang detected!") + + def get_file_hash(filepath): + """Calculate MD5 hash of a file.""" + hash_md5 = hashlib.md5() + with open(filepath, "rb") as f: + for chunk in iter(lambda: f.read(8192), b""): + hash_md5.update(chunk) + return hash_md5.hexdigest() + def run_sftp_test(): # Create SSH client ssh = paramiko.SSHClient() ssh.set_missing_host_key_policy(paramiko.AutoAddPolicy()) - + # Connect to server using password authentication with testuser print("Connecting to wolfSSHd server...") try: @@ -145,32 +162,69 @@ jobs: except Exception as e: print(f"Connection error: {e}") raise - + # Open SFTP session print("Opening SFTP session...") sftp = ssh.open_sftp() - + # Upload test print("Uploading 20MB test file...") start_time = time.time() sftp.put('/tmp/sftp_upload/test_upload.dat', '/tmp/test_upload.dat') upload_time = time.time() - start_time print(f"Upload completed in {upload_time:.2f} seconds") - + # Download test print("Downloading 20MB test file...") start_time = time.time() sftp.get('/tmp/test_upload.dat', '/tmp/sftp_download/test_download.dat') download_time = time.time() - start_time print(f"Download completed in {download_time:.2f} seconds") - + + # Stress test: Repeated GET operations with prefetch + # This tests the WS_WANT_WRITE handling during repeated transfers + # which was the original bug trigger (SFTP hang on non-blocking sockets) + print("\n=== Starting repeated GET stress test (prefetch enabled) ===") + num_iterations = 10 + timeout_seconds = 30 # Per-operation timeout to detect hangs + + orig_hash = get_file_hash('/tmp/sftp_upload/test_upload.dat') + + for i in range(num_iterations): + signal.signal(signal.SIGALRM, timeout_handler) + signal.alarm(timeout_seconds) + try: + download_path = f'/tmp/sftp_download/stress_test_{i}.dat' + start_time = time.time() + # Paramiko uses prefetch by default for get() + sftp.get('/tmp/test_upload.dat', download_path) + elapsed = time.time() - start_time + + # Verify integrity + down_hash = get_file_hash(download_path) + if orig_hash != down_hash: + print(f" Iteration {i+1}: FAILED - hash mismatch!") + return False + + print(f" Iteration {i+1}/{num_iterations}: OK ({elapsed:.2f}s)") + os.remove(download_path) # Cleanup + + except TimeoutError as e: + print(f" Iteration {i+1}: FAILED - {e}") + print(" This may indicate the WS_WANT_WRITE hang bug!") + return False + finally: + signal.alarm(0) # Always cancel alarm + + print(f"=== Stress test completed: {num_iterations} iterations OK ===\n") + # Close connections sftp.close() ssh.close() - + print("SFTP session closed") return True - + if __name__ == "__main__": try: success = run_sftp_test() diff --git a/src/internal.c b/src/internal.c index 72e909f45..a9ad38e25 100644 --- a/src/internal.c +++ b/src/internal.c @@ -3462,6 +3462,12 @@ int wolfSSH_SendPacket(WOLFSSH* ssh) } +int wolfSSH_OutputPending(WOLFSSH* ssh) +{ + return (ssh != NULL && ssh->outputBuffer.length > ssh->outputBuffer.idx); +} + + static int GetInputData(WOLFSSH* ssh, word32 size) { int in; diff --git a/src/wolfsftp.c b/src/wolfsftp.c index 9386d157e..90b31c944 100644 --- a/src/wolfsftp.c +++ b/src/wolfsftp.c @@ -145,7 +145,7 @@ enum WS_SFTP_LSTAT_STATE_ID { STATE_LSTAT_CLEANUP }; -/* This structure is to help with nonblocking and keeping track of state. +/* SFTP buffer for nonblocking state tracking. * If adding any read/writes use the wolfSSH_SFTP_buffer_read/send functions */ typedef struct WS_SFTP_BUFFER { byte* data; @@ -526,6 +526,19 @@ static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) return WS_BUFFER_E; } + /* Flush any pending data in SSH output buffer first. + * Handles case where previous send returned WS_WANT_WRITE + * and data is still buffered at the SSH layer. */ + if (wolfSSH_OutputPending(ssh)) { + ret = wolfSSH_SendPacket(ssh); + if (ret == WS_WANT_WRITE) { + return ret; + } + if (ret < 0) { + return ret; + } + } + /* Call wolfSSH worker if rekeying or adjusting window size */ err = wolfSSH_get_error(ssh); if (err == WS_WINDOW_FULL || err == WS_REKEYING) { @@ -546,6 +559,20 @@ static int wolfSSH_SFTP_buffer_send(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer) } +#ifdef WOLFSSH_TEST_INTERNAL +int wolfSSH_TestSftpBufferSend(WOLFSSH* ssh, + byte* data, word32 sz, word32 idx) +{ + WS_SFTP_BUFFER buffer; + + buffer.data = data; + buffer.sz = sz; + buffer.idx = idx; + return wolfSSH_SFTP_buffer_send(ssh, &buffer); +} +#endif + + /* returns the amount read on success */ static int wolfSSH_SFTP_buffer_read(WOLFSSH* ssh, WS_SFTP_BUFFER* buffer, int readSz) @@ -1603,6 +1630,13 @@ int wolfSSH_SFTP_read(WOLFSSH* ssh) ssh->error = WS_WANT_WRITE; return WS_FATAL_ERROR; } + /* Check if SSH layer still has pending data from WS_WANT_WRITE. + * Even if SFTP buffer is fully consumed, the data may still be + * sitting in the SSH output buffer waiting to be sent. */ + if (wolfSSH_OutputPending(ssh)) { + ssh->error = WS_WANT_WRITE; + return WS_FATAL_ERROR; + } ret = WS_SUCCESS; state->toSend = 0; } diff --git a/tests/regress.c b/tests/regress.c index 5f6bcf170..bc406a63b 100644 --- a/tests/regress.c +++ b/tests/regress.c @@ -40,6 +40,9 @@ #include #include #include +#ifdef WOLFSSH_SFTP + #include +#endif #include "apps/wolfssh/common.h" #ifndef WOLFSSH_NO_ABORT @@ -482,6 +485,78 @@ static void TestWorkerReadsWhenSendWouldBlock(void) #endif +#ifdef WOLFSSH_SFTP +/* Test that wolfSSH_SFTP_buffer_send() properly handles WS_WANT_WRITE when + * SSH output buffer has pending data. This is a regression test for + * the SFTP hang issue with non-blocking sockets. + * + * The fix checks for pending data in ssh->outputBuffer at the start of + * wolfSSH_SFTP_buffer_send() and returns WS_WANT_WRITE if the flush fails. */ +static int sftpWantWriteCallCount = 0; + +static int SftpWantWriteSendCb(WOLFSSH* ssh, void* buf, word32 sz, void* ctx) +{ + (void)ssh; (void)buf; (void)ctx; + sftpWantWriteCallCount++; + /* First call returns WANT_WRITE, subsequent calls succeed */ + if (sftpWantWriteCallCount == 1) { + return WS_CBIO_ERR_WANT_WRITE; + } + return (int)sz; +} + +static int SftpDummyRecv(WOLFSSH* ssh, void* buf, word32 sz, void* ctx) +{ + (void)ssh; (void)buf; (void)sz; (void)ctx; + return WS_CBIO_ERR_WANT_READ; +} + +static void TestSftpBufferSendPendingOutput(void) +{ + WOLFSSH_CTX* ctx; + WOLFSSH* ssh; + byte testData[16]; + int ret; + + ctx = wolfSSH_CTX_new(WOLFSSH_ENDPOINT_SERVER, NULL); + AssertNotNull(ctx); + + wolfSSH_SetIOSend(ctx, SftpWantWriteSendCb); + wolfSSH_SetIORecv(ctx, SftpDummyRecv); + + ssh = wolfSSH_new(ctx); + AssertNotNull(ssh); + + WMEMSET(testData, 0x42, sizeof(testData)); + + /* Simulate pending data in SSH output buffer (as if previous send + * returned WS_WANT_WRITE and data was buffered). + * Note: outputBuffer is initialized by BufferInit() with bufferSz set + * to at least STATIC_BUFFER_LEN (16 bytes), so we use a smaller value. */ + ssh->outputBuffer.length = 8; /* 8 bytes pending */ + ssh->outputBuffer.idx = 0; /* none sent yet */ + + sftpWantWriteCallCount = 0; + + /* Call wolfSSH_TestSftpBufferSend - should return WS_WANT_WRITE because + * the fix detects pending data in outputBuffer and tries to flush it, + * which fails with WS_WANT_WRITE from our callback. + * + * Before the fix, the function would ignore the pending SSH output buffer + * data and proceed to send new SFTP data, leading to a hang because the + * pending data was never flushed. */ + ret = wolfSSH_TestSftpBufferSend(ssh, testData, sizeof(testData), 0); + AssertIntEQ(ret, WS_WANT_WRITE); + + /* Verify the SSH output buffer still has pending data */ + AssertTrue(ssh->outputBuffer.length > ssh->outputBuffer.idx); + + wolfSSH_free(ssh); + wolfSSH_CTX_free(ctx); +} +#endif /* WOLFSSH_SFTP */ + + int main(int argc, char** argv) { WOLFSSH_CTX* ctx; @@ -515,6 +590,10 @@ int main(int argc, char** argv) TestWorkerReadsWhenSendWouldBlock(); #endif +#ifdef WOLFSSH_SFTP + TestSftpBufferSendPendingOutput(); +#endif + /* TODO: add app-level regressions that simulate stdin EOF/password * prompts and mid-session socket closes once the test harness can * drive the wolfssh client without real sockets/tty. */ diff --git a/wolfssh/internal.h b/wolfssh/internal.h index 4af8bf6c2..dd074c3da 100644 --- a/wolfssh/internal.h +++ b/wolfssh/internal.h @@ -1072,6 +1072,7 @@ enum ChannelOpenFailReasons { WOLFSSH_LOCAL int DoReceive(WOLFSSH*); WOLFSSH_LOCAL int DoProtoId(WOLFSSH*); WOLFSSH_LOCAL int wolfSSH_SendPacket(WOLFSSH*); +WOLFSSH_LOCAL int wolfSSH_OutputPending(WOLFSSH*); WOLFSSH_LOCAL int SendProtoId(WOLFSSH*); WOLFSSH_LOCAL int SendKexInit(WOLFSSH*); WOLFSSH_LOCAL int SendKexDhInit(WOLFSSH*); diff --git a/wolfssh/wolfsftp.h b/wolfssh/wolfsftp.h index f2084524e..f6c1add09 100644 --- a/wolfssh/wolfsftp.h +++ b/wolfssh/wolfsftp.h @@ -282,6 +282,11 @@ WOLFSSL_LOCAL int SFTP_RemoveHandleNode(WOLFSSH* ssh, byte* handle, WOLFSSH_LOCAL void wolfSSH_SFTP_ShowSizes(void); +#ifdef WOLFSSH_TEST_INTERNAL + WOLFSSH_API int wolfSSH_TestSftpBufferSend(WOLFSSH* ssh, + byte* data, word32 sz, word32 idx); +#endif + #ifdef __cplusplus } #endif