From 99957f3c3fb55b0bcec01949ddf6af6772b8cd9b Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Wed, 4 Feb 2026 19:52:34 +0530 Subject: [PATCH 1/3] fix: sanitize 502 HTML responses from error messages --- src/lingodotdev/engine.py | 39 ++++++++++++++++++----- tests/test_502_handling.py | 63 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 7 deletions(-) create mode 100644 tests/test_502_handling.py diff --git a/src/lingodotdev/engine.py b/src/lingodotdev/engine.py index dbdcbd7..5587a70 100644 --- a/src/lingodotdev/engine.py +++ b/src/lingodotdev/engine.py @@ -217,9 +217,18 @@ async def _localize_chunk( if not response.is_success: response_preview = self._truncate_response(response.text) if 500 <= response.status_code < 600: + error_details = "" + try: + error_json = response.json() + if isinstance(error_json, dict) and "error" in error_json: + error_details = f" {error_json['error']}" + except Exception: + pass + + raise RuntimeError( raise RuntimeError( - f"Server error ({response.status_code}): {response.reason_phrase}. " - f"This may be due to temporary service issues. Response: {response_preview}" + f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} " + "This may be due to temporary service issues." ) elif response.status_code == 400: raise ValueError( @@ -463,9 +472,18 @@ async def recognize_locale(self, text: str) -> str: if not response.is_success: response_preview = self._truncate_response(response.text) if 500 <= response.status_code < 600: + error_details = "" + try: + error_json = response.json() + if isinstance(error_json, dict) and "error" in error_json: + error_details = f" {error_json['error']}" + except Exception: + pass + raise RuntimeError( - f"Server error ({response.status_code}): {response.reason_phrase}. " - f"This may be due to temporary service issues. 
Response: {response_preview}" + raise RuntimeError( + f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} " + "This may be due to temporary service issues." ) raise RuntimeError( f"Error recognizing locale ({response.status_code}): {response.reason_phrase}. " @@ -498,10 +516,17 @@ async def whoami(self) -> Optional[Dict[str, str]]: return {"email": payload["email"], "id": payload["id"]} if 500 <= response.status_code < 600: - response_preview = self._truncate_response(response.text) + error_details = "" + try: + error_json = response.json() + if isinstance(error_json, dict) and "error" in error_json: + error_details = f" {error_json['error']}" + except Exception: + pass + raise RuntimeError( - f"Server error ({response.status_code}): {response.reason_phrase}. " - f"This may be due to temporary service issues. Response: {response_preview}" + f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} " + "This may be due to temporary service issues." ) return None diff --git a/tests/test_502_handling.py b/tests/test_502_handling.py new file mode 100644 index 0000000..e9f9210 --- /dev/null +++ b/tests/test_502_handling.py @@ -0,0 +1,63 @@ +import pytest +import json +from unittest.mock import Mock, patch +from lingodotdev import LingoDotDevEngine + + +@pytest.mark.asyncio +async def test_502_html_handling(): + """Test that 502 errors with HTML bodies are sanitized""" + config = {"api_key": "test_key", "api_url": "https://api.test.com"} + + html_body = "<html><body>" + ("<h1>502 Bad Gateway</h1>" * 50) + "</body></html>" + assert len(html_body) > 200 # Ensure it triggers truncation + + with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post: + mock_response = Mock() + mock_response.is_success = False + mock_response.status_code = 502 + mock_response.reason_phrase = "Bad Gateway" + mock_response.text = html_body + mock_response.json.side_effect = ValueError( + "Not JSON" + ) # simulating non-JSON response + mock_post.return_value = mock_response + + async with LingoDotDevEngine(config) as engine: + with pytest.raises(RuntimeError) as exc_info: + await engine.localize_text("hello", {"target_locale": "es"}) + + error_msg = str(exc_info.value) + + # Assertions + assert "Server error (502): Bad Gateway." in error_msg + assert "This may be due to temporary service issues." in error_msg + assert "Response:" not in error_msg + assert "<html>" not in error_msg + assert "<body>" not in error_msg + + +@pytest.mark.asyncio +async def test_500_json_handling(): + """Test that 500 errors with JSON bodies are preserved""" + config = {"api_key": "test_key", "api_url": "https://api.test.com"} + error_json = {"error": "Specific internal error message"} + + with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post: + mock_response = Mock() + mock_response.is_success = False + mock_response.status_code = 500 + mock_response.reason_phrase = "Internal Server Error" + mock_response.text = json.dumps(error_json) # Needed for response_preview + mock_response.json.return_value = error_json + mock_post.return_value = mock_response + + async with LingoDotDevEngine(config) as engine: + with pytest.raises(RuntimeError) as exc_info: + await engine.localize_text("hello", {"target_locale": "es"}) + + error_msg = str(exc_info.value) + + # Assertions + assert "Server error (500): Internal Server Error." 
in error_msg + assert "Specific internal error message" in error_msg From 1e7ede5a56b1339d944b4c61c46bfc23254529a5 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Wed, 4 Feb 2026 20:12:49 +0530 Subject: [PATCH 2/3] fix: handle UnicodeDecodeError when parsing API responses --- src/lingodotdev/engine.py | 14 +++++++----- tests/test_unicode_handling.py | 39 ++++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 5 deletions(-) create mode 100644 tests/test_unicode_handling.py diff --git a/src/lingodotdev/engine.py b/src/lingodotdev/engine.py index 5587a70..57dd460 100644 --- a/src/lingodotdev/engine.py +++ b/src/lingodotdev/engine.py @@ -104,12 +104,17 @@ def _safe_parse_json(response: httpx.Response) -> Dict[str, Any]: """ try: return response.json() - except json.JSONDecodeError: - preview = LingoDotDevEngine._truncate_response(response.text) + except (json.JSONDecodeError, UnicodeDecodeError) as err: + try: + text = response.text + except UnicodeDecodeError: + text = response.content.decode("utf-8", errors="replace") + + preview = LingoDotDevEngine._truncate_response(text) raise RuntimeError( f"Failed to parse API response as JSON (status {response.status_code}). " f"This may indicate a gateway or proxy error. Response: {preview}" - ) + ) from err async def _localize_raw( self, @@ -225,7 +230,7 @@ async def _localize_chunk( except Exception: pass - raise RuntimeError( + raise RuntimeError( f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} " "This may be due to temporary service issues." @@ -480,7 +485,6 @@ async def recognize_locale(self, text: str) -> str: except Exception: pass - raise RuntimeError( raise RuntimeError( f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} " "This may be due to temporary service issues." 
diff --git a/tests/test_unicode_handling.py b/tests/test_unicode_handling.py new file mode 100644 index 0000000..0cdc755 --- /dev/null +++ b/tests/test_unicode_handling.py @@ -0,0 +1,39 @@ +import pytest +import json +from unittest.mock import Mock, patch, PropertyMock +from lingodotdev import LingoDotDevEngine + +@pytest.mark.asyncio +async def test_malformed_unicode_handling(): + """Test that malformed unicode responses are handled gracefully""" + config = {"api_key": "test_key", "api_url": "https://api.test.com"} + + # Invalid utf-8 sequence (0xFF) + invalid_bytes = b"\xff\xfe\xfd" + + # Re-writing the test to target a successful status code (e.g. 200) but invalid body + # This triggers _safe_parse_json which is where the fix was applied + with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post: + mock_response = Mock() + mock_response.is_success = True + mock_response.status_code = 200 + # json() raises UnicodeDecodeError + mock_response.json.side_effect = UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte") + # text property also raises UnicodeDecodeError + type(mock_response).text = PropertyMock(side_effect=UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte")) + # content property returns the bytes + mock_response.content = invalid_bytes + + mock_post.return_value = mock_response + + async with LingoDotDevEngine(config) as engine: + try: + await engine.localize_text("hello", {"target_locale": "es"}) + pytest.fail("RuntimeError was not raised") + except RuntimeError as exc: + print(f"Caught expected RuntimeError: {exc}") + error_msg = str(exc) + assert "Failed to parse API response as JSON" in error_msg + assert "Response:" in error_msg + except Exception as e: + pytest.fail(f"Caught unexpected exception: {type(e).__name__}: {e}") From 8cfe813b7abab9f587fe831b76bd426d42d4d332 Mon Sep 17 00:00:00 2001 From: Hellnight2005 Date: Wed, 4 Feb 2026 20:21:13 +0530 Subject: [PATCH 3/3] fix: guard response.text access to 
prevent unhandled UnicodeDecodeError --- src/lingodotdev/engine.py | 19 +++++++++++++++---- tests/test_unicode_handling.py | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 48 insertions(+), 4 deletions(-) diff --git a/src/lingodotdev/engine.py b/src/lingodotdev/engine.py index 57dd460..bb3f2a9 100644 --- a/src/lingodotdev/engine.py +++ b/src/lingodotdev/engine.py @@ -220,7 +220,6 @@ async def _localize_chunk( response = await self._client.post(url, json=request_data) if not response.is_success: - response_preview = self._truncate_response(response.text) if 500 <= response.status_code < 600: error_details = "" try: @@ -229,13 +228,19 @@ async def _localize_chunk( error_details = f" {error_json['error']}" except Exception: pass - raise RuntimeError( f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} " "This may be due to temporary service issues." ) - elif response.status_code == 400: + + try: + text = response.text + except UnicodeDecodeError: + text = response.content.decode("utf-8", errors="replace") + response_preview = self._truncate_response(text) + + if response.status_code == 400: raise ValueError( f"Invalid request ({response.status_code}): {response.reason_phrase}. " f"Response: {response_preview}" @@ -475,7 +480,6 @@ async def recognize_locale(self, text: str) -> str: response = await self._client.post(url, json={"text": text}) if not response.is_success: - response_preview = self._truncate_response(response.text) if 500 <= response.status_code < 600: error_details = "" try: @@ -489,6 +493,13 @@ async def recognize_locale(self, text: str) -> str: f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} " "This may be due to temporary service issues." 
) + + try: + text = response.text + except UnicodeDecodeError: + text = response.content.decode("utf-8", errors="replace") + response_preview = self._truncate_response(text) + raise RuntimeError( f"Error recognizing locale ({response.status_code}): {response.reason_phrase}. " f"Response: {response_preview}" diff --git a/tests/test_unicode_handling.py b/tests/test_unicode_handling.py index 0cdc755..baf4065 100644 --- a/tests/test_unicode_handling.py +++ b/tests/test_unicode_handling.py @@ -37,3 +37,36 @@ async def test_malformed_unicode_handling(): assert "Response:" in error_msg except Exception as e: pytest.fail(f"Caught unexpected exception: {type(e).__name__}: {e}") + +@pytest.mark.asyncio +async def test_unicode_error_in_400_response(): + """Test that a 400 response with invalid unicode is handled safely""" + config = {"api_key": "test_key", "api_url": "https://api.test.com"} + invalid_bytes = b"\xff\xfe\xfd" + + with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post: + mock_response = Mock() + mock_response.is_success = False + mock_response.status_code = 400 + mock_response.reason_phrase = "Bad Request" + # json() raises UnicodeDecodeError + mock_response.json.side_effect = UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte") + # text property raises UnicodeDecodeError (simulating access to .text) + type(mock_response).text = PropertyMock(side_effect=UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte")) + # content returning bytes + mock_response.content = invalid_bytes + + mock_post.return_value = mock_response + + async with LingoDotDevEngine(config) as engine: + try: + # Should raise ValueError for 400 + await engine.localize_text("hello", {"target_locale": "es"}) + pytest.fail("ValueError was not raised") + except ValueError as exc: + error_msg = str(exc) + assert "Invalid request (400)" in error_msg + # Verify that we fell back to safe decoding + assert "Response:" in error_msg + except Exception as e: + 
pytest.fail(f"Caught unexpected exception: {type(e).__name__}: {e}")