diff --git a/src/lingodotdev/engine.py b/src/lingodotdev/engine.py
index dbdcbd7..bb3f2a9 100644
--- a/src/lingodotdev/engine.py
+++ b/src/lingodotdev/engine.py
@@ -104,12 +104,17 @@ def _safe_parse_json(response: httpx.Response) -> Dict[str, Any]:
"""
try:
return response.json()
- except json.JSONDecodeError:
- preview = LingoDotDevEngine._truncate_response(response.text)
+ except (json.JSONDecodeError, UnicodeDecodeError) as err:
+ try:
+ text = response.text
+ except UnicodeDecodeError:
+ text = response.content.decode("utf-8", errors="replace")
+
+ preview = LingoDotDevEngine._truncate_response(text)
raise RuntimeError(
f"Failed to parse API response as JSON (status {response.status_code}). "
f"This may indicate a gateway or proxy error. Response: {preview}"
- )
+ ) from err
async def _localize_raw(
self,
@@ -215,13 +220,27 @@ async def _localize_chunk(
response = await self._client.post(url, json=request_data)
if not response.is_success:
- response_preview = self._truncate_response(response.text)
if 500 <= response.status_code < 600:
+ error_details = ""
+ try:
+ error_json = response.json()
+ if isinstance(error_json, dict) and "error" in error_json:
+ error_details = f" {error_json['error']}"
+ except Exception:
+ pass
+
raise RuntimeError(
- f"Server error ({response.status_code}): {response.reason_phrase}. "
- f"This may be due to temporary service issues. Response: {response_preview}"
+ f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} "
+ "This may be due to temporary service issues."
)
- elif response.status_code == 400:
+
+ try:
+ text = response.text
+ except UnicodeDecodeError:
+ text = response.content.decode("utf-8", errors="replace")
+ response_preview = self._truncate_response(text)
+
+ if response.status_code == 400:
raise ValueError(
f"Invalid request ({response.status_code}): {response.reason_phrase}. "
f"Response: {response_preview}"
@@ -461,12 +480,26 @@ async def recognize_locale(self, text: str) -> str:
response = await self._client.post(url, json={"text": text})
if not response.is_success:
- response_preview = self._truncate_response(response.text)
if 500 <= response.status_code < 600:
+ error_details = ""
+ try:
+ error_json = response.json()
+ if isinstance(error_json, dict) and "error" in error_json:
+ error_details = f" {error_json['error']}"
+ except Exception:
+ pass
+
raise RuntimeError(
- f"Server error ({response.status_code}): {response.reason_phrase}. "
- f"This may be due to temporary service issues. Response: {response_preview}"
+ f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} "
+ "This may be due to temporary service issues."
)
+
+ try:
+ text = response.text
+ except UnicodeDecodeError:
+ text = response.content.decode("utf-8", errors="replace")
+ response_preview = self._truncate_response(text)
+
raise RuntimeError(
f"Error recognizing locale ({response.status_code}): {response.reason_phrase}. "
f"Response: {response_preview}"
@@ -498,10 +531,17 @@ async def whoami(self) -> Optional[Dict[str, str]]:
return {"email": payload["email"], "id": payload["id"]}
if 500 <= response.status_code < 600:
- response_preview = self._truncate_response(response.text)
+ error_details = ""
+ try:
+ error_json = response.json()
+ if isinstance(error_json, dict) and "error" in error_json:
+ error_details = f" {error_json['error']}"
+ except Exception:
+ pass
+
raise RuntimeError(
- f"Server error ({response.status_code}): {response.reason_phrase}. "
- f"This may be due to temporary service issues. Response: {response_preview}"
+ f"Server error ({response.status_code}): {response.reason_phrase}.{error_details} "
+ "This may be due to temporary service issues."
)
return None
diff --git a/tests/test_502_handling.py b/tests/test_502_handling.py
new file mode 100644
index 0000000..e9f9210
--- /dev/null
+++ b/tests/test_502_handling.py
@@ -0,0 +1,63 @@
+import pytest
+import json
+from unittest.mock import Mock, patch
+from lingodotdev import LingoDotDevEngine
+
+
+@pytest.mark.asyncio
+async def test_502_html_handling():
+ """Test that 502 errors with HTML bodies are sanitized"""
+ config = {"api_key": "test_key", "api_url": "https://api.test.com"}
+
+ html_body = "<html><body>" + ("<h1>502 Bad Gateway</h1>" * 50) + "</body></html>"
+ assert len(html_body) > 200 # Ensure it triggers truncation
+
+ with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post:
+ mock_response = Mock()
+ mock_response.is_success = False
+ mock_response.status_code = 502
+ mock_response.reason_phrase = "Bad Gateway"
+ mock_response.text = html_body
+ mock_response.json.side_effect = ValueError(
+ "Not JSON"
+ ) # simulating non-JSON response
+ mock_post.return_value = mock_response
+
+ async with LingoDotDevEngine(config) as engine:
+ with pytest.raises(RuntimeError) as exc_info:
+ await engine.localize_text("hello", {"target_locale": "es"})
+
+ error_msg = str(exc_info.value)
+
+ # Assertions
+ assert "Server error (502): Bad Gateway." in error_msg
+ assert "This may be due to temporary service issues." in error_msg
+ assert "Response:" not in error_msg
+ assert "<html>" not in error_msg
+ assert "<body>" not in error_msg
+
+
+@pytest.mark.asyncio
+async def test_500_json_handling():
+ """Test that 500 errors with JSON bodies are preserved"""
+ config = {"api_key": "test_key", "api_url": "https://api.test.com"}
+ error_json = {"error": "Specific internal error message"}
+
+ with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post:
+ mock_response = Mock()
+ mock_response.is_success = False
+ mock_response.status_code = 500
+ mock_response.reason_phrase = "Internal Server Error"
+ mock_response.text = json.dumps(error_json) # Not read on the 5xx path (error comes from .json()); set so the mock resembles a real response
+ mock_response.json.return_value = error_json
+ mock_post.return_value = mock_response
+
+ async with LingoDotDevEngine(config) as engine:
+ with pytest.raises(RuntimeError) as exc_info:
+ await engine.localize_text("hello", {"target_locale": "es"})
+
+ error_msg = str(exc_info.value)
+
+ # Assertions
+ assert "Server error (500): Internal Server Error." in error_msg
+ assert "Specific internal error message" in error_msg
diff --git a/tests/test_unicode_handling.py b/tests/test_unicode_handling.py
new file mode 100644
index 0000000..baf4065
--- /dev/null
+++ b/tests/test_unicode_handling.py
@@ -0,0 +1,72 @@
+import pytest
+import json
+from unittest.mock import Mock, patch, PropertyMock
+from lingodotdev import LingoDotDevEngine
+
+@pytest.mark.asyncio
+async def test_malformed_unicode_handling():
+ """Test that malformed unicode responses are handled gracefully"""
+ config = {"api_key": "test_key", "api_url": "https://api.test.com"}
+
+ # Invalid utf-8 sequence (0xFF)
+ invalid_bytes = b"\xff\xfe\xfd"
+
+ # Use a successful status code (200) with an undecodable body so that
+ # _safe_parse_json, where the UnicodeDecodeError fallback lives, is exercised.
+ with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post:
+ mock_response = Mock()
+ mock_response.is_success = True
+ mock_response.status_code = 200
+ # json() raises UnicodeDecodeError
+ mock_response.json.side_effect = UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte")
+ # text property also raises UnicodeDecodeError
+ type(mock_response).text = PropertyMock(side_effect=UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte"))
+ # content property returns the bytes
+ mock_response.content = invalid_bytes
+
+ mock_post.return_value = mock_response
+
+ async with LingoDotDevEngine(config) as engine:
+ try:
+ await engine.localize_text("hello", {"target_locale": "es"})
+ pytest.fail("RuntimeError was not raised")
+ except RuntimeError as exc:
+ print(f"Caught expected RuntimeError: {exc}")
+ error_msg = str(exc)
+ assert "Failed to parse API response as JSON" in error_msg
+ assert "Response:" in error_msg
+ except Exception as e:
+ pytest.fail(f"Caught unexpected exception: {type(e).__name__}: {e}")
+
+@pytest.mark.asyncio
+async def test_unicode_error_in_400_response():
+ """Test that a 400 response with invalid unicode is handled safely"""
+ config = {"api_key": "test_key", "api_url": "https://api.test.com"}
+ invalid_bytes = b"\xff\xfe\xfd"
+
+ with patch("lingodotdev.engine.httpx.AsyncClient.post") as mock_post:
+ mock_response = Mock()
+ mock_response.is_success = False
+ mock_response.status_code = 400
+ mock_response.reason_phrase = "Bad Request"
+ # json() raises UnicodeDecodeError
+ mock_response.json.side_effect = UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte")
+ # text property raises UnicodeDecodeError (simulating access to .text)
+ type(mock_response).text = PropertyMock(side_effect=UnicodeDecodeError("utf-8", invalid_bytes, 0, 1, "invalid start byte"))
+ # content returning bytes
+ mock_response.content = invalid_bytes
+
+ mock_post.return_value = mock_response
+
+ async with LingoDotDevEngine(config) as engine:
+ try:
+ # Should raise ValueError for 400
+ await engine.localize_text("hello", {"target_locale": "es"})
+ pytest.fail("ValueError was not raised")
+ except ValueError as exc:
+ error_msg = str(exc)
+ assert "Invalid request (400)" in error_msg
+ # Verify that we fell back to safe decoding
+ assert "Response:" in error_msg
+ except Exception as e:
+ pytest.fail(f"Caught unexpected exception: {type(e).__name__}: {e}")