diff --git a/configs/default_config.yaml b/configs/default_config.yaml index 928465bf5a..14ae54556b 100644 --- a/configs/default_config.yaml +++ b/configs/default_config.yaml @@ -78,10 +78,15 @@ prompt: # Feature extraction and program labeling thresholds # These control how the LLM perceives and categorizes programs suggest_simplification_after_chars: 500 # Suggest simplifying if program exceeds this many characters - include_changes_under_chars: 100 # Include change descriptions in features if under this length + include_changes_under_chars: 100 # Include change descriptions in features if under this length concise_implementation_max_lines: 10 # Label as "concise" if program has this many lines or fewer comprehensive_implementation_min_lines: 50 # Label as "comprehensive" if program has this many lines or more + # Diff summary formatting for "Previous Attempts" section + # Controls how SEARCH/REPLACE blocks are displayed in prompts + diff_summary_max_line_len: 100 # Truncate lines longer than this (with "...") + diff_summary_max_lines: 30 # Max lines per SEARCH/REPLACE block + # Note: meta-prompting features are not yet implemented # Database configuration diff --git a/openevolve/config.py b/openevolve/config.py index 86f65d7655..9ec2314ae1 100644 --- a/openevolve/config.py +++ b/openevolve/config.py @@ -281,6 +281,10 @@ class PromptConfig: 50 # Label as "comprehensive" if program has this many lines or more ) + # Diff summary formatting for "Previous Attempts" section + diff_summary_max_line_len: int = 100 # Truncate lines longer than this + diff_summary_max_lines: int = 30 # Max lines per SEARCH/REPLACE block + # Backward compatibility - deprecated code_length_threshold: Optional[int] = ( None # Deprecated: use suggest_simplification_after_chars @@ -340,7 +344,9 @@ class DatabaseConfig: artifact_size_threshold: int = 32 * 1024 # 32KB threshold cleanup_old_artifacts: bool = True artifact_retention_days: int = 30 - max_snapshot_artifacts: Optional[int] = 100 # 
Max artifacts in worker snapshots (None=unlimited) + max_snapshot_artifacts: Optional[int] = ( + 100 # Max artifacts in worker snapshots (None=unlimited) + ) novelty_llm: Optional["LLMInterface"] = None embedding_model: Optional[str] = None diff --git a/openevolve/iteration.py b/openevolve/iteration.py index 401731ff0a..7afaff75b5 100644 --- a/openevolve/iteration.py +++ b/openevolve/iteration.py @@ -1,12 +1,12 @@ import asyncio -import os -import uuid import logging +import os import time +import uuid from dataclasses import dataclass -from openevolve.database import Program, ProgramDatabase from openevolve.config import Config +from openevolve.database import Program, ProgramDatabase from openevolve.evaluator import Evaluator from openevolve.llm.ensemble import LLMEnsemble from openevolve.prompt.sampler import PromptSampler @@ -63,8 +63,7 @@ async def run_iteration_with_shared_db( # Build prompt if config.prompt.programs_as_changes_description: parent_changes_desc = ( - parent.changes_description - or config.prompt.initial_changes_description + parent.changes_description or config.prompt.initial_changes_description ) child_changes_desc = parent_changes_desc else: @@ -115,20 +114,34 @@ async def run_iteration_with_shared_db( return None child_code, _ = apply_diff_blocks(parent.code, code_blocks) - child_changes_desc, desc_applied = apply_diff_blocks(parent_changes_desc, desc_blocks) + child_changes_desc, desc_applied = apply_diff_blocks( + parent_changes_desc, desc_blocks + ) # Must update the previous changes description - if desc_applied == 0 or not child_changes_desc.strip() or child_changes_desc.strip() == parent_changes_desc.strip(): + if ( + desc_applied == 0 + or not child_changes_desc.strip() + or child_changes_desc.strip() == parent_changes_desc.strip() + ): logger.warning( f"Iteration {iteration+1}: changes_description was not updated or empty, program is discarded" ) return None - changes_summary = format_diff_summary(code_blocks) + changes_summary = 
format_diff_summary( + code_blocks, + max_line_len=config.prompt.diff_summary_max_line_len, + max_lines=config.prompt.diff_summary_max_lines, + ) else: # All diffs applied only to code child_code = apply_diff(parent.code, llm_response, config.diff_pattern) - changes_summary = format_diff_summary(diff_blocks) + changes_summary = format_diff_summary( + diff_blocks, + max_line_len=config.prompt.diff_summary_max_line_len, + max_lines=config.prompt.diff_summary_max_lines, + ) else: # Parse full rewrite new_code = parse_full_rewrite(llm_response, config.language) diff --git a/openevolve/process_parallel.py b/openevolve/process_parallel.py index 2d65b6ce46..64e945bb10 100644 --- a/openevolve/process_parallel.py +++ b/openevolve/process_parallel.py @@ -170,8 +170,7 @@ def _run_iteration_worker( # Build prompt if _worker_config.prompt.programs_as_changes_description: parent_changes_desc = ( - parent.changes_description - or _worker_config.prompt.initial_changes_description + parent.changes_description or _worker_config.prompt.initial_changes_description ) child_changes_desc = parent_changes_desc else: @@ -223,7 +222,9 @@ def _run_iteration_worker( diff_blocks = extract_diffs(llm_response, _worker_config.diff_pattern) if not diff_blocks: - return SerializableResult(error="No valid diffs found in response", iteration=iteration) + return SerializableResult( + error="No valid diffs found in response", iteration=iteration + ) if _worker_config.prompt.programs_as_changes_description: try: @@ -236,20 +237,34 @@ def _run_iteration_worker( return SerializableResult(error=str(e), iteration=iteration) child_code, _ = apply_diff_blocks(parent.code, code_blocks) - child_changes_desc, desc_applied = apply_diff_blocks(parent_changes_desc, desc_blocks) + child_changes_desc, desc_applied = apply_diff_blocks( + parent_changes_desc, desc_blocks + ) # Must update the previous changes description - if desc_applied == 0 or not child_changes_desc.strip() or child_changes_desc.strip() == 
def _format_block_lines(
    lines: List[str],
    max_line_len: Optional[int] = 100,
    max_lines: Optional[int] = 30,
) -> str:
    """Render one SEARCH or REPLACE block as indented text for a diff summary.

    Each line is stripped of trailing whitespace, clipped to ``max_line_len``
    characters (a clipped line ends in "..." and never exceeds the limit), and
    indented by two spaces. At most ``max_lines`` lines are shown; when lines
    are left out, a final "... (N more lines)" marker reports how many.

    Args:
        lines: Lines of the block, without newline terminators.
        max_line_len: Maximum characters kept per line, not counting the
            two-space indent. ``None`` disables clipping; negative values are
            treated as 0.
        max_lines: Maximum number of lines shown. ``None`` shows every line;
            negative values are treated as 0.

    Returns:
        The formatted lines joined with newlines, or "  (empty)" when there is
        nothing to show.
    """
    line_limit = None if max_line_len is None else max(max_line_len, 0)
    # Clamp the cap: a negative value would make ``lines[:max_lines]`` drop
    # lines from the end and inflate the "more lines" count.
    shown = len(lines) if max_lines is None else min(max(max_lines, 0), len(lines))

    formatted = []
    for line in lines[:shown]:
        text = line.rstrip()
        if line_limit is not None and len(text) > line_limit:
            # Reserve room for the ellipsis. For limits below 3 the ellipsis
            # itself is clipped, so the result never exceeds the limit (a
            # plain ``text[: limit - 3]`` would turn into a negative slice).
            text = text[: max(line_limit - 3, 0)] + "..."[:line_limit]
        formatted.append("  " + text)

    hidden = len(lines) - shown
    if hidden > 0:
        formatted.append(f"  ... ({hidden} more lines)")
    return "\n".join(formatted) if formatted else "  (empty)"
class TestFormatDiffSummary(unittest.TestCase):
    """Checks that format_diff_summary renders real diff content, not just line counts."""

    def test_single_line_changes(self):
        """A one-line search/replace pair is rendered inline."""
        summary = format_diff_summary([("x = 1", "x = 2")])
        self.assertEqual(summary, "Change 1: 'x = 1' to 'x = 2'")

    def test_multi_line_changes_show_actual_content(self):
        """A multi-line pair is rendered as full SEARCH/REPLACE text."""
        old_code = "def old():\n    return False"
        new_code = "def new():\n    return True"
        summary = format_diff_summary([(old_code, new_code)])
        # The code itself and both section labels must be present.
        expected_fragments = (
            "def old():",
            "return False",
            "def new():",
            "return True",
            "Replace:",
            "with:",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, summary)
        # The generic line-count placeholder must not appear.
        self.assertNotIn("2 lines", summary)

    def test_multiple_diff_blocks(self):
        """Every block in the list gets its own numbered entry."""
        inline_pair = ("a = 1", "a = 2")
        block_pair = ("def foo():\n    pass", "def bar():\n    return 1")
        summary = format_diff_summary([inline_pair, block_pair])
        expected_fragments = (
            "Change 1:",
            "Change 2:",
            "'a = 1' to 'a = 2'",
            "def foo():",
            "def bar():",
        )
        for fragment in expected_fragments:
            self.assertIn(fragment, summary)

    def test_configurable_max_line_len(self):
        """max_line_len decides whether a long line gets an ellipsis."""
        # A second line forces block format; a lone line would be rendered inline.
        pairs = [("x" * 50 + "\nline2", "short\nline2")]
        # Default limit (100): the 50-character line is left alone.
        self.assertNotIn("...", format_diff_summary(pairs))
        # Limit of 30: the same line is cut short.
        self.assertIn("...", format_diff_summary(pairs, max_line_len=30))

    def test_configurable_max_lines(self):
        """max_lines decides how many lines of a block are shown."""
        search = "\n".join(f"line{n}" for n in range(20))
        summary = format_diff_summary([(search, "replacement")], max_lines=10)
        self.assertIn("... (10 more lines)", summary)

    def test_block_lines_basic_formatting(self):
        """Each line is prefixed with a two-space indent."""
        self.assertEqual(_format_block_lines(["line1", "line2"]), "  line1\n  line2")

    def test_block_lines_long_line_truncation(self):
        """With default arguments a 150-character line is cut short."""
        rendered = _format_block_lines(["x" * 150])
        self.assertIn("...", rendered)
        first_line = rendered.split("\n")[0]
        self.assertLess(len(first_line), 110)

    def test_block_lines_many_lines_truncation(self):
        """With default arguments 50 lines collapse to 30 plus a marker line."""
        rendered = _format_block_lines([f"line{n}" for n in range(50)])
        self.assertIn("... (20 more lines)", rendered)
        self.assertEqual(len(rendered.split("\n")), 31)

    def test_block_lines_empty_input(self):
        """An empty list is rendered as the '(empty)' placeholder."""
        self.assertEqual(_format_block_lines([]), "  (empty)")