-
Notifications
You must be signed in to change notification settings - Fork 22
Expand file tree
/
Copy pathstructure_template_usage.py
More file actions
298 lines (228 loc) · 8.88 KB
/
structure_template_usage.py
File metadata and controls
298 lines (228 loc) · 8.88 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
"""Examples demonstrating TOON structure template generation for LLM prompts."""
from toon import generate_structure
# Pydantic is an optional dependency for these examples. PYDANTIC_AVAILABLE
# records whether the guarded imports succeeded; main() checks it before
# running the Pydantic-based examples.
try:
    from pydantic import BaseModel, Field
    from toon import generate_structure_from_pydantic
    PYDANTIC_AVAILABLE = True
except ImportError:
    # Either guarded import failing lands here (pydantic itself, or the
    # pydantic helper from toon); the printed note mentions only pydantic.
    PYDANTIC_AVAILABLE = False
    print("Note: Pydantic examples will be skipped (pydantic not installed)")
def example_simple_response_structure():
    """Demonstrate a TOON template built from a flat, three-field schema.

    Prints the schema dict, the template returned by ``generate_structure``,
    and a sketch of how that template could be quoted inside an LLM prompt.
    """
    person_fields = {
        "name": "name of the person",
        "age": "age of the person",
        "occupation": "job description of the person",
    }

    for heading in (
        "=== Simple Response Structure ===",
        "Use case: Telling an LLM what format to return data in\n",
    ):
        print(heading)

    template = generate_structure(person_fields)

    # Show the input first, then what it turns into.
    print("Schema definition:")
    print(person_fields)
    print("\nGenerated TOON structure template:")
    print(template)

    # Quote the template the way it would sit inside a prompt string.
    for prompt_line in (
        "\nHow to use in LLM prompt:",
        ' "Please extract person information and return it in this format:',
        f' {template}"',
    ):
        print(prompt_line)
    print()
def example_nested_response_structure():
    """Demonstrate a TOON template for a schema containing nested objects.

    The schema is assembled from the innermost dict outwards (location ->
    company -> root) and then handed to ``generate_structure``; the
    resulting template is printed.
    """
    print("=== Nested Response Structure ===")
    print("Use case: Complex data with nested objects\n")

    # Build inside-out so each nesting level has a name.
    location = {"city": "city name", "country": "country name"}
    company = {"name": "company name", "location": location}
    nested_schema = {
        "company": company,
        "employee_count": "number of employees",
    }

    template = generate_structure(nested_schema)

    print("Generated structure template:")
    print(template)
    print()
def example_array_response_structure():
    """Demonstrate a TOON template for an object holding an array of records.

    The schema maps ``"products"`` to a one-element list whose single dict
    describes the fields of every product row. Prints the template and a
    sketch of an LLM prompt that embeds it.
    """
    print("=== Array Response Structure ===")
    print("Use case: Extracting multiple items in tabular format\n")

    # One dict describes the shape shared by all items of the array.
    product_row = {
        "id": "product identifier",
        "name": "product name",
        "price": "product price in USD",
        "in_stock": "availability status",
    }
    template = generate_structure({"products": [product_row]})

    print("Generated structure template:")
    print(template)

    for prompt_line in (
        "\nHow to use in LLM prompt:",
        ' "Extract all products from the page and return them in this format:',
        f' {template}"',
    ):
        print(prompt_line)
    print()
def example_list_response_structure():
    """Demonstrate a TOON template whose root is an array of objects.

    Unlike the keyed-array example, the list itself is the top-level schema
    passed to ``generate_structure``. The resulting template is printed.
    """
    print("=== Root-Level Array Structure ===")
    print("Use case: Returning an array of similar objects\n")

    article_row = {
        "title": "article title",
        "author": "article author",
        "date": "publication date",
        "summary": "brief summary",
    }
    # The schema is the list itself, not a dict wrapping it.
    template = generate_structure([article_row])

    print("Generated structure template:")
    print(template)
    print()
def example_mixed_response_structure():
    """Demonstrate a TOON template for a schema mixing several shapes.

    The root dict combines a plain field, a nested object, a list of plain
    values (``tags``) and a list of objects (``sections``). The template
    returned by ``generate_structure`` is printed.
    """
    print("=== Mixed Response Structure ===")
    print("Use case: Complex extraction with various data types\n")

    metadata = {"published": "publication date", "author": "author name"}
    section_row = {
        "heading": "section heading",
        "word_count": "number of words",
    }
    page_schema = {
        "page_title": "title of the page",
        "metadata": metadata,
        "tags": ["tag name"],
        "sections": [section_row],
    }

    template = generate_structure(page_schema)

    print("Generated structure template:")
    print(template)
    print()
def example_delimiter_options():
    """Demonstrate passing a ``"delimiter"`` option to ``generate_structure``.

    The same root-level array schema is rendered twice, once with ``"|"``
    and once with a tab character, and each template is printed under its
    own caption.
    """
    print("=== Custom Delimiters ===")
    print("Use case: When data might contain commas\n")

    address_rows = [{
        "address": "full address (may contain commas)",
        "city": "city name",
        "zipcode": "zip code",
    }]

    # (caption, delimiter) pairs, rendered in this order.
    variants = (
        ("With pipe delimiter (recommended for addresses):", "|"),
        ("\nWith tab delimiter (good for spreadsheet-like data):", "\t"),
    )
    for caption, delimiter in variants:
        print(caption)
        print(generate_structure(address_rows, {"delimiter": delimiter}))
    print()
# Pydantic-specific examples
if PYDANTIC_AVAILABLE:
    # Both models subclass BaseModel, so they can only be defined when the
    # pydantic import succeeded; hence the guard.

    class Person(BaseModel):
        """Person information model."""

        # Every field carries a description via Field(description=...).
        # This model is the one passed to generate_structure_from_pydantic()
        # in example_pydantic_simple_model().
        id: int = Field(description="unique identifier")
        name: str = Field(description="full name")
        email: str = Field(description="email address")
        age: int = Field(description="age in years")
        occupation: str = Field(description="job title or profession")

    class Article(BaseModel):
        """Article model."""

        # This model is the one passed to generate_structure_from_pydantic()
        # in example_pydantic_for_llm_prompt().
        title: str = Field(description="article title")
        author: str = Field(description="author name")
        published_date: str = Field(description="publication date in YYYY-MM-DD format")
        # Built-in generic syntax (list[str]) needs Python 3.9 or newer.
        tags: list[str] = Field(description="article tags")
        word_count: int = Field(description="number of words")
def example_pydantic_simple_model():
    """Demonstrate deriving a TOON template from the ``Person`` model.

    Passes the model class (not an instance) to
    ``generate_structure_from_pydantic`` and prints the model name followed
    by the resulting template.
    """
    for heading in (
        "=== Pydantic Model Structure ===",
        "Use case: Generate structure from existing data models\n",
    ):
        print(heading)

    template = generate_structure_from_pydantic(Person)

    print(f"Model: {Person.__name__}")
    print("\nGenerated structure template:")
    print(template)
    print()
def example_pydantic_for_llm_prompt():
    """Demonstrate embedding a model-derived template in a full prompt.

    Generates a template from the ``Article`` model, interpolates it into a
    multi-line extraction prompt, and prints the prompt between two
    60-character dashed rules.
    """
    print("=== Complete LLM Prompt Example ===")
    print("Use case: Full example of using structure in a prompt\n")

    template = generate_structure_from_pydantic(Article)
    divider = "-" * 60

    # Continuation lines of the literal start at column 0 on purpose: any
    # leading whitespace here would become part of the prompt text.
    prompt = f"""Extract the article information from the following text and return it in TOON format.
Expected structure:
{template}
Text to extract from:
[Article content would go here...]
Please return only the TOON formatted data."""

    for chunk in ("Complete prompt:", divider, prompt, divider):
        print(chunk)
    print()
def example_pydantic_array_structure():
    """Demonstrate a template for an array of article-like records.

    Although grouped with the Pydantic examples, this one does not use a
    model: it hand-writes a one-element list schema and renders it with
    ``generate_structure``.
    """
    print("=== Pydantic Array Structure ===")
    print("Use case: Extracting multiple items of the same type\n")

    article_row = {
        "title": "article title",
        "author": "author name",
        "published_date": "publication date",
        "word_count": "word count",
    }
    # Wrapping the row description in a list asks for an array structure.
    template = generate_structure([article_row])

    print("Generated structure for array of articles:")
    print(template)
    print()
def example_real_world_use_case():
    """Demonstrate a complete product-scraping prompt built around a template.

    Renders a ``products`` array schema with ``generate_structure``,
    interpolates the template into a multi-line instruction prompt, and
    prints that prompt between two 60-character ``=`` rules.
    """
    print("=== Real-World Use Case: Product Scraping ===")
    print("Use case: Instructing an LLM to extract product data\n")

    product_row = {
        "name": "product name",
        "sku": "product SKU or ID",
        "price": "price in USD",
        "rating": "average rating (1-5)",
        "reviews_count": "number of reviews",
        "availability": "in stock or out of stock",
    }
    template = generate_structure({"products": [product_row]})
    divider = "=" * 60

    # Continuation lines of the literal start at column 0 on purpose: any
    # leading whitespace here would become part of the prompt text.
    prompt = f"""You are a web scraping assistant. Extract all product information from the HTML and return it in TOON format.
Return the data in this exact structure:
{template}
Important notes:
- Extract ALL products from the page
- Price should be numeric (remove currency symbols)
- Rating should be a number between 1 and 5
- If a field is missing, use null
HTML content:
[HTML content would go here...]"""

    for chunk in ("Complete prompt for web scraping:", divider, prompt, divider):
        print(chunk)
    print()
def main():
    """Run every example in order, then print a short summary.

    The three Pydantic examples run only when ``PYDANTIC_AVAILABLE`` is
    true; all other examples run unconditionally.
    """
    rule = "=" * 60

    print("\n" + rule)
    print(" TOON STRUCTURE TEMPLATE EXAMPLES")
    print(" Generate response structures for LLM prompts")
    print(rule + "\n")

    for run_example in (
        example_simple_response_structure,
        example_nested_response_structure,
        example_array_response_structure,
        example_list_response_structure,
        example_mixed_response_structure,
        example_delimiter_options,
    ):
        run_example()

    # These names are looked up only under the flag, exactly as the calls
    # themselves are guarded.
    if PYDANTIC_AVAILABLE:
        for run_example in (
            example_pydantic_simple_model,
            example_pydantic_for_llm_prompt,
            example_pydantic_array_structure,
        ):
            run_example()

    example_real_world_use_case()

    print(rule)
    print(" Summary")
    print(rule)
    for takeaway in (
        "✨ Use generate_structure() to create response templates",
        "✨ Perfect for LLM prompts - no need to provide examples",
        "✨ Supports nested objects, arrays, and custom delimiters",
        "✨ Works with Pydantic models for type-safe schemas",
        "✨ Reduces token usage while maintaining clarity",
    ):
        print(takeaway)
    print()
# Run the examples only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()