Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/on_schedule_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ jobs:
fail-fast: false
max-parallel: 12
matrix:
crawler-type: ["playwright_camoufox", "playwright", "parsel", "beautifulsoup"]
http-client: [ "httpx", "curl_impersonate"]
crawler-type: ["playwright_camoufox", "playwright_chrome", "playwright_firefox", "playwright_webkit", "playwright", "parsel", "beautifulsoup"]
http-client: ["httpx", "curl_impersonate"]
package-manager: ["pip", "uv", "poetry"]

runs-on: "ubuntu-latest"
Expand Down
15 changes: 9 additions & 6 deletions src/crawlee/browsers/_playwright_browser_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,20 +82,23 @@ def __init__(
'executable_path': config.default_browser_path,
'chromium_sandbox': not config.disable_browser_sandbox,
}

if browser_type == 'chrome' and default_launch_browser_options['executable_path']:
raise ValueError(
'Cannot use browser_type `chrome` with `Configuration.default_browser_path` or `executable_path` set.'
)
explicit_browser_launch_options = browser_launch_options or {}

# Map 'chrome' to 'chromium' with the 'chrome' channel.
if browser_type == 'chrome':
browser_type = 'chromium'
# Chromium parameter 'channel' set to 'chrome' enables using installed Google Chrome.
default_launch_browser_options['channel'] = 'chrome'

if executable_path := explicit_browser_launch_options.get(
'executable_path', default_launch_browser_options.get('executable_path')
):
logger.debug(
f"Using browser executable from {executable_path}, which takes precedence over 'chrome' channel."
)

self._browser_type: BrowserType = browser_type
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | (browser_launch_options or {})
self._browser_launch_options: dict[str, Any] = default_launch_browser_options | explicit_browser_launch_options
self._browser_new_context_options = browser_new_context_options or {}
self._max_open_pages_per_browser = max_open_pages_per_browser
self._use_incognito_pages = use_incognito_pages
Expand Down
2 changes: 1 addition & 1 deletion src/crawlee/project_template/cookiecutter.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"project_name": "crawlee-python-project",
"__package_name": "{{ cookiecutter.project_name|lower|replace('-', '_') }}",
"crawler_type": ["beautifulsoup", "parsel", "playwright", "playwright-camoufox"],
"crawler_type": ["beautifulsoup", "parsel", "playwright", "playwright-camoufox", "playwright-chrome", "playwright-firefox", "playwright-webkit"],
"__crawler_type": "{{ cookiecutter.crawler_type|lower|replace('-', '_') }}",
"http_client": ["impit", "httpx", "curl-impersonate"],
"package_manager": ["poetry", "pip", "uv"],
Expand Down
15 changes: 15 additions & 0 deletions src/crawlee/project_template/templates/main_playwright_chrome.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# % extends 'main.py'
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Closing parenthesis formatting

Files: templates/main_playwright_chrome.py,
templates/main_playwright_firefox.py, templates/main_playwright_webkit.py

The closing `)` is on the same line as the last argument:

  {{ self.http_client_instantiation() }})

Consider putting it on its own line:

  {{ self.http_client_instantiation() }}
)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done


# % block import
from crawlee.crawlers import PlaywrightCrawler
# % endblock

# % block instantiation
crawler = PlaywrightCrawler(
request_handler=router,
headless=True,
max_requests_per_crawl=10,
browser_type="chrome",
{{ self.http_client_instantiation() }}
)
# % endblock
15 changes: 15 additions & 0 deletions src/crawlee/project_template/templates/main_playwright_firefox.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# % extends 'main.py'

# % block import
from crawlee.crawlers import PlaywrightCrawler
# % endblock

# % block instantiation
crawler = PlaywrightCrawler(
request_handler=router,
headless=True,
max_requests_per_crawl=10,
browser_type="firefox",
{{ self.http_client_instantiation() }}
)
# % endblock
15 changes: 15 additions & 0 deletions src/crawlee/project_template/templates/main_playwright_webkit.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# % extends 'main.py'

# % block import
from crawlee.crawlers import PlaywrightCrawler
# % endblock

# % block instantiation
crawler = PlaywrightCrawler(
request_handler=router,
headless=True,
max_requests_per_crawl=10,
browser_type="webkit",
{{ self.http_client_instantiation() }}
)
# % endblock
19 changes: 0 additions & 19 deletions src/crawlee/project_template/templates/routes_camoufox.py

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,13 @@
# % if cookiecutter.crawler_type == 'playwright'
FROM apify/actor-python-playwright:3.13
# % elif cookiecutter.crawler_type == 'playwright-camoufox'
# Currently camoufox has issues installing on Python 3.13
FROM apify/actor-python-playwright:3.12
FROM apify/actor-python-playwright-camoufox:3.13
# % elif cookiecutter.crawler_type == 'playwright-chrome'
FROM apify/actor-python-playwright-chrome:3.13
# % elif cookiecutter.crawler_type == 'playwright-firefox'
FROM apify/actor-python-playwright-firefox:3.13
# % elif cookiecutter.crawler_type == 'playwright-webkit'
FROM apify/actor-python-playwright-webkit:3.13
# % else
FROM apify/actor-python:3.13
# % endif
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# % if cookiecutter.crawler_type == 'playwright-camoufox'
# % if cookiecutter.crawler_type.startswith('playwright')
# % set extras = ['playwright']
# % else
# % set extras = [cookiecutter.crawler_type]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
# % if cookiecutter.crawler_type == 'playwright-camoufox'
camoufox[geoip]~=0.4.5
# % endif
# % if cookiecutter.crawler_type.startswith('playwright')
# % set extras = ['playwright']
# % else
# % set extras = [cookiecutter.crawler_type]
Expand Down
Original file line number Diff line number Diff line change
@@ -1 +1,5 @@
# % if cookiecutter.crawler_type.startswith('playwright')
# % include 'routes_playwright.py'
# % else
# % include 'routes_%s.py' % cookiecutter.__crawler_type
# % endif
3 changes: 3 additions & 0 deletions tests/e2e/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,9 @@ def pytest_configure(config: Config) -> None:
'impit',
'playwright',
'playwright_camoufox',
'playwright_chrome',
'playwright_firefox',
'playwright_webkit',
'parsel',
'beautifulsoup',
'uv',
Expand Down
3 changes: 3 additions & 0 deletions tests/e2e/project_template/test_static_crawlers_templates.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@
'crawler_type',
[
pytest.param('playwright-camoufox', marks=pytest.mark.playwright_camoufox),
pytest.param('playwright-chrome', marks=pytest.mark.playwright_chrome),
pytest.param('playwright-firefox', marks=pytest.mark.playwright_firefox),
pytest.param('playwright-webkit', marks=pytest.mark.playwright_webkit),
pytest.param('playwright', marks=pytest.mark.playwright),
pytest.param('parsel', marks=pytest.mark.parsel),
pytest.param('beautifulsoup', marks=pytest.mark.beautifulsoup),
Expand Down