diff --git a/model2vec/distill/distillation.py b/model2vec/distill/distillation.py index 898a199..3e1f800 100644 --- a/model2vec/distill/distillation.py +++ b/model2vec/distill/distillation.py @@ -7,6 +7,7 @@ import numpy as np from huggingface_hub.hf_api import model_info +from skeletoken import TokenizerModel from transformers import AutoModel, AutoTokenizer from transformers.modeling_utils import PreTrainedModel from transformers.tokenization_utils_fast import PreTrainedTokenizerFast @@ -15,7 +16,7 @@ from model2vec.distill.utils import select_optimal_device from model2vec.model import StaticModel from model2vec.quantization import DType, quantize_embeddings -from model2vec.tokenizer import clean_and_create_vocabulary, replace_vocabulary, turn_tokens_into_ids +from model2vec.tokenizer import clean_and_create_vocabulary, turn_tokens_into_ids from model2vec.vocabulary_quantization import quantize_vocabulary logger = logging.getLogger(__name__) @@ -37,7 +38,8 @@ def distill_from_model( Distill a staticmodel from a sentence transformer. This function creates a set of embeddings from a sentence transformer. It does this by doing either - a forward pass for all subword tokens in the tokenizer, or by doing a forward pass for all tokens in a passed vocabulary. + a forward pass for all subword tokens in the tokenizer, or by doing a forward pass for all tokens in a passed + vocabulary. If you pass through a vocabulary, we create a custom word tokenizer for that vocabulary. If you don't pass a vocabulary, we use the model's tokenizer directly. @@ -51,10 +53,13 @@ def distill_from_model( If this is 'auto', we don't reduce dimensionality, but still apply PCA. :param sif_coefficient: The SIF coefficient to use. If this is None, no weighting is applied. Should be a value > 0 and < 1.0. A value of 1e-4 is a good default. - :param token_remove_pattern: If this is set to a string, we compile this into a regex. Any tokens that conform to this regex pattern will be removed from the vocabulary. - If the pattern is so general that it removes all tokens, we throw an error. If the pattern can't be compiled into a valid regex, we also throw an error. + :param token_remove_pattern: If this is set to a string, we compile this into a regex. Any tokens that conform to + this regex pattern will be removed from the vocabulary. + If the pattern is so general that it removes all tokens, we throw an error. If the pattern can't be compiled + into a valid regex, we also throw an error. :param quantize_to: The data type to quantize to. Can be any of the DType enum members or their string equivalents. - :param vocabulary_quantization: The number of clusters to use for vocabulary quantization. If this is None, no quantization is performed. + :param vocabulary_quantization: The number of clusters to use for vocabulary quantization. If this is None, no + quantization is performed. :param pooling: The pooling mode to use for creating embeddings. Can be one of: 'mean' (default): mean over all tokens. Robust and works well in most cases. 'last': use the last token's hidden state (often the [EOS] token). Common for decoder-style models. 
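Note (usage sketch, not part of the patch): the parameters documented in the hunk above are easiest to see in context. A rough example of a distillation call is shown below; the model name, vocabulary, and parameter values are arbitrary illustrations, and `pooling` is shown in its string form as described in the docstring.

    from transformers import AutoModel, AutoTokenizer

    from model2vec.distill import distill_from_model

    model_name = "BAAI/bge-base-en-v1.5"  # example encoder with a fast tokenizer
    model = AutoModel.from_pretrained(model_name)
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    static_model = distill_from_model(
        model=model,
        tokenizer=tokenizer,
        vocabulary=["chat gpt", "multi word token"],  # optional extra (multi-word) tokens
        device="cpu",
        pca_dims=256,
        sif_coefficient=1e-4,
        token_remove_pattern=r"\[unused\d+\]",  # e.g. drop BERT-style [unusedN] tokens
        pooling="mean",
    )
    static_model.save_pretrained("my-static-model")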
@@ -65,59 +70,43 @@ def distill_from_model( """ quantize_to = DType(quantize_to) - backend_tokenizer = tokenizer.backend_tokenizer sif_coefficient, token_remove_regex = _validate_parameters(sif_coefficient, token_remove_pattern) if vocabulary is None: vocabulary = [] device = select_optimal_device(device) + original_tokenizer_model = TokenizerModel.from_transformers_tokenizer(tokenizer) - n_tokens_before = len(vocabulary) # Clean the vocabulary by removing duplicate tokens and tokens that are in the internal vocabulary. - all_tokens, backend_tokenizer = clean_and_create_vocabulary( - tokenizer, vocabulary, token_remove_regex=token_remove_regex - ) - n_tokens_after = len([token for token in all_tokens if not token.is_internal]) - if n_tokens_before: - logger.info( - f"Adding {n_tokens_after} tokens to the vocabulary. Removed {n_tokens_before - n_tokens_after} tokens during preprocessing." - ) - + # Copy the original tokenizer model. + tokenizer_model = original_tokenizer_model._deep_copy() + if tokenizer_model.adds_prefix_space is not None: + tokenizer_model.adds_prefix_space = True + + # Create the vocabulary in the new tokenizer. + tokenizer_model = clean_and_create_vocabulary(tokenizer_model, vocabulary, token_remove_regex=token_remove_regex) + # Remove the post processor, this is not necessary. + tokenizer_model.post_processor = None + + # All tokens in a single list. + all_tokens = tokenizer_model.sorted_vocabulary if not all_tokens: raise ValueError("The vocabulary is empty after preprocessing. Please check your token_remove_pattern.") - unk_token = cast(str | None, tokenizer.special_tokens_map.get("unk_token")) - pad_token = cast(str | None, tokenizer.special_tokens_map.get("pad_token")) - - # Weird if to satsify mypy - if pad_token is None: - if unk_token is not None: - pad_token = unk_token - logger.warning( - "The pad token is not set. Setting it to the unk token. This is a workaround for models that don't have a pad token." - ) - else: - pad_token = unk_token or all_tokens[0].form - logger.warning( - "The pad token is not set. Setting it to the first token in the vocabulary. This is a workaround for models that don't have a pad token." - ) - - # Replace the vocabulary in the tokenizer with the new vocabulary. - backend_tokenizer = replace_vocabulary(backend_tokenizer, all_tokens, unk_token=unk_token, pad_token=pad_token) - logger.info(f"Creating embeddings for {len(all_tokens)} tokens") - # Convert tokens to IDs - token_ids = turn_tokens_into_ids(all_tokens, tokenizer, unk_token) - - # Create the embeddings + # Turn all _new_ tokens into ids using the original tokenizer + token_ids = turn_tokens_into_ids(all_tokens, original_tokenizer_model) + + # Create the embeddings using the ids from the original tokenizer. 
embeddings = create_embeddings( tokenized=token_ids, model=model, device=device, - pad_token_id=tokenizer.get_vocab()[pad_token], + pad_token_id=tokenizer_model.pad_token_id or 0, pooling=pooling, ) + # Maybe apply quantization if vocabulary_quantization is not None: _, weights = post_process_embeddings(np.asarray(embeddings), None, sif_coefficient=sif_coefficient) embeddings, token_mapping, weights = quantize_vocabulary( @@ -163,7 +152,7 @@ def distill_from_model( vectors=embeddings, weights=weights, token_mapping=token_mapping, - tokenizer=backend_tokenizer, + tokenizer=tokenizer_model.to_tokenizer(), config=config, base_model_name=model_name, language=language, @@ -174,13 +163,14 @@ def distill_from_model( def _validate_parameters( sif_coefficient: float | None, token_remove_pattern: str | None, -) -> tuple[float | None, re.Pattern | None]: +) -> tuple[float | None, re.Pattern[str] | None]: """ Validate the parameters passed to the distillation function. :param sif_coefficient: The SIF coefficient to use. If this is None, no weighting is applied. Should be a value >= 0 and < 1.0. A value of 1e-4 is a good default. - :param token_remove_pattern: If this is set to a string, we compile this into a regex. Any tokens that conform to this regex pattern will be removed from the vocabulary. + :param token_remove_pattern: If this is set to a string, we compile this into a regex. Any tokens that conform to + this regex pattern will be removed from the vocabulary. :return: The SIF coefficient to use. :raises: ValueError if the regex can't be compiled. @@ -189,7 +179,7 @@ def _validate_parameters( if not 0 < sif_coefficient < 1.0: raise ValueError("SIF coefficient must be a value > 0 and < 1.0.") - token_remove_regex: re.Pattern | None = None + token_remove_regex: re.Pattern[str] | None = None if token_remove_pattern is not None: try: token_remove_regex = re.compile(token_remove_pattern) @@ -215,7 +205,8 @@ def distill( Distill a staticmodel from a sentence transformer. This function creates a set of embeddings from a sentence transformer. It does this by doing either - a forward pass for all subword tokens in the tokenizer, or by doing a forward pass for all tokens in a passed vocabulary. + a forward pass for all subword tokens in the tokenizer, or by doing a forward pass for all tokens in a passed + vocabulary. If you pass through a vocabulary, we create a custom word tokenizer for that vocabulary. If you don't pass a vocabulary, we use the model's tokenizer directly. @@ -228,10 +219,13 @@ def distill( If this is 'auto', we don't reduce dimenionality, but still apply PCA. :param sif_coefficient: The SIF coefficient to use. If this is None, no weighting is applied. Should be a value >= 0 and < 1.0. A value of 1e-4 is a good default. - :param token_remove_pattern: If this is set to a string, we compile this into a regex. Any tokens that conform to this regex pattern will be removed from the vocabulary. - :param trust_remote_code: Whether to trust the remote code. If this is False, we will only load components coming from `transformers`. If this is True, we will load all components. + :param token_remove_pattern: If this is set to a string, we compile this into a regex. Any tokens that conform to + this regex pattern will be removed from the vocabulary. + :param trust_remote_code: Whether to trust the remote code. If this is False, we will only load components coming + from `transformers`. If this is True, we will load all components. :param quantize_to: The data type to quantize to. 
Can be any of the DType enum members or their string equivalents. - :param vocabulary_quantization: The number of clusters to use for vocabulary quantization. If this is None, no quantization is performed. + :param vocabulary_quantization: The number of clusters to use for vocabulary quantization. If this is None, no + quantization is performed. :param pooling: The pooling mode to use for creating embeddings. Can be one of: 'mean' (default): mean over all tokens. Robust and works well in most cases. 'last': use the last token's hidden state (often the [EOS] token). Common for decoder-style models. diff --git a/model2vec/tokenizer/__init__.py b/model2vec/tokenizer/__init__.py index 4cfe498..7ed0d8e 100644 --- a/model2vec/tokenizer/__init__.py +++ b/model2vec/tokenizer/__init__.py @@ -4,9 +4,7 @@ from model2vec.tokenizer.tokenizer import ( clean_and_create_vocabulary, - create_tokenizer, - replace_vocabulary, turn_tokens_into_ids, ) -__all__ = ["clean_and_create_vocabulary", "create_tokenizer", "turn_tokens_into_ids", "replace_vocabulary"] +__all__ = ["clean_and_create_vocabulary", "turn_tokens_into_ids"] diff --git a/model2vec/tokenizer/datamodels.py b/model2vec/tokenizer/datamodels.py deleted file mode 100644 index 6aa5ecf..0000000 --- a/model2vec/tokenizer/datamodels.py +++ /dev/null @@ -1,14 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class Token: - """A class to represent a token.""" - - form: str - # The normalized and pretokenized form of the token - normalized_form: str - # Whether the word is a continuing subword. - is_subword: bool - # Whether the token is internal to the model. - is_internal: bool diff --git a/model2vec/tokenizer/model.py b/model2vec/tokenizer/model.py deleted file mode 100644 index 12dd388..0000000 --- a/model2vec/tokenizer/model.py +++ /dev/null @@ -1,43 +0,0 @@ -from __future__ import annotations - -from typing import Any - -import numpy as np - - -def process_tokenizer( - tokenizer_json: dict[str, Any], pre_tokenized_tokens: list[str], unk_token: str | None -) -> dict[str, Any]: - """Process the WordPiece tokenizer JSON.""" - if tokenizer_json["model"]["type"] == "Unigram": - return _process_unigram(tokenizer_json, pre_tokenized_tokens, unk_token) - tokenizer_json["model"]["type"] = "Unigram" - tokenizer_json["model"]["unk_id"] = pre_tokenized_tokens.index(unk_token) if unk_token else None - - token_weights = np.asarray([_calculate_token_weight_for_unigram(token) for token in pre_tokenized_tokens]) - proba = (token_weights / np.sum(token_weights)).tolist() - tokenizer_json["model"]["vocab"] = [(token, np.log(p)) for token, p in zip(pre_tokenized_tokens, proba)] - - return tokenizer_json - - -def _process_unigram( - tokenizer_json: dict[str, Any], pre_tokenized_tokens: list[str], unk_token: str | None -) -> dict[str, Any]: - """Process the Unigram tokenizer JSON.""" - current_probas = dict(tokenizer_json["model"]["vocab"]) - avg_proba = sum(current_probas.values()) / len(current_probas) - new_probas = [[word, current_probas.get(word, avg_proba)] for word in pre_tokenized_tokens] - tokenizer_json["model"]["vocab"] = new_probas - - tokens, _ = zip(*tokenizer_json["model"]["vocab"]) - if unk_token is not None: - tokenizer_json["model"]["unk_id"] = list(tokens).index(unk_token) - - return tokenizer_json - - -def _calculate_token_weight_for_unigram(token: str) -> float: - """Calculate the token weight for Unigram.""" - # Always prefer longer tokens. 
- return len(token) + token.count("▁") + token.count("Ġ") diff --git a/model2vec/tokenizer/normalizer.py b/model2vec/tokenizer/normalizer.py deleted file mode 100644 index 15cb11e..0000000 --- a/model2vec/tokenizer/normalizer.py +++ /dev/null @@ -1,42 +0,0 @@ -from string import punctuation - -from tokenizers import Regex, Tokenizer -from tokenizers.normalizers import Replace, Sequence, Strip - - -def replace_normalizer( - tokenizer: Tokenizer, -) -> Tokenizer: - """ - Replace the normalizer for the tokenizer. - - The new normalizer will replace punctuation with a space before and after the punctuation. - It will also replace multiple spaces with a single space and strip the right side of the string. - If the tokenizer already has a normalizer, it will be added to the new normalizer. - If the tokenizer does not have a normalizer, a new normalizer will be created. - - :param tokenizer: The tokenizer to change. - :return: The tokenizer with a replaced normalizer. - """ - spaces_punctuation = tokenizer.encode("a, ,", add_special_tokens=False).tokens - if len(spaces_punctuation) != 3: - add_space = False - else: - _, first_comma, second_comma = spaces_punctuation - add_space = first_comma == second_comma == "," - - normalizer = tokenizer.normalizer - new_normalizers = [] - for char in punctuation: - replacement = f" {char} " if add_space else f"{char} " - new_normalizers.append(Replace(char, replacement)) - - new_normalizers.append(Replace(Regex(r"\s+"), " ")) - new_normalizers.append(Strip(right=True)) - if normalizer is None: - normalizer = Sequence(new_normalizers) # type: ignore - else: - normalizer = Sequence([normalizer] + new_normalizers) # type: ignore - tokenizer.normalizer = normalizer # type: ignore - - return tokenizer diff --git a/model2vec/tokenizer/pretokenizer.py b/model2vec/tokenizer/pretokenizer.py deleted file mode 100644 index ac7ee42..0000000 --- a/model2vec/tokenizer/pretokenizer.py +++ /dev/null @@ -1,57 +0,0 @@ -from __future__ import annotations - -import json -from typing import Any - -from tokenizers import Tokenizer - -_FORBIDDEN_PRETOKENIZERS = ( - "WhiteSpace", - "WhitespaceSplit", - "BertPreTokenizer", - "CharDelimiterSplit", - "Punctuation", - "Split", - "UnicodeScripts", -) -_BASIC_METASPACE = {"type": "Metaspace", "replacement": "▁", "prepend_scheme": "always", "split": False} - - -def _fix_single_pretokenizer(pre_tokenizer: dict[str, Any]) -> dict[str, Any] | None: - """Fixes a single pretokenizer to allow multiword units.""" - if pre_tokenizer["type"] in _FORBIDDEN_PRETOKENIZERS: - return None - if pre_tokenizer["type"] == "ByteLevel": - pre_tokenizer["add_prefix_space"] = True - pre_tokenizer["use_regex"] = False - if pre_tokenizer["type"] == "Metaspace": - pre_tokenizer["split"] = False - pre_tokenizer["prepend_scheme"] = "always" - - return pre_tokenizer - - -def replace_pretokenizer(tokenizer: Tokenizer) -> Tokenizer: - """Fixes a single pretokenizer to allow multiword units.""" - tokenizer_json = json.loads(tokenizer.to_str()) - pre_tokenizer_json = tokenizer_json.get("pre_tokenizer", None) - - if pre_tokenizer_json is None: - pre_tokenizer_json = _BASIC_METASPACE - - elif pre_tokenizer_json["type"] == "Sequence": - new_pretokenizers = [] - for single_pretokenizer in pre_tokenizer_json["pretokenizers"]: - new_pretokenizer = _fix_single_pretokenizer(single_pretokenizer) - if new_pretokenizer is not None: - new_pretokenizers.append(new_pretokenizer) - - if new_pretokenizers: - pre_tokenizer_json["pretokenizers"] = new_pretokenizers - else: - 
pre_tokenizer_json = _BASIC_METASPACE - - pre_tokenizer_json = _fix_single_pretokenizer(pre_tokenizer_json) or _BASIC_METASPACE - tokenizer_json["pre_tokenizer"] = pre_tokenizer_json - - return tokenizer.from_str(json.dumps(tokenizer_json)) diff --git a/model2vec/tokenizer/tokenizer.py b/model2vec/tokenizer/tokenizer.py index 24d082a..fbcb804 100644 --- a/model2vec/tokenizer/tokenizer.py +++ b/model2vec/tokenizer/tokenizer.py @@ -1,398 +1,98 @@ from __future__ import annotations -import json import logging import re -from typing import Any, cast -from tokenizers import Tokenizer -from tokenizers.normalizers import Normalizer -from tokenizers.pre_tokenizers import ( - PreTokenizer, -) -from transformers.tokenization_utils_fast import PreTrainedTokenizerFast - -from model2vec.tokenizer.datamodels import Token -from model2vec.tokenizer.model import process_tokenizer -from model2vec.tokenizer.normalizer import replace_normalizer -from model2vec.tokenizer.pretokenizer import replace_pretokenizer +from skeletoken import TokenizerModel logger = logging.getLogger(__name__) -_DEFAULT_POST_PROCESSOR_TEMPLATE = { - "type": "TemplateProcessing", - "single": [{"Sequence": {"id": "A", "type_id": 0}}], - "pair": [{"Sequence": {"id": "A", "type_id": 0}}, {"Sequence": {"id": "B", "type_id": 0}}], - "special_tokens": {}, -} - - -def _remap_added_tokens( - special_tokens: list[dict[str, Any]], - vocabulary: list[str], -) -> list[dict[str, Any]]: - """ - Remap special tokens in the tokenizer. - - This function updates the special tokens in the tokenizer based on a mapping provided. - It also ensures that the special tokens are present in the vocabulary. - - :param special_tokens: The special tokens to remap. - :param vocabulary: The vocabulary as a list of tokens. - :return: The updated special tokens. - """ - # Deepcopy - special_tokens = [{**x} for x in special_tokens] - for token in special_tokens: - token["id"] = vocabulary.index(token["content"]) - - return special_tokens - - -def replace_vocabulary( - tokenizer: Tokenizer, new_vocabulary: list[Token], unk_token: str | None, pad_token: str | None -) -> Tokenizer: - """Replace the vocabulary of a tokenizer with a new one.""" - tokenizer_json: dict[str, Any] = json.loads(tokenizer.to_str()) - added_tokens: list[dict[str, Any]] = tokenizer_json["added_tokens"] - - pre_tokenized_tokens = [x.normalized_form for x in new_vocabulary] - - # We need to remove the added tokens but keep [UNK] and [PAD] tokens. 
- added_tokens = _rename_added_token(unk_token, "[UNK]", added_tokens, pre_tokenized_tokens) - added_tokens = _rename_added_token(pad_token, "[PAD]", added_tokens, pre_tokenized_tokens) - - # Remove old added tokens from added tokens - tokenizer_json["added_tokens"] = [x for x in added_tokens if x["content"] in {"[UNK]", "[PAD]"}] - tokenizer_json = process_tokenizer( - tokenizer_json, pre_tokenized_tokens, "[UNK]" if "[UNK]" in pre_tokenized_tokens else None - ) - - # Remap special tokens - tokenizer_json["added_tokens"] = _remap_added_tokens( - special_tokens=tokenizer_json["added_tokens"], - vocabulary=pre_tokenized_tokens, - ) - tokenizer_json["post_processor"] = _DEFAULT_POST_PROCESSOR_TEMPLATE - - return Tokenizer.from_str(json.dumps(tokenizer_json)) - - -def _rename_added_token( - form: str | None, new_form: str, added_tokens: list[dict[str, Any]], vocabulary: list[str] -) -> list[dict[str, Any]]: - """Rename added tokens in the tokenizer.""" - if form is None: - return added_tokens - - idx = vocabulary.index(form) - added_token = [x for x in added_tokens if x["content"] == form] - if added_token: - added_token[0]["id"] = idx - added_token[0]["content"] = new_form - vocabulary[idx] = new_form - - return added_tokens - - def clean_and_create_vocabulary( - tokenizer: PreTrainedTokenizerFast, - vocabulary: list[str], - token_remove_regex: re.Pattern | None, -) -> tuple[list[Token], Tokenizer]: + model: TokenizerModel, + vocabulary_to_add: list[str], + token_remove_regex: re.Pattern[str] | None, +) -> TokenizerModel: """Cleans a vocabulary by removing duplicates and tokens that were already in the vocabulary.""" seen_tokens = set() - post_normalize_seen_tokens = set() - n_empty = 0 - n_duplicates = 0 - - backend_tokenizer = tokenizer.backend_tokenizer - - # Make a base list of tokens. - internal_vocab: dict[str, int] = tokenizer.get_vocab() - internal_tokens: list[str] = [k for k, _ in sorted(internal_vocab.items(), key=lambda x: x[1])] - cleaned_vocabulary = _process_internal_tokens(tokenizer, backend_tokenizer, internal_tokens, token_remove_regex) - # Copy the backend tokenizer to avoid modifying the original. 
-    backend_tokenizer = backend_tokenizer.from_str(backend_tokenizer.to_str())
-    backend_tokenizer = replace_normalizer(backend_tokenizer)
-
-    internal_tokens_set = {token.form for token in cleaned_vocabulary}
-
-    normalizer: Normalizer | None = backend_tokenizer.normalizer
-    for token in vocabulary:
-        if normalizer is not None:
-            token = cast(str, normalizer.normalize_str(token))
-
-        if not token:
+    n_duplicate = 0
+    n_empty = 0
+    n_regex_removed = 0
+
+    internal_tokens: list[str] = model.sorted_vocabulary
+    if token_remove_regex:
+        len_before = len(internal_tokens)
+        tokens_to_remove = [token for token in internal_tokens if token_remove_regex.match(token)]
+        model = model.remove_tokens_from_vocabulary(tokens_to_remove)
+        n_regex_removed = len_before - len(model.sorted_vocabulary)
+    preprocessor = model.preprocessor
+
+    seen_tokens = set(internal_tokens)
+    tokens_to_add: list[str] = []
+    added_tokens_to_add: list[str] = []
+    for token in vocabulary_to_add:
+        preprocessed = preprocessor.preprocess(token)
+        if len(preprocessed) < 1:
+            logger.warning(f"Token '{token}' was empty after preprocessing.")
             n_empty += 1
             continue
-
-        pre_tokenizer: PreTokenizer | None = backend_tokenizer.pre_tokenizer
-        normalized_token = token
-        if pre_tokenizer is not None:
-            normalized_token = _normalize_vocabulary_token(
-                token=token,
-                pre_tokenizer=pre_tokenizer,
-            )
-
-        # We need to check whether the pretokenized token is in the vocabulary.
-        # But we need to return the original token, because that will be tokenized
-        # again by the tokenizer during featurization.
-        if normalized_token in seen_tokens or normalized_token in internal_tokens_set:
-            n_duplicates += 1
+        if len(preprocessed) > 1:
+            tokens_as_str = [f"'{subword}'" for subword in preprocessed]
+            split_into = ",".join(tokens_as_str)
+            logger.warning(f"Token '{token}' was split into multiple tokens after preprocessing: [{split_into}]")
+            added_tokens_to_add.append(token)
             continue
-
-        # Add the possibly pretokenized token to seen
-        seen_tokens.add(normalized_token)
-
-        # After checking the token exists, we need to normalize it into the token
-        # it will become. For byte tokens, this means we don't do anything. For
-        # other types of tokens, we will insert a metaspace.
-        # In the case of multiword tokens, we replace any spaces with the metaspace
-        # or byte prefix token.
-        if not normalized_token.startswith(("▁", "Ġ")):
-            normalized_token = normalized_token.replace(" ", "▁")
-            normalized_token = f"▁{normalized_token}"
-        else:
-            normalized_token = normalized_token.replace(" ", normalized_token[0])
-
-        if normalized_token in post_normalize_seen_tokens:
-            n_duplicates += 1
+        token = preprocessed[0]
+        if token in seen_tokens:
+            logger.warning(f"Token '{token}' was already in the vocabulary.")
+            n_duplicate += 1
             continue
+        if token_remove_regex and token_remove_regex.match(token):
+            logger.warning(f"Token '{token}' was removed due to regex match.")
+            n_regex_removed += 1
+            continue
+        seen_tokens.add(token)
+        tokens_to_add.append(token)

-        post_normalize_seen_tokens.add(normalized_token)
-        # Add the original string to the vocabulary.
- cleaned_vocabulary.append( - Token(form=token, normalized_form=normalized_token, is_subword=False, is_internal=False) - ) - - if n_duplicates: - logger.warning(f"Removed {n_duplicates} duplicate tokens.") - if n_empty: - logger.warning(f"Removed {n_empty} empty tokens.") - - return cleaned_vocabulary, replace_pretokenizer(backend_tokenizer) - - -def _process_internal_tokens( - tokenizer: PreTrainedTokenizerFast, - backend_tokenizer: Tokenizer, - internal_tokens: list[str], - token_remove_regex: re.Pattern | None, -) -> list[Token]: - """Clean internal tokens.""" - # Get the pad and unk token from the tokenizer. - pad_token: str | None = tokenizer.special_tokens_map.get("pad_token") # type: ignore[assignment] - unk_token: str | None = tokenizer.special_tokens_map.get("unk_token") # type: ignore[assignment] - # Empty set if no pad or unk token is set. - added_tokens_to_keep: set[str] = {x for x in (pad_token, unk_token) if x is not None} - added_tokens_to_remove = set(tokenizer.added_tokens_encoder) - added_tokens_to_keep - cleaned_internal_tokens: list[Token] = [] - - # Figure out whether token is a subword or not. - encoded = backend_tokenizer.encode(f" {'a' * 25}", add_special_tokens=False) - first_token, second_token, *_ = encoded.tokens - # Isolate the prefix. We can't do first_token[0] because we don't know - # how long the prefix is. - # e.g., "Ġaaaa" -> "Ġ" - a_index = None if "a" not in first_token else first_token.index("a") - word_prefix = first_token[:a_index] - is_byte_prefix = word_prefix == "Ġ" - second_token = encoded.tokens[1] - # The second token is the first subword token. - # If a tokenizer uses subwords, this token will have been prefixed. - # We don't know how long the prefix is. - a_index = None if "a" not in second_token else second_token.index("a") - subword_prefix = second_token[:a_index] - - pre_tokenizer: PreTokenizer | None = backend_tokenizer.pre_tokenizer - - for token in internal_tokens: - # Create the token objects. If this returns None, it was unsucessful for some reason. - if token_object := _create_single_internal_token( - token=token, - subword_prefix=subword_prefix, - word_prefix=word_prefix, - pre_tokenizer=pre_tokenizer, - is_byte_prefix=is_byte_prefix, - token_remove_regex=token_remove_regex, - added_tokens_to_keep=added_tokens_to_keep, - added_tokens_to_remove=added_tokens_to_remove, - ): - cleaned_internal_tokens.append(token_object) - - if len(cleaned_internal_tokens) != len(internal_tokens): - logger.info( - f"Removed {len(internal_tokens) - len(cleaned_internal_tokens)} internal tokens from the vocabulary." - ) - - return cleaned_internal_tokens - - -def _create_single_internal_token( - token: str, - subword_prefix: str, - word_prefix: str, - pre_tokenizer: PreTokenizer | None, - is_byte_prefix: bool, - token_remove_regex: re.Pattern | None, - added_tokens_to_keep: set[str], - added_tokens_to_remove: set[str], -) -> Token | None: - """Create a token object from a string.""" - if token in added_tokens_to_remove: - # We remove any tokens that are added tokens that aren't [UNK] or [PAD]. - return None - if token in added_tokens_to_keep: - # Don't put added tokens through the regular motions. - return Token(form=token, normalized_form=token, is_subword=False, is_internal=True) - if token_remove_regex and token_remove_regex.match(token): - # If the regex matches, remove the token. 
- return None - - # A token is a subword if there is a subword prefix and the word - # starts with a subword prefix, or if there is a WORD prefix, and the word - # does not start with this prefix. For metaspace tokenizers, for example: - # "doghouse" -> ["_dog", "house"] - # So we can only tell that "house" is a subword by knowing that it is not prefixed - # and word-initial tokens are. - is_subword = False - if subword_prefix: - is_subword = bool(token.startswith(subword_prefix)) - if word_prefix: - is_subword = not bool(token.startswith(word_prefix)) - - # Byte prefixed tokenizers don't need to be checked. - if pre_tokenizer is not None and not is_byte_prefix: - # We need to check the thing without prefixes. If we have a word prefix, - # we need to check tokens that have are subwords. Other way around for subword - # prefixes. - if (subword_prefix and not is_subword) or (word_prefix and is_subword): - # If this is True, the token is unreachable, even though it is a subword token. - if len(pre_tokenizer.pre_tokenize_str(token)) > 1: - return None + model = model.add_tokens_to_vocabulary(tokens_to_add, preprocess_tokens=True) + model = model.add_addedtokens(added_tokens_to_add, is_special=False, single_word=False, normalized=True) - # Turn a token into a normalized form for later processing. - normalized_form = _create_normalized_form(token, subword_prefix, word_prefix, is_byte_prefix, is_subword) + n_multiword = len(added_tokens_to_add) + _report_statistics(n_multiword, n_duplicate, n_regex_removed, n_empty) - return Token(form=token, normalized_form=normalized_form, is_subword=is_subword, is_internal=True) + return model -def _create_normalized_form( - token: str, subword_prefix: str, word_prefix: str, is_byte_prefix: bool, is_subword: bool -) -> str: - """Turn an internal token string into a normalized form.""" - # We don't need to check byte prefixed strings. - if is_byte_prefix: - return token - # We need to check if the token is a subword or not and remove the prefix. - if is_subword: - return token.removeprefix(subword_prefix) - # If the token is not a subword, we need to remove the word prefix, and add metaspace. - return f"▁{token.removeprefix(word_prefix)}" +def _report_statistics(n_multiword: int, n_duplicate: int, n_regex_removed: int, n_empty: int) -> None: + """Helper function to avoid increasing complexity in main function.""" + if n_multiword: + logger.info(f"Added {n_multiword} multi-word tokens to the vocabulary.") + if n_duplicate: + logger.info(f"Removed {n_duplicate} duplicate tokens.") + if n_regex_removed: + logger.info(f"Removed {n_regex_removed} tokens due to regex match.") + if n_empty: + logger.info(f"Removed {n_empty} empty tokens.") -def turn_tokens_into_ids( - tokens: list[Token], tokenizer: PreTrainedTokenizerFast, unk_token: str | None -) -> list[list[int]]: +def turn_tokens_into_ids(tokens: list[str], model: TokenizerModel) -> list[list[int]]: """ Convert a list of Token objects to their corresponding token ID sequences. :param tokens: List of Token objects to convert - :param tokenizer: The tokenizer to use for converting tokens to IDs - :param unk_token: The string form of the unk token. + :param model: The tokenizermodel of the tokenizer. 
:return: List of token IDs corresponding to the input tokens """ - unk_id = None if unk_token is None else tokenizer.convert_tokens_to_ids(unk_token) - prefix, suffix = find_eos_bos(tokenizer) + prefix, suffix = model.bos_ids or [], model.eos_ids or [] + vocabulary = model.vocabulary + tokenizer = model.to_tokenizer() token_ids: list[list[int]] = [] for token in tokens: - if token.is_internal: - # Careful. Any incorrect tokens will just get `[UNK]``, so this could go horribly wrong - # Cast because return type is wrong. - token_id: int = cast(int, tokenizer.convert_tokens_to_ids(token.form)) or 0 - # Explicitly check and warn if `unk_id` appears, but don't crash. - if unk_id is not None and token_id == unk_id and token.form != unk_token: - logger.warning(f"Token {token.form} was set to unk. This is wrong.") + if token_id := vocabulary.get(token): token_ids.append([*prefix, token_id, *suffix]) else: - token_ids.append(tokenizer.encode(token.form)) + token_ids.append(tokenizer.encode(token).ids) return token_ids - - -def find_eos_bos(tokenizer: PreTrainedTokenizerFast) -> tuple[list[int], list[int]]: - """Finds the eos and bos tokens for a tokenizer.""" - # Little bit complicated, because not all tokenizers have eos and bos tokens. - encoding = tokenizer.encode("a", add_special_tokens=True) - if len(encoding) != 3: - a_encoded = tokenizer.encode("a", add_special_tokens=False) - if len(a_encoded) != 1: - raise ValueError( - f"Error while encoding, couldn't determine eos and bos tokens. The model tokenizes 'a' to '{a_encoded}'" - ) - a_idx = encoding.index(a_encoded[0]) - prefix, suffix = encoding[:a_idx], encoding[a_idx + 1 :] - else: - prefix, suffix = encoding[:1], encoding[2:] - return prefix, suffix - - -def _normalize_vocabulary_token(token: str, pre_tokenizer: PreTokenizer) -> str: - """Normalize a token that is not in the initial token vocabulary.""" - # Add prefix space for byte tokenizers. - prefixed_token = f" {token}" - pretokenized_tokens: tuple[str, ...] - pretokenized_tokens, offsets = zip(*pre_tokenizer.pre_tokenize_str(prefixed_token)) - # The first item is always the start of the token. - new_token = [pretokenized_tokens[0]] - # Loop over the subtokens and offsets. - for t, (s, _) in zip(pretokenized_tokens[1:], offsets[1:]): - # Do not prefix the token with a space if it starts with a metaspace. - if t.startswith("▁"): - new_token.append(t) - # If the character before the subtoken is a space, we have a - # multiword token. e.g., "room for the moon", which is split into - # ["room", "for", "the", "moon"]. - # If it doesn't have a space, it is part of a complex multiword token, - # e.g., "chat-gpt", which is split into ["chat", "-", "gpt"]. - elif prefixed_token[s - 1] == " ": - new_token.append(f" {t}") - else: - new_token.append(t) - normalized_token = "".join(new_token) - - return normalized_token - - -def create_tokenizer( - tokenizer: PreTrainedTokenizerFast, - vocabulary: list[str], - token_remove_regex: re.Pattern | None = None, -) -> PreTrainedTokenizerFast: - """ - Create a tokenizer by adding tokens to the vocabulary. - - This function turns any tokenizer into a supertoken tokenizer. It does the following: - 1. Turns the tokenizer model into a unigram model. - 2. Adds a new pretokenizer, splitting on punctuation. - 3. Adds all tokens in vocabulary to the model. - 4. Removes any internal tokens that conform to the regex. - - :param tokenizer: The tokenizer to use. - :param vocabulary: The vocabulary to use. 
- :param token_remove_regex: The regex to use to remove tokens from the vocabulary. - :return: The created tokenizer. - """ - unk_token = cast(str | None, tokenizer.special_tokens_map.get("unk_token")) - pad_token = cast(str | None, tokenizer.special_tokens_map.get("pad_token")) - cleaned_vocabulary, backend_tokenizer = clean_and_create_vocabulary(tokenizer, vocabulary, token_remove_regex) - new_tokenizer = replace_vocabulary(backend_tokenizer, cleaned_vocabulary, unk_token, pad_token) - - tokenizer_object = PreTrainedTokenizerFast(tokenizer_object=new_tokenizer) - tokenizer_object.add_special_tokens({"pad_token": "[PAD]", "unk_token": "[UNK]"}) - - return tokenizer_object diff --git a/pyproject.toml b/pyproject.toml index 812638e..c17f554 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ dev = [ "ruff", ] -distill = ["torch", "transformers", "scikit-learn"] +distill = ["torch", "transformers", "scikit-learn", "skeletoken>=0.3.0"] onnx = ["onnx", "torch"] # train also installs inference train = ["torch", "lightning", "scikit-learn", "skops"] diff --git a/tests/conftest.py b/tests/conftest.py index dae9b1c..676392a 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,6 +5,7 @@ import numpy as np import pytest import torch +from skeletoken import TokenizerModel from tokenizers import Tokenizer from tokenizers.models import BPE, Unigram, WordPiece from tokenizers.pre_tokenizers import Whitespace @@ -54,6 +55,12 @@ def mock_berttokenizer() -> PreTrainedTokenizerFast: return cast(PreTrainedTokenizerFast, AutoTokenizer.from_pretrained("tests/data/test_tokenizer")) +@pytest.fixture(scope="function") +def mock_tokenizermodel() -> TokenizerModel: + """Loads the tokenizer as a TokenizerModel.""" + return TokenizerModel.from_pretrained("tests/data/test_tokenizer") + + @pytest.fixture def mock_transformer() -> PreTrainedModel: """Create a mock transformer model.""" diff --git a/tests/test_distillation.py b/tests/test_distillation.py index 3c50ae6..4a9fc30 100644 --- a/tests/test_distillation.py +++ b/tests/test_distillation.py @@ -8,6 +8,8 @@ import numpy as np import pytest from pytest import LogCaptureFixture +from skeletoken import TokenizerModel +from transformers import BertTokenizerFast from transformers.modeling_utils import PreTrainedModel from transformers.tokenization_utils_fast import PreTrainedTokenizerFast @@ -78,6 +80,28 @@ def test_distill_from_model( assert static_model.base_model_name == static_model2.base_model_name +@patch.object(import_module("model2vec.distill.distillation"), "model_info") +@patch("transformers.AutoModel.from_pretrained") +def test_distill_removal_pattern_all_tokens( + mock_auto_model: MagicMock, + mock_model_info: MagicMock, + mock_berttokenizer: BertTokenizerFast, + mock_transformer: PreTrainedModel, +) -> None: + """Test the removal pattern.""" + mock_model_info.return_value = type("ModelInfo", (object,), {"cardData": {"language": "en"}}) + mock_auto_model.return_value = mock_transformer + + with pytest.raises(ValueError): + distill_from_model( + model=mock_transformer, + tokenizer=mock_berttokenizer, + vocabulary=None, + device="cpu", + token_remove_pattern=r".*", + ) + + @patch.object(import_module("model2vec.distill.distillation"), "model_info") @patch("transformers.AutoModel.from_pretrained") def test_distill_removal_pattern( @@ -90,8 +114,7 @@ def test_distill_removal_pattern( mock_model_info.return_value = type("ModelInfo", (object,), {"cardData": {"language": "en"}}) mock_auto_model.return_value = mock_transformer - # The vocab 
size is 30522, but we remove 998 tokens: [CLS], [SEP], and [MASK], and all [unused] tokens. - expected_vocab_size = mock_berttokenizer.vocab_size - 998 + expected_vocab_size = mock_berttokenizer.vocab_size static_model = distill_from_model( model=mock_transformer, @@ -112,6 +135,16 @@ def test_distill_removal_pattern( ) assert len(static_model.embedding) == expected_vocab_size + # Test whether regexes remove words from the vocabulary + static_model = distill_from_model( + model=mock_transformer, + tokenizer=mock_berttokenizer, + vocabulary=["hellooooooo"], + device="cpu", + token_remove_pattern="hellooooooo", + ) + assert "hellooooooo" not in static_model.tokens + # Weird pattern. with pytest.raises(ValueError): _ = distill_from_model( @@ -126,14 +159,14 @@ def test_distill_removal_pattern( @pytest.mark.parametrize( "vocabulary, pca_dims, sif_coefficient, expected_shape", [ - (None, 256, None, (29524, 256)), # PCA applied, SIF off - (None, "auto", None, (29524, 768)), # PCA 'auto', SIF off - (None, "auto", 1e-4, (29524, 768)), # PCA 'auto', SIF on + (None, 256, None, (30522, 256)), # PCA applied, SIF off + (None, "auto", None, (30522, 768)), # PCA 'auto', SIF off + (None, "auto", 1e-4, (30522, 768)), # PCA 'auto', SIF on (None, "auto", 0, None), # invalid SIF (too low) -> raises (None, "auto", 1, None), # invalid SIF (too high) -> raises - (None, 1024, None, (29524, 768)), # PCA set high (no reduction) - (["wordA", "wordB"], 4, None, (29526, 4)), # Custom vocab, PCA applied - (None, None, None, (29524, 768)), # No PCA, SIF off + (None, 1024, None, (30522, 768)), # PCA set high (no reduction) + (["wordA", "wordB"], 4, None, (30524, 4)), # Custom vocab, PCA applied + (None, None, None, (30522, 768)), # No PCA, SIF off ], ) @patch.object(import_module("model2vec.distill.distillation"), "model_info") @@ -161,6 +194,7 @@ def test_distill( device="cpu", pca_dims=pca_dims, sif_coefficient=sif_coefficient, + token_remove_pattern=None, ) else: static_model = distill( @@ -169,6 +203,7 @@ def test_distill( device="cpu", pca_dims=pca_dims, sif_coefficient=sif_coefficient, + token_remove_pattern=None, ) assert isinstance(static_model, StaticModel) assert static_model.embedding.shape == expected_shape @@ -231,15 +266,16 @@ def test__post_process_embeddings( "added_tokens, expected_output, expected_warnings", [ # Case: duplicates ("2010", "government") and an empty token ("") - (["2010", "government", "nerv", ""], ["nerv"], ["Removed", "duplicate", "empty"]), + (["2010", "government", "nerv", ""], ["nerv"], ["already", "empty"]), # Case: No duplicates, no empty tokens (["worda", "wordb", "wordc"], ["worda", "wordb", "wordc"], []), # Case: Only empty token (""), should return an empty list - ([""], [], ["Removed", "empty"]), + ([""], [], ["empty"]), + (["multi word token"], ["multi word token"], []), ], ) def test_clean_and_create_vocabulary( - mock_berttokenizer: PreTrainedTokenizerFast, + mock_tokenizermodel: TokenizerModel, added_tokens: list[str], expected_output: list[str], expected_warnings: list[str], @@ -247,11 +283,12 @@ def test_clean_and_create_vocabulary( ) -> None: """Test the clean_and_create_vocabulary helper.""" with caplog.at_level("WARNING"): - tokens, _ = clean_and_create_vocabulary(mock_berttokenizer, added_tokens, None) + old_tokens = mock_tokenizermodel.sorted_vocabulary + tokenizer_model = clean_and_create_vocabulary(mock_tokenizermodel, added_tokens, None) + tokens = set(tokenizer_model.sorted_vocabulary) - set(old_tokens) - cleaned_vocab = [token.form for token in tokens if not 
token.is_internal] # Check the cleaned vocabulary matches the expected output - assert cleaned_vocab == expected_output + assert tokens == set(expected_output) # Check the warnings were logged as expected logged_warnings = [record.message for record in caplog.records] @@ -268,9 +305,11 @@ def test_clean_and_create_vocabulary( (PoolingMode.POOLER, True, [7.0, 7.0]), # pooler_output used ], ) -def test_pooling_strategies(mock_transformer, pooling, with_pooler, expected_rows) -> None: +def test_pooling_strategies( + mock_transformer: PreTrainedModel, pooling: PoolingMode, with_pooler: bool, expected_rows: tuple[float, float] +) -> None: """Test different pooling strategies.""" - mock_transformer.with_pooler = with_pooler + mock_transformer.with_pooler = with_pooler # type: ignore tokenized = [[10, 11, 12], [20]] out = create_embeddings( model=mock_transformer, @@ -284,9 +323,9 @@ def test_pooling_strategies(mock_transformer, pooling, with_pooler, expected_row assert np.allclose(out, expected, rtol=1e-6, atol=0.0) -def test_pooler_raises_without_pooler_output(mock_transformer) -> None: +def test_pooler_raises_without_pooler_output(mock_transformer: PreTrainedModel) -> None: """POOLER should raise when the model doesn't expose pooler_output.""" - mock_transformer.with_pooler = False + mock_transformer.with_pooler = False # type: ignore tokenized = [[10, 11, 12], [20]] with pytest.raises(ValueError, match="pooler_output"): _ = create_embeddings( diff --git a/tests/test_tokenizer.py b/tests/test_tokenizer.py deleted file mode 100644 index e1267df..0000000 --- a/tests/test_tokenizer.py +++ /dev/null @@ -1,123 +0,0 @@ -import json - -import pytest -from transformers.tokenization_utils_fast import PreTrainedTokenizerFast - -from model2vec.tokenizer.model import _calculate_token_weight_for_unigram, _process_unigram, process_tokenizer -from model2vec.tokenizer.normalizer import replace_normalizer -from model2vec.tokenizer.pretokenizer import _FORBIDDEN_PRETOKENIZERS, _fix_single_pretokenizer, replace_pretokenizer -from model2vec.tokenizer.tokenizer import _rename_added_token, create_tokenizer - - -def test_fix_single_pretokenizer() -> None: - """Test the _fix_single_pretokenizer function.""" - result = _fix_single_pretokenizer({"type": "ByteLevel", "add_prefix_space": False, "use_regex": True}) - assert result == {"type": "ByteLevel", "add_prefix_space": True, "use_regex": False} - - for tokenizer_type in _FORBIDDEN_PRETOKENIZERS: - result = _fix_single_pretokenizer({"type": tokenizer_type}) - assert result is None - - result = _fix_single_pretokenizer( - {"type": "Metaspace", "split": True, "prepend_scheme": "never", "replacement": "▁"} - ) - assert result == {"type": "Metaspace", "replacement": "▁", "prepend_scheme": "always", "split": False} - - -def test_replace_pretokenizer(mock_berttokenizer: PreTrainedTokenizerFast) -> None: - """Test the replace_pretokenizer function.""" - tokenizer = replace_pretokenizer(mock_berttokenizer.backend_tokenizer) - assert tokenizer.pre_tokenizer is not None - assert tokenizer.pre_tokenizer.__class__.__name__ == "Metaspace" - assert tokenizer.pre_tokenizer.replacement == "▁" - assert tokenizer.pre_tokenizer.prepend_scheme == "always" - assert not tokenizer.pre_tokenizer.split - - tokenizer.pre_tokenizer = None # type: ignore - tokenizer = replace_pretokenizer(tokenizer) - assert tokenizer.pre_tokenizer is not None - assert tokenizer.pre_tokenizer.__class__.__name__ == "Metaspace" - assert tokenizer.pre_tokenizer.replacement == "▁" - assert 
tokenizer.pre_tokenizer.prepend_scheme == "always" - assert tokenizer.pre_tokenizer.split is False - - -def test_replace_normalizer(mock_berttokenizer: PreTrainedTokenizerFast) -> None: - """Test the replace_normalizer function.""" - tokenizer = replace_normalizer(mock_berttokenizer.backend_tokenizer) - assert tokenizer.normalizer is not None - assert tokenizer.normalizer.__class__.__name__ == "Sequence" - - assert tokenizer.normalizer.normalize_str("Hello, World!") == "hello , world !" - - tokenizer.normalizer = None # type: ignore - tokenizer = replace_normalizer(tokenizer) - assert tokenizer.normalizer.normalize_str("Hello, World!") == "Hello , World !" - - -@pytest.mark.parametrize( - "word,weight", - [ - ("dog", 3), - ("cat", 3), - ("▁longer▁word", 14), - ("▁word", 6), - ("▁", 2), # Single underscore - ("", 0), # Empty string - ("▁a" * 100, 300), # Long word with underscores - ], -) -def test_calculate_token_weight_for_unigram(word: str, weight: int) -> None: - """Test the _calculate_token_weight_for_unigram function.""" - assert _calculate_token_weight_for_unigram(word) == weight - - -def test_process_tokenizer(mock_berttokenizer: PreTrainedTokenizerFast) -> None: - """Test the process_tokenizer function.""" - vocab = ["dog", "cat", "longer_word", "word", "a" * 100, "[UNK]"] - tokenizer_json = json.loads(mock_berttokenizer.backend_tokenizer.to_str()) - tokenizer_json = process_tokenizer(tokenizer_json=tokenizer_json, pre_tokenized_tokens=vocab, unk_token="[UNK]") - - assert tokenizer_json["model"]["type"] == "Unigram" - assert tokenizer_json["model"]["unk_id"] == 5 # Index of "[UNK]" - assert len(tokenizer_json["model"]["vocab"]) == 6 - assert all(isinstance(token, tuple) and len(token) == 2 for token in tokenizer_json["model"]["vocab"]) - for (x, _), y in zip(tokenizer_json["model"]["vocab"], vocab): - assert x == y, f"Expected {y}, but got {x}" - - -def test_process_unigram() -> None: - """Test the _process_unigram function.""" - vocab = ["dog", "cat", "longer_word", "word", "a" * 100, "[UNK]"] - orig_vocab = [("dog", 0), ("cat", 0)] - model = {"model": {"type": "Unigram", "vocab": orig_vocab}} - processed_model = _process_unigram(model, vocab, "[UNK]") - assert processed_model["model"]["type"] == "Unigram" - assert processed_model["model"]["unk_id"] == 5 # Index of "[UNK]" - assert len(processed_model["model"]["vocab"]) == 6 - assert all(isinstance(token, list) and len(token) == 2 for token in processed_model["model"]["vocab"]) - - for (x, score), y in zip(processed_model["model"]["vocab"], vocab): - assert x == y, f"Expected {y}, but got {x}" - if x in orig_vocab: - assert score == 0 - - assert process_tokenizer(model, vocab, "[UNK]") == processed_model - - -def test_rename_added_token() -> None: - """Test the _rename_added_token function.""" - # Invalid input - result = _rename_added_token(None, "a", [{"content": "a", "id": 0}], ["a"]) - assert result == [{"content": "a", "id": 0}] - - # Rename 'a' to 'c' - result = _rename_added_token("a", "c", [{"content": "a"}], ["a"]) - assert result == [{"content": "c", "id": 0}] - - -def test_create_tokenizer(mock_berttokenizer: PreTrainedTokenizerFast) -> None: - """Test the create_tokenizer function.""" - tokenizer = create_tokenizer(tokenizer=mock_berttokenizer, vocabulary=["dog", "catssssss"], token_remove_regex=None) - assert tokenizer.backend_tokenizer.get_vocab_size() == 29525 - assert tokenizer.encode("catssssss") == [29524] diff --git a/tests/test_utils.py b/tests/test_utils.py index 7ae331c..e5f6187 100644 --- 
a/tests/test_utils.py +++ b/tests/test_utils.py @@ -76,7 +76,7 @@ def test_importable() -> None: def test_get_package_extras() -> None: """Test package extras.""" extras = set(get_package_extras("model2vec", "distill")) - assert extras == {"torch", "transformers", "scikit-learn"} + assert extras == {"skeletoken", "torch", "transformers", "scikit-learn"} def test_get_package_extras_empty() -> None: diff --git a/uv.lock b/uv.lock index 231ed24..8c0668f 100644 --- a/uv.lock +++ b/uv.lock @@ -116,6 +116,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fb/76/641ae371508676492379f16e2fa48f4e2c11741bd63c48be4b12a6b09cba/aiosignal-1.4.0-py3-none-any.whl", hash = "sha256:053243f8b92b990551949e63930a839ff0cf0b0ebbe0597b0f3fb19e1a0fe82e", size = 7490, upload-time = "2025-07-03T22:54:42.156Z" }, ] +[[package]] +name = "annotated-types" +version = "0.7.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ee/67/531ea369ba64dcff5ec9c3402f9f51bf748cec26dde048a2f973a4eea7f5/annotated_types-0.7.0.tar.gz", hash = "sha256:aff07c09a53a08bc8cfccb9c85b05f1aa9a2a6f23728d790723543408344ce89", size = 16081, upload-time = "2024-05-20T21:33:25.928Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/b6/6307fbef88d9b5ee7421e68d78a9f162e0da4900bc5f5793f6d3d0e34fb8/annotated_types-0.7.0-py3-none-any.whl", hash = "sha256:1f02e8b43a8fbbc3f3e0d4f0f4bfc8131bcb4eebe8849b8e5c773f3a1c582a53", size = 13643, upload-time = "2024-05-20T21:33:24.1Z" }, +] + [[package]] name = "asttokens" version = "3.0.0" @@ -879,6 +888,7 @@ dev = [ ] distill = [ { name = "scikit-learn" }, + { name = "skeletoken" }, { name = "torch" }, { name = "transformers" }, ] @@ -924,6 +934,7 @@ requires-dist = [ { name = "scikit-learn", marker = "extra == 'quantization'" }, { name = "scikit-learn", marker = "extra == 'train'" }, { name = "setuptools" }, + { name = "skeletoken", marker = "extra == 'distill'", specifier = ">=0.3.0" }, { name = "skops", marker = "extra == 'inference'" }, { name = "skops", marker = "extra == 'train'" }, { name = "tokenizers", specifier = ">=0.20" }, @@ -1674,6 +1685,139 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8e/37/efad0257dc6e593a18957422533ff0f87ede7c9c6ea010a2177d738fb82f/pure_eval-0.2.3-py3-none-any.whl", hash = "sha256:1db8e35b67b3d218d818ae653e27f06c3aa420901fa7b081ca98cbedc874e0d0", size = 11842, upload-time = "2024-07-21T12:58:20.04Z" }, ] +[[package]] +name = "pydantic" +version = "2.12.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "annotated-types" }, + { name = "pydantic-core" }, + { name = "typing-extensions" }, + { name = "typing-inspection" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/69/44/36f1a6e523abc58ae5f928898e4aca2e0ea509b5aa6f6f392a5d882be928/pydantic-2.12.5.tar.gz", hash = "sha256:4d351024c75c0f085a9febbb665ce8c0c6ec5d30e903bdb6394b7ede26aebb49", size = 821591, upload-time = "2025-11-26T15:11:46.471Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/5a/87/b70ad306ebb6f9b585f114d0ac2137d792b48be34d732d60e597c2f8465a/pydantic-2.12.5-py3-none-any.whl", hash = "sha256:e561593fccf61e8a20fc46dfc2dfe075b8be7d0188df33f221ad1f0139180f9d", size = 463580, upload-time = "2025-11-26T15:11:44.605Z" }, +] + +[[package]] +name = "pydantic-core" +version = "2.41.5" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions" }, +] +sdist = { url = 
"https://files.pythonhosted.org/packages/71/70/23b021c950c2addd24ec408e9ab05d59b035b39d97cdc1130e1bce647bb6/pydantic_core-2.41.5.tar.gz", hash = "sha256:08daa51ea16ad373ffd5e7606252cc32f07bc72b28284b6bc9c6df804816476e", size = 460952, upload-time = "2025-11-04T13:43:49.098Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/c6/90/32c9941e728d564b411d574d8ee0cf09b12ec978cb22b294995bae5549a5/pydantic_core-2.41.5-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:77b63866ca88d804225eaa4af3e664c5faf3568cea95360d21f4725ab6e07146", size = 2107298, upload-time = "2025-11-04T13:39:04.116Z" }, + { url = "https://files.pythonhosted.org/packages/fb/a8/61c96a77fe28993d9a6fb0f4127e05430a267b235a124545d79fea46dd65/pydantic_core-2.41.5-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:dfa8a0c812ac681395907e71e1274819dec685fec28273a28905df579ef137e2", size = 1901475, upload-time = "2025-11-04T13:39:06.055Z" }, + { url = "https://files.pythonhosted.org/packages/5d/b6/338abf60225acc18cdc08b4faef592d0310923d19a87fba1faf05af5346e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5921a4d3ca3aee735d9fd163808f5e8dd6c6972101e4adbda9a4667908849b97", size = 1918815, upload-time = "2025-11-04T13:39:10.41Z" }, + { url = "https://files.pythonhosted.org/packages/d1/1c/2ed0433e682983d8e8cba9c8d8ef274d4791ec6a6f24c58935b90e780e0a/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e25c479382d26a2a41b7ebea1043564a937db462816ea07afa8a44c0866d52f9", size = 2065567, upload-time = "2025-11-04T13:39:12.244Z" }, + { url = "https://files.pythonhosted.org/packages/b3/24/cf84974ee7d6eae06b9e63289b7b8f6549d416b5c199ca2d7ce13bbcf619/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f547144f2966e1e16ae626d8ce72b4cfa0caedc7fa28052001c94fb2fcaa1c52", size = 2230442, upload-time = "2025-11-04T13:39:13.962Z" }, + { url = "https://files.pythonhosted.org/packages/fd/21/4e287865504b3edc0136c89c9c09431be326168b1eb7841911cbc877a995/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:6f52298fbd394f9ed112d56f3d11aabd0d5bd27beb3084cc3d8ad069483b8941", size = 2350956, upload-time = "2025-11-04T13:39:15.889Z" }, + { url = "https://files.pythonhosted.org/packages/a8/76/7727ef2ffa4b62fcab916686a68a0426b9b790139720e1934e8ba797e238/pydantic_core-2.41.5-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:100baa204bb412b74fe285fb0f3a385256dad1d1879f0a5cb1499ed2e83d132a", size = 2068253, upload-time = "2025-11-04T13:39:17.403Z" }, + { url = "https://files.pythonhosted.org/packages/d5/8c/a4abfc79604bcb4c748e18975c44f94f756f08fb04218d5cb87eb0d3a63e/pydantic_core-2.41.5-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:05a2c8852530ad2812cb7914dc61a1125dc4e06252ee98e5638a12da6cc6fb6c", size = 2177050, upload-time = "2025-11-04T13:39:19.351Z" }, + { url = "https://files.pythonhosted.org/packages/67/b1/de2e9a9a79b480f9cb0b6e8b6ba4c50b18d4e89852426364c66aa82bb7b3/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:29452c56df2ed968d18d7e21f4ab0ac55e71dc59524872f6fc57dcf4a3249ed2", size = 2147178, upload-time = "2025-11-04T13:39:21Z" }, + { url = "https://files.pythonhosted.org/packages/16/c1/dfb33f837a47b20417500efaa0378adc6635b3c79e8369ff7a03c494b4ac/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_armv7l.whl", hash = "sha256:d5160812ea7a8a2ffbe233d8da666880cad0cbaf5d4de74ae15c313213d62556", size = 
2341833, upload-time = "2025-11-04T13:39:22.606Z" }, + { url = "https://files.pythonhosted.org/packages/47/36/00f398642a0f4b815a9a558c4f1dca1b4020a7d49562807d7bc9ff279a6c/pydantic_core-2.41.5-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:df3959765b553b9440adfd3c795617c352154e497a4eaf3752555cfb5da8fc49", size = 2321156, upload-time = "2025-11-04T13:39:25.843Z" }, + { url = "https://files.pythonhosted.org/packages/7e/70/cad3acd89fde2010807354d978725ae111ddf6d0ea46d1ea1775b5c1bd0c/pydantic_core-2.41.5-cp310-cp310-win32.whl", hash = "sha256:1f8d33a7f4d5a7889e60dc39856d76d09333d8a6ed0f5f1190635cbec70ec4ba", size = 1989378, upload-time = "2025-11-04T13:39:27.92Z" }, + { url = "https://files.pythonhosted.org/packages/76/92/d338652464c6c367e5608e4488201702cd1cbb0f33f7b6a85a60fe5f3720/pydantic_core-2.41.5-cp310-cp310-win_amd64.whl", hash = "sha256:62de39db01b8d593e45871af2af9e497295db8d73b085f6bfd0b18c83c70a8f9", size = 2013622, upload-time = "2025-11-04T13:39:29.848Z" }, + { url = "https://files.pythonhosted.org/packages/e8/72/74a989dd9f2084b3d9530b0915fdda64ac48831c30dbf7c72a41a5232db8/pydantic_core-2.41.5-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:a3a52f6156e73e7ccb0f8cced536adccb7042be67cb45f9562e12b319c119da6", size = 2105873, upload-time = "2025-11-04T13:39:31.373Z" }, + { url = "https://files.pythonhosted.org/packages/12/44/37e403fd9455708b3b942949e1d7febc02167662bf1a7da5b78ee1ea2842/pydantic_core-2.41.5-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7f3bf998340c6d4b0c9a2f02d6a400e51f123b59565d74dc60d252ce888c260b", size = 1899826, upload-time = "2025-11-04T13:39:32.897Z" }, + { url = "https://files.pythonhosted.org/packages/33/7f/1d5cab3ccf44c1935a359d51a8a2a9e1a654b744b5e7f80d41b88d501eec/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:378bec5c66998815d224c9ca994f1e14c0c21cb95d2f52b6021cc0b2a58f2a5a", size = 1917869, upload-time = "2025-11-04T13:39:34.469Z" }, + { url = "https://files.pythonhosted.org/packages/6e/6a/30d94a9674a7fe4f4744052ed6c5e083424510be1e93da5bc47569d11810/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:e7b576130c69225432866fe2f4a469a85a54ade141d96fd396dffcf607b558f8", size = 2063890, upload-time = "2025-11-04T13:39:36.053Z" }, + { url = "https://files.pythonhosted.org/packages/50/be/76e5d46203fcb2750e542f32e6c371ffa9b8ad17364cf94bb0818dbfb50c/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:6cb58b9c66f7e4179a2d5e0f849c48eff5c1fca560994d6eb6543abf955a149e", size = 2229740, upload-time = "2025-11-04T13:39:37.753Z" }, + { url = "https://files.pythonhosted.org/packages/d3/ee/fed784df0144793489f87db310a6bbf8118d7b630ed07aa180d6067e653a/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:88942d3a3dff3afc8288c21e565e476fc278902ae4d6d134f1eeda118cc830b1", size = 2350021, upload-time = "2025-11-04T13:39:40.94Z" }, + { url = "https://files.pythonhosted.org/packages/c8/be/8fed28dd0a180dca19e72c233cbf58efa36df055e5b9d90d64fd1740b828/pydantic_core-2.41.5-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f31d95a179f8d64d90f6831d71fa93290893a33148d890ba15de25642c5d075b", size = 2066378, upload-time = "2025-11-04T13:39:42.523Z" }, + { url = "https://files.pythonhosted.org/packages/b0/3b/698cf8ae1d536a010e05121b4958b1257f0b5522085e335360e53a6b1c8b/pydantic_core-2.41.5-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = 
"sha256:c1df3d34aced70add6f867a8cf413e299177e0c22660cc767218373d0779487b", size = 2175761, upload-time = "2025-11-04T13:39:44.553Z" }, + { url = "https://files.pythonhosted.org/packages/b8/ba/15d537423939553116dea94ce02f9c31be0fa9d0b806d427e0308ec17145/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:4009935984bd36bd2c774e13f9a09563ce8de4abaa7226f5108262fa3e637284", size = 2146303, upload-time = "2025-11-04T13:39:46.238Z" }, + { url = "https://files.pythonhosted.org/packages/58/7f/0de669bf37d206723795f9c90c82966726a2ab06c336deba4735b55af431/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_armv7l.whl", hash = "sha256:34a64bc3441dc1213096a20fe27e8e128bd3ff89921706e83c0b1ac971276594", size = 2340355, upload-time = "2025-11-04T13:39:48.002Z" }, + { url = "https://files.pythonhosted.org/packages/e5/de/e7482c435b83d7e3c3ee5ee4451f6e8973cff0eb6007d2872ce6383f6398/pydantic_core-2.41.5-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:c9e19dd6e28fdcaa5a1de679aec4141f691023916427ef9bae8584f9c2fb3b0e", size = 2319875, upload-time = "2025-11-04T13:39:49.705Z" }, + { url = "https://files.pythonhosted.org/packages/fe/e6/8c9e81bb6dd7560e33b9053351c29f30c8194b72f2d6932888581f503482/pydantic_core-2.41.5-cp311-cp311-win32.whl", hash = "sha256:2c010c6ded393148374c0f6f0bf89d206bf3217f201faa0635dcd56bd1520f6b", size = 1987549, upload-time = "2025-11-04T13:39:51.842Z" }, + { url = "https://files.pythonhosted.org/packages/11/66/f14d1d978ea94d1bc21fc98fcf570f9542fe55bfcc40269d4e1a21c19bf7/pydantic_core-2.41.5-cp311-cp311-win_amd64.whl", hash = "sha256:76ee27c6e9c7f16f47db7a94157112a2f3a00e958bc626e2f4ee8bec5c328fbe", size = 2011305, upload-time = "2025-11-04T13:39:53.485Z" }, + { url = "https://files.pythonhosted.org/packages/56/d8/0e271434e8efd03186c5386671328154ee349ff0354d83c74f5caaf096ed/pydantic_core-2.41.5-cp311-cp311-win_arm64.whl", hash = "sha256:4bc36bbc0b7584de96561184ad7f012478987882ebf9f9c389b23f432ea3d90f", size = 1972902, upload-time = "2025-11-04T13:39:56.488Z" }, + { url = "https://files.pythonhosted.org/packages/5f/5d/5f6c63eebb5afee93bcaae4ce9a898f3373ca23df3ccaef086d0233a35a7/pydantic_core-2.41.5-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f41a7489d32336dbf2199c8c0a215390a751c5b014c2c1c5366e817202e9cdf7", size = 2110990, upload-time = "2025-11-04T13:39:58.079Z" }, + { url = "https://files.pythonhosted.org/packages/aa/32/9c2e8ccb57c01111e0fd091f236c7b371c1bccea0fa85247ac55b1e2b6b6/pydantic_core-2.41.5-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:070259a8818988b9a84a449a2a7337c7f430a22acc0859c6b110aa7212a6d9c0", size = 1896003, upload-time = "2025-11-04T13:39:59.956Z" }, + { url = "https://files.pythonhosted.org/packages/68/b8/a01b53cb0e59139fbc9e4fda3e9724ede8de279097179be4ff31f1abb65a/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e96cea19e34778f8d59fe40775a7a574d95816eb150850a85a7a4c8f4b94ac69", size = 1919200, upload-time = "2025-11-04T13:40:02.241Z" }, + { url = "https://files.pythonhosted.org/packages/38/de/8c36b5198a29bdaade07b5985e80a233a5ac27137846f3bc2d3b40a47360/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed2e99c456e3fadd05c991f8f437ef902e00eedf34320ba2b0842bd1c3ca3a75", size = 2052578, upload-time = "2025-11-04T13:40:04.401Z" }, + { url = "https://files.pythonhosted.org/packages/00/b5/0e8e4b5b081eac6cb3dbb7e60a65907549a1ce035a724368c330112adfdd/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = 
"sha256:65840751b72fbfd82c3c640cff9284545342a4f1eb1586ad0636955b261b0b05", size = 2208504, upload-time = "2025-11-04T13:40:06.072Z" }, + { url = "https://files.pythonhosted.org/packages/77/56/87a61aad59c7c5b9dc8caad5a41a5545cba3810c3e828708b3d7404f6cef/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:e536c98a7626a98feb2d3eaf75944ef6f3dbee447e1f841eae16f2f0a72d8ddc", size = 2335816, upload-time = "2025-11-04T13:40:07.835Z" }, + { url = "https://files.pythonhosted.org/packages/0d/76/941cc9f73529988688a665a5c0ecff1112b3d95ab48f81db5f7606f522d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eceb81a8d74f9267ef4081e246ffd6d129da5d87e37a77c9bde550cb04870c1c", size = 2075366, upload-time = "2025-11-04T13:40:09.804Z" }, + { url = "https://files.pythonhosted.org/packages/d3/43/ebef01f69baa07a482844faaa0a591bad1ef129253ffd0cdaa9d8a7f72d3/pydantic_core-2.41.5-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d38548150c39b74aeeb0ce8ee1d8e82696f4a4e16ddc6de7b1d8823f7de4b9b5", size = 2171698, upload-time = "2025-11-04T13:40:12.004Z" }, + { url = "https://files.pythonhosted.org/packages/b1/87/41f3202e4193e3bacfc2c065fab7706ebe81af46a83d3e27605029c1f5a6/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:c23e27686783f60290e36827f9c626e63154b82b116d7fe9adba1fda36da706c", size = 2132603, upload-time = "2025-11-04T13:40:13.868Z" }, + { url = "https://files.pythonhosted.org/packages/49/7d/4c00df99cb12070b6bccdef4a195255e6020a550d572768d92cc54dba91a/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_armv7l.whl", hash = "sha256:482c982f814460eabe1d3bb0adfdc583387bd4691ef00b90575ca0d2b6fe2294", size = 2329591, upload-time = "2025-11-04T13:40:15.672Z" }, + { url = "https://files.pythonhosted.org/packages/cc/6a/ebf4b1d65d458f3cda6a7335d141305dfa19bdc61140a884d165a8a1bbc7/pydantic_core-2.41.5-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:bfea2a5f0b4d8d43adf9d7b8bf019fb46fdd10a2e5cde477fbcb9d1fa08c68e1", size = 2319068, upload-time = "2025-11-04T13:40:17.532Z" }, + { url = "https://files.pythonhosted.org/packages/49/3b/774f2b5cd4192d5ab75870ce4381fd89cf218af999515baf07e7206753f0/pydantic_core-2.41.5-cp312-cp312-win32.whl", hash = "sha256:b74557b16e390ec12dca509bce9264c3bbd128f8a2c376eaa68003d7f327276d", size = 1985908, upload-time = "2025-11-04T13:40:19.309Z" }, + { url = "https://files.pythonhosted.org/packages/86/45/00173a033c801cacf67c190fef088789394feaf88a98a7035b0e40d53dc9/pydantic_core-2.41.5-cp312-cp312-win_amd64.whl", hash = "sha256:1962293292865bca8e54702b08a4f26da73adc83dd1fcf26fbc875b35d81c815", size = 2020145, upload-time = "2025-11-04T13:40:21.548Z" }, + { url = "https://files.pythonhosted.org/packages/f9/22/91fbc821fa6d261b376a3f73809f907cec5ca6025642c463d3488aad22fb/pydantic_core-2.41.5-cp312-cp312-win_arm64.whl", hash = "sha256:1746d4a3d9a794cacae06a5eaaccb4b8643a131d45fbc9af23e353dc0a5ba5c3", size = 1976179, upload-time = "2025-11-04T13:40:23.393Z" }, + { url = "https://files.pythonhosted.org/packages/87/06/8806241ff1f70d9939f9af039c6c35f2360cf16e93c2ca76f184e76b1564/pydantic_core-2.41.5-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:941103c9be18ac8daf7b7adca8228f8ed6bb7a1849020f643b3a14d15b1924d9", size = 2120403, upload-time = "2025-11-04T13:40:25.248Z" }, + { url = "https://files.pythonhosted.org/packages/94/02/abfa0e0bda67faa65fef1c84971c7e45928e108fe24333c81f3bfe35d5f5/pydantic_core-2.41.5-cp313-cp313-macosx_11_0_arm64.whl", hash = 
"sha256:112e305c3314f40c93998e567879e887a3160bb8689ef3d2c04b6cc62c33ac34", size = 1896206, upload-time = "2025-11-04T13:40:27.099Z" }, + { url = "https://files.pythonhosted.org/packages/15/df/a4c740c0943e93e6500f9eb23f4ca7ec9bf71b19e608ae5b579678c8d02f/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cbaad15cb0c90aa221d43c00e77bb33c93e8d36e0bf74760cd00e732d10a6a0", size = 1919307, upload-time = "2025-11-04T13:40:29.806Z" }, + { url = "https://files.pythonhosted.org/packages/9a/e3/6324802931ae1d123528988e0e86587c2072ac2e5394b4bc2bc34b61ff6e/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:03ca43e12fab6023fc79d28ca6b39b05f794ad08ec2feccc59a339b02f2b3d33", size = 2063258, upload-time = "2025-11-04T13:40:33.544Z" }, + { url = "https://files.pythonhosted.org/packages/c9/d4/2230d7151d4957dd79c3044ea26346c148c98fbf0ee6ebd41056f2d62ab5/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:dc799088c08fa04e43144b164feb0c13f9a0bc40503f8df3e9fde58a3c0c101e", size = 2214917, upload-time = "2025-11-04T13:40:35.479Z" }, + { url = "https://files.pythonhosted.org/packages/e6/9f/eaac5df17a3672fef0081b6c1bb0b82b33ee89aa5cec0d7b05f52fd4a1fa/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:97aeba56665b4c3235a0e52b2c2f5ae9cd071b8a8310ad27bddb3f7fb30e9aa2", size = 2332186, upload-time = "2025-11-04T13:40:37.436Z" }, + { url = "https://files.pythonhosted.org/packages/cf/4e/35a80cae583a37cf15604b44240e45c05e04e86f9cfd766623149297e971/pydantic_core-2.41.5-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:406bf18d345822d6c21366031003612b9c77b3e29ffdb0f612367352aab7d586", size = 2073164, upload-time = "2025-11-04T13:40:40.289Z" }, + { url = "https://files.pythonhosted.org/packages/bf/e3/f6e262673c6140dd3305d144d032f7bd5f7497d3871c1428521f19f9efa2/pydantic_core-2.41.5-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:b93590ae81f7010dbe380cdeab6f515902ebcbefe0b9327cc4804d74e93ae69d", size = 2179146, upload-time = "2025-11-04T13:40:42.809Z" }, + { url = "https://files.pythonhosted.org/packages/75/c7/20bd7fc05f0c6ea2056a4565c6f36f8968c0924f19b7d97bbfea55780e73/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:01a3d0ab748ee531f4ea6c3e48ad9dac84ddba4b0d82291f87248f2f9de8d740", size = 2137788, upload-time = "2025-11-04T13:40:44.752Z" }, + { url = "https://files.pythonhosted.org/packages/3a/8d/34318ef985c45196e004bc46c6eab2eda437e744c124ef0dbe1ff2c9d06b/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_armv7l.whl", hash = "sha256:6561e94ba9dacc9c61bce40e2d6bdc3bfaa0259d3ff36ace3b1e6901936d2e3e", size = 2340133, upload-time = "2025-11-04T13:40:46.66Z" }, + { url = "https://files.pythonhosted.org/packages/9c/59/013626bf8c78a5a5d9350d12e7697d3d4de951a75565496abd40ccd46bee/pydantic_core-2.41.5-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:915c3d10f81bec3a74fbd4faebe8391013ba61e5a1a8d48c4455b923bdda7858", size = 2324852, upload-time = "2025-11-04T13:40:48.575Z" }, + { url = "https://files.pythonhosted.org/packages/1a/d9/c248c103856f807ef70c18a4f986693a46a8ffe1602e5d361485da502d20/pydantic_core-2.41.5-cp313-cp313-win32.whl", hash = "sha256:650ae77860b45cfa6e2cdafc42618ceafab3a2d9a3811fcfbd3bbf8ac3c40d36", size = 1994679, upload-time = "2025-11-04T13:40:50.619Z" }, + { url = 
"https://files.pythonhosted.org/packages/9e/8b/341991b158ddab181cff136acd2552c9f35bd30380422a639c0671e99a91/pydantic_core-2.41.5-cp313-cp313-win_amd64.whl", hash = "sha256:79ec52ec461e99e13791ec6508c722742ad745571f234ea6255bed38c6480f11", size = 2019766, upload-time = "2025-11-04T13:40:52.631Z" }, + { url = "https://files.pythonhosted.org/packages/73/7d/f2f9db34af103bea3e09735bb40b021788a5e834c81eedb541991badf8f5/pydantic_core-2.41.5-cp313-cp313-win_arm64.whl", hash = "sha256:3f84d5c1b4ab906093bdc1ff10484838aca54ef08de4afa9de0f5f14d69639cd", size = 1981005, upload-time = "2025-11-04T13:40:54.734Z" }, + { url = "https://files.pythonhosted.org/packages/ea/28/46b7c5c9635ae96ea0fbb779e271a38129df2550f763937659ee6c5dbc65/pydantic_core-2.41.5-cp314-cp314-macosx_10_12_x86_64.whl", hash = "sha256:3f37a19d7ebcdd20b96485056ba9e8b304e27d9904d233d7b1015db320e51f0a", size = 2119622, upload-time = "2025-11-04T13:40:56.68Z" }, + { url = "https://files.pythonhosted.org/packages/74/1a/145646e5687e8d9a1e8d09acb278c8535ebe9e972e1f162ed338a622f193/pydantic_core-2.41.5-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:1d1d9764366c73f996edd17abb6d9d7649a7eb690006ab6adbda117717099b14", size = 1891725, upload-time = "2025-11-04T13:40:58.807Z" }, + { url = "https://files.pythonhosted.org/packages/23/04/e89c29e267b8060b40dca97bfc64a19b2a3cf99018167ea1677d96368273/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:25e1c2af0fce638d5f1988b686f3b3ea8cd7de5f244ca147c777769e798a9cd1", size = 1915040, upload-time = "2025-11-04T13:41:00.853Z" }, + { url = "https://files.pythonhosted.org/packages/84/a3/15a82ac7bd97992a82257f777b3583d3e84bdb06ba6858f745daa2ec8a85/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:506d766a8727beef16b7adaeb8ee6217c64fc813646b424d0804d67c16eddb66", size = 2063691, upload-time = "2025-11-04T13:41:03.504Z" }, + { url = "https://files.pythonhosted.org/packages/74/9b/0046701313c6ef08c0c1cf0e028c67c770a4e1275ca73131563c5f2a310a/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4819fa52133c9aa3c387b3328f25c1facc356491e6135b459f1de698ff64d869", size = 2213897, upload-time = "2025-11-04T13:41:05.804Z" }, + { url = "https://files.pythonhosted.org/packages/8a/cd/6bac76ecd1b27e75a95ca3a9a559c643b3afcd2dd62086d4b7a32a18b169/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2b761d210c9ea91feda40d25b4efe82a1707da2ef62901466a42492c028553a2", size = 2333302, upload-time = "2025-11-04T13:41:07.809Z" }, + { url = "https://files.pythonhosted.org/packages/4c/d2/ef2074dc020dd6e109611a8be4449b98cd25e1b9b8a303c2f0fca2f2bcf7/pydantic_core-2.41.5-cp314-cp314-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:22f0fb8c1c583a3b6f24df2470833b40207e907b90c928cc8d3594b76f874375", size = 2064877, upload-time = "2025-11-04T13:41:09.827Z" }, + { url = "https://files.pythonhosted.org/packages/18/66/e9db17a9a763d72f03de903883c057b2592c09509ccfe468187f2a2eef29/pydantic_core-2.41.5-cp314-cp314-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2782c870e99878c634505236d81e5443092fba820f0373997ff75f90f68cd553", size = 2180680, upload-time = "2025-11-04T13:41:12.379Z" }, + { url = "https://files.pythonhosted.org/packages/d3/9e/3ce66cebb929f3ced22be85d4c2399b8e85b622db77dad36b73c5387f8f8/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_aarch64.whl", hash = "sha256:0177272f88ab8312479336e1d777f6b124537d47f2123f89cb37e0accea97f90", size 
= 2138960, upload-time = "2025-11-04T13:41:14.627Z" }, + { url = "https://files.pythonhosted.org/packages/a6/62/205a998f4327d2079326b01abee48e502ea739d174f0a89295c481a2272e/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_armv7l.whl", hash = "sha256:63510af5e38f8955b8ee5687740d6ebf7c2a0886d15a6d65c32814613681bc07", size = 2339102, upload-time = "2025-11-04T13:41:16.868Z" }, + { url = "https://files.pythonhosted.org/packages/3c/0d/f05e79471e889d74d3d88f5bd20d0ed189ad94c2423d81ff8d0000aab4ff/pydantic_core-2.41.5-cp314-cp314-musllinux_1_1_x86_64.whl", hash = "sha256:e56ba91f47764cc14f1daacd723e3e82d1a89d783f0f5afe9c364b8bb491ccdb", size = 2326039, upload-time = "2025-11-04T13:41:18.934Z" }, + { url = "https://files.pythonhosted.org/packages/ec/e1/e08a6208bb100da7e0c4b288eed624a703f4d129bde2da475721a80cab32/pydantic_core-2.41.5-cp314-cp314-win32.whl", hash = "sha256:aec5cf2fd867b4ff45b9959f8b20ea3993fc93e63c7363fe6851424c8a7e7c23", size = 1995126, upload-time = "2025-11-04T13:41:21.418Z" }, + { url = "https://files.pythonhosted.org/packages/48/5d/56ba7b24e9557f99c9237e29f5c09913c81eeb2f3217e40e922353668092/pydantic_core-2.41.5-cp314-cp314-win_amd64.whl", hash = "sha256:8e7c86f27c585ef37c35e56a96363ab8de4e549a95512445b85c96d3e2f7c1bf", size = 2015489, upload-time = "2025-11-04T13:41:24.076Z" }, + { url = "https://files.pythonhosted.org/packages/4e/bb/f7a190991ec9e3e0ba22e4993d8755bbc4a32925c0b5b42775c03e8148f9/pydantic_core-2.41.5-cp314-cp314-win_arm64.whl", hash = "sha256:e672ba74fbc2dc8eea59fb6d4aed6845e6905fc2a8afe93175d94a83ba2a01a0", size = 1977288, upload-time = "2025-11-04T13:41:26.33Z" }, + { url = "https://files.pythonhosted.org/packages/92/ed/77542d0c51538e32e15afe7899d79efce4b81eee631d99850edc2f5e9349/pydantic_core-2.41.5-cp314-cp314t-macosx_10_12_x86_64.whl", hash = "sha256:8566def80554c3faa0e65ac30ab0932b9e3a5cd7f8323764303d468e5c37595a", size = 2120255, upload-time = "2025-11-04T13:41:28.569Z" }, + { url = "https://files.pythonhosted.org/packages/bb/3d/6913dde84d5be21e284439676168b28d8bbba5600d838b9dca99de0fad71/pydantic_core-2.41.5-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:b80aa5095cd3109962a298ce14110ae16b8c1aece8b72f9dafe81cf597ad80b3", size = 1863760, upload-time = "2025-11-04T13:41:31.055Z" }, + { url = "https://files.pythonhosted.org/packages/5a/f0/e5e6b99d4191da102f2b0eb9687aaa7f5bea5d9964071a84effc3e40f997/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3006c3dd9ba34b0c094c544c6006cc79e87d8612999f1a5d43b769b89181f23c", size = 1878092, upload-time = "2025-11-04T13:41:33.21Z" }, + { url = "https://files.pythonhosted.org/packages/71/48/36fb760642d568925953bcc8116455513d6e34c4beaa37544118c36aba6d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:72f6c8b11857a856bcfa48c86f5368439f74453563f951e473514579d44aa612", size = 2053385, upload-time = "2025-11-04T13:41:35.508Z" }, + { url = "https://files.pythonhosted.org/packages/20/25/92dc684dd8eb75a234bc1c764b4210cf2646479d54b47bf46061657292a8/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:5cb1b2f9742240e4bb26b652a5aeb840aa4b417c7748b6f8387927bc6e45e40d", size = 2218832, upload-time = "2025-11-04T13:41:37.732Z" }, + { url = "https://files.pythonhosted.org/packages/e2/09/f53e0b05023d3e30357d82eb35835d0f6340ca344720a4599cd663dca599/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:bd3d54f38609ff308209bd43acea66061494157703364ae40c951f83ba99a1a9", size = 2327585, upload-time = "2025-11-04T13:41:40Z" }, + { url = "https://files.pythonhosted.org/packages/aa/4e/2ae1aa85d6af35a39b236b1b1641de73f5a6ac4d5a7509f77b814885760c/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:2ff4321e56e879ee8d2a879501c8e469414d948f4aba74a2d4593184eb326660", size = 2041078, upload-time = "2025-11-04T13:41:42.323Z" }, + { url = "https://files.pythonhosted.org/packages/cd/13/2e215f17f0ef326fc72afe94776edb77525142c693767fc347ed6288728d/pydantic_core-2.41.5-cp314-cp314t-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d0d2568a8c11bf8225044aa94409e21da0cb09dcdafe9ecd10250b2baad531a9", size = 2173914, upload-time = "2025-11-04T13:41:45.221Z" }, + { url = "https://files.pythonhosted.org/packages/02/7a/f999a6dcbcd0e5660bc348a3991c8915ce6599f4f2c6ac22f01d7a10816c/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_aarch64.whl", hash = "sha256:a39455728aabd58ceabb03c90e12f71fd30fa69615760a075b9fec596456ccc3", size = 2129560, upload-time = "2025-11-04T13:41:47.474Z" }, + { url = "https://files.pythonhosted.org/packages/3a/b1/6c990ac65e3b4c079a4fb9f5b05f5b013afa0f4ed6780a3dd236d2cbdc64/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_armv7l.whl", hash = "sha256:239edca560d05757817c13dc17c50766136d21f7cd0fac50295499ae24f90fdf", size = 2329244, upload-time = "2025-11-04T13:41:49.992Z" }, + { url = "https://files.pythonhosted.org/packages/d9/02/3c562f3a51afd4d88fff8dffb1771b30cfdfd79befd9883ee094f5b6c0d8/pydantic_core-2.41.5-cp314-cp314t-musllinux_1_1_x86_64.whl", hash = "sha256:2a5e06546e19f24c6a96a129142a75cee553cc018ffee48a460059b1185f4470", size = 2331955, upload-time = "2025-11-04T13:41:54.079Z" }, + { url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906, upload-time = "2025-11-04T13:41:56.606Z" }, + { url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607, upload-time = "2025-11-04T13:41:58.889Z" }, + { url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769, upload-time = "2025-11-04T13:42:01.186Z" }, + { url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441, upload-time = "2025-11-04T13:42:39.557Z" }, + { url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291, upload-time = "2025-11-04T13:42:42.169Z" }, + { url = 
"https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632, upload-time = "2025-11-04T13:42:44.564Z" }, + { url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905, upload-time = "2025-11-04T13:42:47.156Z" }, + { url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495, upload-time = "2025-11-04T13:42:49.689Z" }, + { url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388, upload-time = "2025-11-04T13:42:52.215Z" }, + { url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879, upload-time = "2025-11-04T13:42:56.483Z" }, + { url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017, upload-time = "2025-11-04T13:42:59.471Z" }, + { url = "https://files.pythonhosted.org/packages/e6/b0/1a2aa41e3b5a4ba11420aba2d091b2d17959c8d1519ece3627c371951e73/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b5819cd790dbf0c5eb9f82c73c16b39a65dd6dd4d1439dcdea7816ec9adddab8", size = 2103351, upload-time = "2025-11-04T13:43:02.058Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ee/31b1f0020baaf6d091c87900ae05c6aeae101fa4e188e1613c80e4f1ea31/pydantic_core-2.41.5-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:5a4e67afbc95fa5c34cf27d9089bca7fcab4e51e57278d710320a70b956d1b9a", size = 1925363, upload-time = "2025-11-04T13:43:05.159Z" }, + { url = "https://files.pythonhosted.org/packages/e1/89/ab8e86208467e467a80deaca4e434adac37b10a9d134cd2f99b28a01e483/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ece5c59f0ce7d001e017643d8d24da587ea1f74f6993467d85ae8a5ef9d4f42b", size = 2135615, upload-time = "2025-11-04T13:43:08.116Z" }, + { url = "https://files.pythonhosted.org/packages/99/0a/99a53d06dd0348b2008f2f30884b34719c323f16c3be4e6cc1203b74a91d/pydantic_core-2.41.5-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:16f80f7abe3351f8ea6858914ddc8c77e02578544a0ebc15b4c2e1a0e813b0b2", size = 2175369, upload-time = "2025-11-04T13:43:12.49Z" }, + { url = 
"https://files.pythonhosted.org/packages/6d/94/30ca3b73c6d485b9bb0bc66e611cff4a7138ff9736b7e66bcf0852151636/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:33cb885e759a705b426baada1fe68cbb0a2e68e34c5d0d0289a364cf01709093", size = 2144218, upload-time = "2025-11-04T13:43:15.431Z" }, + { url = "https://files.pythonhosted.org/packages/87/57/31b4f8e12680b739a91f472b5671294236b82586889ef764b5fbc6669238/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:c8d8b4eb992936023be7dee581270af5c6e0697a8559895f527f5b7105ecd36a", size = 2329951, upload-time = "2025-11-04T13:43:18.062Z" }, + { url = "https://files.pythonhosted.org/packages/7d/73/3c2c8edef77b8f7310e6fb012dbc4b8551386ed575b9eb6fb2506e28a7eb/pydantic_core-2.41.5-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:242a206cd0318f95cd21bdacff3fcc3aab23e79bba5cac3db5a841c9ef9c6963", size = 2318428, upload-time = "2025-11-04T13:43:20.679Z" }, + { url = "https://files.pythonhosted.org/packages/2f/02/8559b1f26ee0d502c74f9cca5c0d2fd97e967e083e006bbbb4e97f3a043a/pydantic_core-2.41.5-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:d3a978c4f57a597908b7e697229d996d77a6d3c94901e9edee593adada95ce1a", size = 2147009, upload-time = "2025-11-04T13:43:23.286Z" }, + { url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980, upload-time = "2025-11-04T13:43:25.97Z" }, + { url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865, upload-time = "2025-11-04T13:43:28.763Z" }, + { url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256, upload-time = "2025-11-04T13:43:31.71Z" }, + { url = "https://files.pythonhosted.org/packages/83/d9/8412d7f06f616bbc053d30cb4e5f76786af3221462ad5eee1f202021eb4e/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:287dad91cfb551c363dc62899a80e9e14da1f0e2b6ebde82c806612ca2a13ef1", size = 2174762, upload-time = "2025-11-04T13:43:34.744Z" }, + { url = "https://files.pythonhosted.org/packages/55/4c/162d906b8e3ba3a99354e20faa1b49a85206c47de97a639510a0e673f5da/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:03b77d184b9eb40240ae9fd676ca364ce1085f203e1b1256f8ab9984dca80a84", size = 2143141, upload-time = "2025-11-04T13:43:37.701Z" }, + { url = "https://files.pythonhosted.org/packages/1f/f2/f11dd73284122713f5f89fc940f370d035fa8e1e078d446b3313955157fe/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_armv7l.whl", hash = "sha256:a668ce24de96165bb239160b3d854943128f4334822900534f2fe947930e5770", size = 2330317, upload-time = "2025-11-04T13:43:40.406Z" }, + { url = "https://files.pythonhosted.org/packages/88/9d/b06ca6acfe4abb296110fb1273a4d848a0bfb2ff65f3ee92127b3244e16b/pydantic_core-2.41.5-pp311-pypy311_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f14f8f046c14563f8eb3f45f499cc658ab8d10072961e07225e507adb700e93f", size = 
2316992, upload-time = "2025-11-04T13:43:43.602Z" },
+    { url = "https://files.pythonhosted.org/packages/36/c7/cfc8e811f061c841d7990b0201912c3556bfeb99cdcb7ed24adc8d6f8704/pydantic_core-2.41.5-pp311-pypy311_pp73-win_amd64.whl", hash = "sha256:56121965f7a4dc965bff783d70b907ddf3d57f6eba29b6d2e5dabfaf07799c51", size = 2145302, upload-time = "2025-11-04T13:43:46.64Z" },
+]
+
 [[package]]
 name = "pygments"
 version = "2.19.2"
@@ -2117,6 +2261,22 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/a3/dc/17031897dae0efacfea57dfd3a82fdd2a2aeb58e0ff71b77b87e44edc772/setuptools-80.9.0-py3-none-any.whl", hash = "sha256:062d34222ad13e0cc312a4c02d73f059e86a4acbfbdea8f8f76b28c99f306922", size = 1201486, upload-time = "2025-05-27T00:56:49.664Z" },
 ]
 
+[[package]]
+name = "skeletoken"
+version = "0.3.0"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "protobuf" },
+    { name = "pydantic" },
+    { name = "regex" },
+    { name = "tokenizers" },
+    { name = "transformers" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/73/16/c4b9107914b6ff0408a93fe330c59ff6f2deb4684d3932d9e1823ba71b0b/skeletoken-0.3.0.tar.gz", hash = "sha256:d35c957e28a7484a9628752340928ba857fd44834ba2b528ffd3c18f088c9086", size = 230755, upload-time = "2026-02-06T05:20:19.367Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/26/0c/cda6fe8ce5e7eafac5ee9cdeec6d8ce832ff4e5a9576a40f043d03552d46/skeletoken-0.3.0-py3-none-any.whl", hash = "sha256:88e5e2338ba871d2a888511469bbdffa876881aa46dd428759188bd1b5440426", size = 38831, upload-time = "2026-02-06T05:20:17.721Z" },
+]
+
 [[package]]
 name = "skops"
 version = "0.13.0"
@@ -2369,6 +2529,18 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/18/67/36e9267722cc04a6b9f15c7f3441c2363321a3ea07da7ae0c0707beb2a9c/typing_extensions-4.15.0-py3-none-any.whl", hash = "sha256:f0fa19c6845758ab08074a0cfa8b7aecb71c999ca73d62883bc25cc018c4e548", size = 44614, upload-time = "2025-08-25T13:49:24.86Z" },
 ]
 
+[[package]]
+name = "typing-inspection"
+version = "0.4.2"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+    { name = "typing-extensions" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/55/e3/70399cb7dd41c10ac53367ae42139cf4b1ca5f36bb3dc6c9d33acdb43655/typing_inspection-0.4.2.tar.gz", hash = "sha256:ba561c48a67c5958007083d386c3295464928b01faa735ab8547c5692e87f464", size = 75949, upload-time = "2025-10-01T02:14:41.687Z" }
+wheels = [
+    { url = "https://files.pythonhosted.org/packages/dc/9b/47798a6c91d8bdb567fe2698fe81e0c6b7cb7ef4d13da4114b41d239f65d/typing_inspection-0.4.2-py3-none-any.whl", hash = "sha256:4ed1cacbdc298c220f1bd249ed5287caa16f34d44ef4e9c3d0cbad5b521545e7", size = 14611, upload-time = "2025-10-01T02:14:40.154Z" },
+]
+
 [[package]]
 name = "urllib3"
 version = "2.5.0"