add read me
This commit is contained in:
64
venv/lib/python3.12/site-packages/ctranslate2/__init__.py
Normal file
64
venv/lib/python3.12/site-packages/ctranslate2/__init__.py
Normal file
@@ -0,0 +1,64 @@
|
||||
import sys

# On Windows the shared libraries bundled with the package are not on the
# default DLL search path, so locate and load them explicitly before
# importing the compiled extension below.
if sys.platform == "win32":
    import ctypes
    import glob
    import os

    module_name = sys.modules[__name__].__name__

    # importlib.resources.files() requires Python >= 3.9; fall back to the
    # deprecated pkg_resources API on older interpreters.
    try:
        from importlib.resources import files

        # files() replaces the deprecated pkg_resources resource lookup.
        package_dir = str(files(module_name))
    except ImportError:
        import pkg_resources

        package_dir = pkg_resources.resource_filename(module_name, "")

    # os.add_dll_directory only exists on Windows builds of Python >= 3.8.
    add_dll_directory = getattr(os, "add_dll_directory", None)
    if add_dll_directory is not None:
        add_dll_directory(package_dir)

    # Eagerly load every DLL shipped with the package so the compiled
    # extension can resolve its native dependencies.
    for library in glob.glob(os.path.join(package_dir, "*.dll")):
        ctypes.CDLL(library)

try:
    from ctranslate2._ext import (
        AsyncGenerationResult,
        AsyncScoringResult,
        AsyncTranslationResult,
        DataType,
        Device,
        Encoder,
        EncoderForwardOutput,
        ExecutionStats,
        GenerationResult,
        GenerationStepResult,
        Generator,
        MpiInfo,
        ScoringResult,
        StorageView,
        TranslationResult,
        Translator,
        contains_model,
        get_cuda_device_count,
        get_supported_compute_types,
        set_random_seed,
    )
    from ctranslate2.extensions import register_extensions
    from ctranslate2.logging import get_log_level, set_log_level

    # Register the Python-side extensions, then drop the helper name from the
    # public module namespace.
    register_extensions()
    del register_extensions
except ImportError as e:
    # Allow using the Python package without the compiled extension.
    if "No module named" in str(e):
        pass
    else:
        raise

from ctranslate2 import converters, models, specs
from ctranslate2.version import __version__
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,8 @@
|
||||
from ctranslate2.converters.converter import Converter
|
||||
from ctranslate2.converters.fairseq import FairseqConverter
|
||||
from ctranslate2.converters.marian import MarianConverter
|
||||
from ctranslate2.converters.openai_gpt2 import OpenAIGPT2Converter
|
||||
from ctranslate2.converters.opennmt_py import OpenNMTPyConverter
|
||||
from ctranslate2.converters.opennmt_tf import OpenNMTTFConverter
|
||||
from ctranslate2.converters.opus_mt import OpusMTConverter
|
||||
from ctranslate2.converters.transformers import TransformersConverter
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,109 @@
|
||||
import abc
|
||||
import argparse
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from ctranslate2.specs.model_spec import ACCEPTED_MODEL_TYPES, ModelSpec
|
||||
|
||||
|
||||
class Converter(abc.ABC):
    """Base class for model converters."""

    @staticmethod
    def declare_arguments(parser: argparse.ArgumentParser) -> argparse.ArgumentParser:
        """Adds common conversion options to the command line parser.

        Arguments:
          parser: Command line argument parser.

        Returns:
          The same parser, with the common conversion options added.
        """
        parser.add_argument(
            "--output_dir", required=True, help="Output model directory."
        )
        parser.add_argument(
            "--vocab_mapping", default=None, help="Vocabulary mapping file (optional)."
        )
        parser.add_argument(
            "--quantization",
            default=None,
            choices=ACCEPTED_MODEL_TYPES,
            help="Weight quantization type.",
        )
        parser.add_argument(
            "--force",
            action="store_true",
            help="Force conversion even if the output directory already exists.",
        )
        return parser

    def convert_from_args(self, args: argparse.Namespace) -> str:
        """Helper function to call :meth:`ctranslate2.converters.Converter.convert`
        with the parsed command line options.

        Arguments:
          args: Namespace containing parsed arguments.

        Returns:
          Path to the output directory.
        """
        return self.convert(
            args.output_dir,
            vmap=args.vocab_mapping,
            quantization=args.quantization,
            force=args.force,
        )

    def convert(
        self,
        output_dir: str,
        vmap: Optional[str] = None,
        quantization: Optional[str] = None,
        force: bool = False,
    ) -> str:
        """Converts the model to the CTranslate2 format.

        Arguments:
          output_dir: Output directory where the CTranslate2 model is saved.
          vmap: Optional path to a vocabulary mapping file that will be included
            in the converted model directory.
          quantization: Weight quantization scheme (possible values are: int8, int8_float32,
            int8_float16, int8_bfloat16, int16, float16, bfloat16, float32).
          force: Override the output directory if it already exists.

        Returns:
          Path to the output directory.

        Raises:
          RuntimeError: If the output directory already exists and :obj:`force`
            is not set.
          NotImplementedError: If the converter cannot convert this model to the
            CTranslate2 format.
        """
        # Fail before the (potentially expensive) model load when the
        # destination exists and the caller did not request an override.
        if os.path.exists(output_dir) and not force:
            raise RuntimeError(
                "output directory %s already exists, use --force to override"
                % output_dir
            )

        # _load() is implemented by each concrete converter; it returns None
        # when the loaded model cannot be represented as a CTranslate2 spec.
        model_spec = self._load()
        if model_spec is None:
            raise NotImplementedError(
                "This model is not supported by CTranslate2 or this converter"
            )
        if vmap is not None:
            model_spec.register_vocabulary_mapping(vmap)

        model_spec.validate()
        model_spec.optimize(quantization=quantization)

        # Create model directory. At this point the directory can only still
        # exist when force=True, so removing it is safe.
        if os.path.exists(output_dir):
            shutil.rmtree(output_dir)
        os.makedirs(output_dir)
        model_spec.save(output_dir)
        return output_dir

    @abc.abstractmethod
    def _load(self):
        # Concrete converters load the model and return a ModelSpec
        # (or None when the model is unsupported).
        raise NotImplementedError()
|
||||
@@ -0,0 +1,352 @@
|
||||
import argparse
|
||||
|
||||
from eole.config.run import PredictConfig
|
||||
from eole.constants import PositionEncodingType
|
||||
from eole.inputters.inputter import vocabs_to_dict
|
||||
from eole.models.model import BaseModel
|
||||
|
||||
from ctranslate2.converters import utils
|
||||
from ctranslate2.converters.converter import Converter
|
||||
from ctranslate2.specs import common_spec, transformer_spec
|
||||
|
||||
# Maps the Eole activation names to the CTranslate2 activation enum.
_SUPPORTED_ACTIVATIONS = {
    "gelu": common_spec.Activation.GELU,
    "fast_gelu": common_spec.Activation.GELUTanh,
    "relu": common_spec.Activation.RELU,
    "gated-silu": common_spec.Activation.SWISH,
}
|
||||
|
||||
|
||||
def _get_model_spec_seq2seq(
    config, variables, src_vocabs, tgt_vocabs, num_source_embeddings
):
    """Creates an encoder/decoder model specification from the model config.

    Arguments:
      config: Eole model configuration object.
      variables: Model state dict mapping variable names to tensors.
      src_vocabs: Source vocabularies to register on the spec.
      tgt_vocabs: Target vocabularies to register on the spec.
      num_source_embeddings: Number of source embeddings of the spec.

    Returns:
      A loaded ``transformer_spec.TransformerSpec``.

    Raises:
      ValueError: If the model uses a feature that is not supported yet for
        encoder/decoder models (rotary or alibi position encodings, sliding
        window attention).
    """
    # Query the position encoding type once instead of once per comparison.
    position_encoding_type = getattr(
        config.embeddings, "position_encoding_type", None
    )
    with_relative_position = (
        position_encoding_type == PositionEncodingType.Relative
    )
    if position_encoding_type == PositionEncodingType.Rotary:
        raise ValueError(
            "Rotary embeddings are not supported yet for encoder/decoder models"
        )
    if position_encoding_type == PositionEncodingType.Alibi:
        raise ValueError("Alibi is not supported yet for encoder/decoder models")
    activation_fn = getattr(config, "mlp_activation_fn", "relu")

    # Return the first head of the last layer unless the model was trained with alignments.
    if getattr(config.decoder, "lambda_align", 0) == 0:
        alignment_layer = -1
        alignment_heads = 1
    else:
        alignment_layer = config.decoder.alignment_layer
        alignment_heads = config.decoder.alignment_heads

    num_heads = getattr(config.decoder, "heads", 8)
    ffn_glu = activation_fn == "gated-silu"
    sliding_window = getattr(config, "sliding_window", 0)
    if sliding_window != 0:
        raise ValueError(
            "Sliding window is not supported yet for encoder/decoder models"
        )

    model_spec = transformer_spec.TransformerSpec.from_config(
        (config.encoder.layers, config.decoder.layers),
        num_heads,
        with_relative_position=with_relative_position,
        activation=_SUPPORTED_ACTIVATIONS[activation_fn],
        ffn_glu=ffn_glu,
        rms_norm=config.layer_norm == "rms",
        alignment_layer=alignment_layer,
        alignment_heads=alignment_heads,
        num_source_embeddings=num_source_embeddings,
    )

    set_transformer_spec(model_spec, variables)
    for src_vocab in src_vocabs:
        model_spec.register_source_vocabulary(src_vocab)
    for tgt_vocab in tgt_vocabs:
        model_spec.register_target_vocabulary(tgt_vocab)

    return model_spec
|
||||
|
||||
|
||||
def _get_model_spec_lm(
    config, variables, src_vocabs, tgt_vocabs, num_source_embeddings
):
    """Creates a decoder-only model specification from the model config.

    Note: ``src_vocabs`` and ``num_source_embeddings`` are accepted for
    signature parity with ``_get_model_spec_seq2seq`` but are not used here.
    """
    with_relative_position = (
        getattr(config.embeddings, "position_encoding_type", None)
        == PositionEncodingType.Relative
    )
    with_rotary = (
        getattr(config.embeddings, "position_encoding_type", None)
        == PositionEncodingType.Rotary
    )
    with_alibi = (
        getattr(config.embeddings, "position_encoding_type", None)
        == PositionEncodingType.Alibi
    )
    activation_fn = getattr(config, "mlp_activation_fn", "relu")
    num_heads = getattr(config.decoder, "heads", 8)
    num_kv = getattr(config.decoder, "heads_kv", 0)
    # A heads_kv value of 0 (unset) or equal to the number of heads means no
    # separate key/value head count is passed to the spec.
    if num_kv == num_heads or num_kv == 0:
        num_kv = None
    rotary_dim = 0 if with_rotary else None
    rotary_interleave = getattr(config.rope_config, "rotary_interleave", True)
    ffn_glu = activation_fn == "gated-silu"
    sliding_window = getattr(config, "sliding_window", 0)

    model_spec = transformer_spec.TransformerDecoderModelSpec.from_config(
        config.decoder.layers,
        num_heads,
        activation=_SUPPORTED_ACTIVATIONS[activation_fn],
        ffn_glu=ffn_glu,
        with_relative_position=with_relative_position,
        alibi=with_alibi,
        rms_norm=config.layer_norm == "rms",
        rotary_dim=rotary_dim,
        rotary_interleave=rotary_interleave,
        num_heads_kv=num_kv,
        sliding_window=sliding_window,
    )

    # Decoder-only: load without cross-attention weights.
    set_transformer_decoder(
        model_spec.decoder,
        variables,
        with_encoder_attention=False,
    )

    for tgt_vocab in tgt_vocabs:
        model_spec.register_vocabulary(tgt_vocab)

    return model_spec
|
||||
|
||||
|
||||
def get_vocabs(vocab):
    """Return the source and target vocabularies, each wrapped in a list."""
    return [vocab["src"]], [vocab["tgt"]]
|
||||
|
||||
|
||||
class EoleConverter(Converter):
    """Converts models trained with Eole."""

    def __init__(self, model_path: str):
        """Initializes the Eole converter.

        Arguments:
          model_path: Path to the Eole model (as accepted by Eole's
            ``PredictConfig``).
        """
        self._model_path = model_path

    def _load(self):
        # NOTE(review): torch is not referenced directly below — presumably
        # imported to fail fast when PyTorch is missing; confirm.
        import torch

        # "src" is required by PredictConfig but unused for conversion.
        config = PredictConfig(model_path=self._model_path, src="dummy")

        vocabs, model, model_config = BaseModel.load_test_model(config)
        vocabs_dict = vocabs_to_dict(vocabs)

        config.model = model_config
        src_vocabs, tgt_vocabs = get_vocabs(vocabs_dict)

        # Decoder-only vs. encoder/decoder models use different specs.
        if config.model.decoder.decoder_type == "transformer_lm":
            spec = _get_model_spec_lm(
                config.model,
                model.state_dict(),
                src_vocabs,
                tgt_vocabs,
                num_source_embeddings=len(src_vocabs),
            )
        else:
            spec = _get_model_spec_seq2seq(
                config.model,
                model.state_dict(),
                src_vocabs,
                tgt_vocabs,
                num_source_embeddings=len(src_vocabs),
            )
        spec.config.decoder_start_token = vocabs["decoder_start_token"]

        # Propagate the special tokens and layer norm epsilon to the spec.
        spec.config.bos_token = vocabs["specials"]["bos_token"]
        spec.config.eos_token = vocabs["specials"]["eos_token"]
        spec.config.unk_token = vocabs["specials"]["unk_token"]
        spec.config.layer_norm_epsilon = getattr(config, "norm_eps", 1e-6)

        return spec
|
||||
|
||||
|
||||
def set_transformer_spec(spec, variables):
    """Load both sides of an encoder/decoder spec from the state dict."""
    for sub_spec, loader in (
        (spec.encoder, set_transformer_encoder),
        (spec.decoder, set_transformer_decoder),
    ):
        loader(sub_spec, variables)
|
||||
|
||||
|
||||
def set_transformer_encoder(spec, variables):
    """Load the encoder: input layers, final layer norm, and each layer."""
    set_input_layers(spec, variables, "src_emb")
    set_layer_norm(spec.layer_norm, variables, "encoder.layer_norm")
    scope_template = "encoder.transformer_layers.%d"
    for index, layer_spec in enumerate(spec.layer):
        set_transformer_encoder_layer(
            layer_spec, variables, scope_template % index
        )
|
||||
|
||||
|
||||
def set_transformer_decoder(spec, variables, with_encoder_attention=True):
    """Load the decoder; cross-attention is skipped for decoder-only models."""
    set_input_layers(spec, variables, "tgt_emb")
    set_layer_norm(spec.layer_norm, variables, "decoder.layer_norm")
    scope_template = "decoder.transformer_layers.%d"
    for index, layer_spec in enumerate(spec.layer):
        set_transformer_decoder_layer(
            layer_spec,
            variables,
            scope_template % index,
            with_encoder_attention=with_encoder_attention,
        )

    # The output projection over the target vocabulary.
    set_linear(spec.projection, variables, "generator")
|
||||
|
||||
|
||||
def set_input_layers(spec, variables, scope):
    """Load token embeddings and, when the spec has one, the position table."""
    if not hasattr(spec, "position_encodings"):
        # No position encoding table on this spec: disable embedding scaling.
        spec.scale_embeddings = False
    else:
        set_position_encodings(
            spec.position_encodings,
            variables,
            "%s.pe" % scope,
        )

    embeddings_spec = spec.embeddings
    # Encoder embeddings are stored in a list (onmt/ct2 legacy with features).
    if isinstance(embeddings_spec, list):
        embeddings_spec = embeddings_spec[0]
    set_embeddings(embeddings_spec, variables, "%s.embeddings" % scope)
|
||||
|
||||
|
||||
def set_transformer_encoder_layer(spec, variables, scope):
    """Load one encoder layer: self-attention, its norms, and the MLP."""
    attention_scope = "%s.self_attn" % scope
    set_multi_head_attention(
        spec.self_attention,
        variables,
        attention_scope,
        self_attention=True,
    )
    set_layer_norm(
        spec.self_attention.layer_norm, variables, "%s.input_layernorm" % scope
    )
    set_layer_norm(
        spec.ffn.layer_norm, variables, "%s.post_attention_layernorm" % scope
    )
    set_ffn(spec.ffn, variables, "%s.mlp" % scope)
|
||||
|
||||
|
||||
def set_transformer_decoder_layer(spec, variables, scope, with_encoder_attention=True):
    """Load one decoder layer; cross-attention only when requested."""
    self_attn_scope = "%s.self_attn" % scope
    set_multi_head_attention(
        spec.self_attention,
        variables,
        self_attn_scope,
        self_attention=True,
    )
    set_layer_norm(
        spec.self_attention.layer_norm, variables, "%s.input_layernorm" % scope
    )
    if with_encoder_attention:
        set_multi_head_attention(spec.attention, variables, "%s.context_attn" % scope)
        set_layer_norm(
            spec.attention.layer_norm, variables, "%s.precontext_layernorm" % scope
        )
    set_layer_norm(
        spec.ffn.layer_norm, variables, "%s.post_attention_layernorm" % scope
    )
    set_ffn(spec.ffn, variables, "%s.mlp" % scope)
|
||||
|
||||
|
||||
def set_ffn(spec, variables, scope):
    """Load the MLP projections; gated variants have an extra no-act branch."""
    gate_up_scope = "%s.gate_up_proj" % scope
    down_scope = "%s.down_proj" % scope
    set_linear(spec.linear_0, variables, gate_up_scope)
    set_linear(spec.linear_1, variables, down_scope)
    if hasattr(spec, "linear_0_noact"):
        set_linear(spec.linear_0_noact, variables, "%s.up_proj" % scope)
|
||||
|
||||
|
||||
def set_multi_head_attention(spec, variables, scope, self_attention=False):
    """Load multi-head attention weights.

    For self-attention the query/key/value projections are fused into
    ``spec.linear[0]``; for cross-attention the query stays separate and
    key/value are fused into ``spec.linear[1]``.
    """
    if self_attention:
        split_layers = [common_spec.LinearSpec() for _ in range(3)]
        set_linear(split_layers[0], variables, "%s.linear_query" % scope)
        set_linear(split_layers[1], variables, "%s.linear_keys" % scope)
        set_linear(split_layers[2], variables, "%s.linear_values" % scope)
        utils.fuse_linear(spec.linear[0], split_layers)
    else:
        set_linear(spec.linear[0], variables, "%s.linear_query" % scope)
        split_layers = [common_spec.LinearSpec() for _ in range(2)]
        set_linear(split_layers[0], variables, "%s.linear_keys" % scope)
        set_linear(split_layers[1], variables, "%s.linear_values" % scope)
        utils.fuse_linear(spec.linear[1], split_layers)
    # Output projection.
    set_linear(spec.linear[-1], variables, "%s.final_linear" % scope)
    if hasattr(spec, "relative_position_keys"):
        spec.relative_position_keys = _get_variable(
            variables, "%s.relative_positions_embeddings.weight" % scope
        )
        # Keys and values share the same relative position embedding table.
        spec.relative_position_values = spec.relative_position_keys
|
||||
|
||||
|
||||
def set_layer_norm(spec, variables, scope):
    """Load layer norm weights, handling legacy variable names.

    The standard names are ``<scope>.weight`` / ``<scope>.bias``; older
    custom LayerNorm modules used ``a_2`` / ``b_2`` instead. A missing
    ``bias`` is tolerated (the norm may have no bias term).
    """
    try:
        spec.gamma = _get_variable(variables, "%s.weight" % scope)
    except KeyError:
        # Compatibility with older models using a custom LayerNorm module.
        spec.gamma = _get_variable(variables, "%s.a_2" % scope)
        spec.beta = _get_variable(variables, "%s.b_2" % scope)
    try:
        spec.beta = _get_variable(variables, "%s.bias" % scope)
    except KeyError:
        pass
|
||||
|
||||
|
||||
def set_linear(spec, variables, scope):
    """Load a linear layer's weight and, when present, its bias."""
    spec.weight = _get_variable(variables, "%s.weight" % scope)
    bias = variables.get("%s.bias" % scope)
    if bias is None:
        return
    spec.bias = bias
|
||||
|
||||
|
||||
def set_embeddings(spec, variables, scope):
    """Load an embedding weight matrix into the spec."""
    weight_name = "%s.weight" % scope
    spec.weight = _get_variable(variables, weight_name)
|
||||
|
||||
|
||||
def set_position_encodings(spec, variables, scope):
    """Load the position encoding table, dropping any size-1 axes."""
    table = _get_variable(variables, "%s.pe" % scope)
    spec.encodings = table.squeeze()
|
||||
|
||||
|
||||
def _get_variable(variables, name):
    """Look up a tensor by name in the state dict (raises KeyError if absent)."""
    return variables[name]
|
||||
|
||||
|
||||
def main():
    """Command line entry point: convert an Eole checkpoint to CTranslate2."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--model_path", required=True, help="Model path.")
    # Shared conversion options (--output_dir, --quantization, ...).
    Converter.declare_arguments(parser)
    args = parser.parse_args()
    converter = EoleConverter(args.model_path)
    converter.convert_from_args(args)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,345 @@
|
||||
import argparse
|
||||
import os
|
||||
|
||||
from typing import Optional
|
||||
|
||||
from ctranslate2.converters import utils
|
||||
from ctranslate2.converters.converter import Converter
|
||||
from ctranslate2.specs import common_spec, transformer_spec
|
||||
|
||||
# Fairseq model names this converter can handle.
_SUPPORTED_MODELS = {
    "bart",
    "multilingual_transformer",
    "transformer",
    "transformer_align",
    "transformer_lm",
}


# Maps Fairseq --activation-fn values to the CTranslate2 activation enum.
_SUPPORTED_ACTIVATIONS = {
    "gelu": common_spec.Activation.GELU,
    "gelu_accurate": common_spec.Activation.GELUTanh,
    "gelu_fast": common_spec.Activation.GELUTanh,
    "relu": common_spec.Activation.RELU,
    "swish": common_spec.Activation.SWISH,
}
|
||||
|
||||
|
||||
def _get_model_spec(args):
    """Builds a CTranslate2 model specification from the Fairseq arguments.

    Returns a decoder-only spec for "transformer_lm" models and an
    encoder/decoder spec otherwise. Unsupported configurations are reported
    through ``utils.ConfigurationChecker``.
    """
    import fairseq

    activation_fn = getattr(args, "activation_fn", "relu")
    model_name = fairseq.models.ARCH_MODEL_NAME_REGISTRY[args.arch]

    check = utils.ConfigurationChecker()
    check(
        model_name in _SUPPORTED_MODELS,
        "Model '%s' used by architecture '%s' is not supported (supported models are: %s)"
        % (model_name, args.arch, ", ".join(_SUPPORTED_MODELS)),
    )
    # Validate the model type on its own first, before reading any
    # model-specific attributes below.
    check.validate()
    check(
        activation_fn in _SUPPORTED_ACTIVATIONS,
        "Option --activation-fn %s is not supported (supported activations are: %s)"
        % (activation_fn, ", ".join(_SUPPORTED_ACTIVATIONS.keys())),
    )
    check(
        not getattr(args, "no_token_positional_embeddings", False),
        "Option --no-token-positional-embeddings is not supported",
    )
    check(
        not getattr(args, "lang_tok_replacing_bos_eos", False),
        "Option --lang-tok-replacing-bos-eos is not supported",
    )

    if model_name == "transformer_lm":
        # Decoder-only language model.
        check(
            not args.character_embeddings,
            "Option --character-embeddings is not supported",
        )
        check(
            not args.adaptive_input,
            "Option --adaptive-input is not supported",
        )
        check.validate()

        return transformer_spec.TransformerDecoderModelSpec.from_config(
            args.decoder_layers,
            args.decoder_attention_heads,
            pre_norm=args.decoder_normalize_before,
            activation=_SUPPORTED_ACTIVATIONS[activation_fn],
            layernorm_embedding=getattr(args, "layernorm_embedding", False),
            no_final_norm=args.no_decoder_final_norm,
            project_in_out=args.decoder_input_dim != args.decoder_embed_dim,
        )

    else:
        # Encoder/decoder model: both sides must share these settings.
        check(
            args.encoder_normalize_before == args.decoder_normalize_before,
            "Options --encoder-normalize-before and --decoder-normalize-before "
            "must have the same value",
        )
        check(
            args.encoder_attention_heads == args.decoder_attention_heads,
            "Options --encoder-attention-heads and --decoder-attention-heads "
            "must have the same value",
        )
        check.validate()

        return transformer_spec.TransformerSpec.from_config(
            (args.encoder_layers, args.decoder_layers),
            args.encoder_attention_heads,
            pre_norm=args.encoder_normalize_before,
            activation=_SUPPORTED_ACTIVATIONS[activation_fn],
            alignment_layer=getattr(args, "alignment_layer", -1),
            alignment_heads=getattr(args, "alignment_heads", 0),
            layernorm_embedding=getattr(args, "layernorm_embedding", False),
        )
|
||||
|
||||
|
||||
def _get_vocab(dictionary):
    """Return the dictionary symbols with ``<pad>`` renamed to ``<blank>``."""
    return [token if token != "<pad>" else "<blank>" for token in dictionary.symbols]
|
||||
|
||||
|
||||
class FairseqConverter(Converter):
    """Converts models trained with Fairseq."""

    def __init__(
        self,
        model_path: str,
        data_dir: str,
        source_lang: Optional[str] = None,
        target_lang: Optional[str] = None,
        fixed_dictionary: Optional[str] = None,
        no_default_special_tokens: bool = False,
        user_dir: Optional[str] = None,
    ):
        """Initializes the Fairseq converter.

        Arguments:
          model_path: Path to the Fairseq PyTorch model (.pt file).
          data_dir: Path to the Fairseq data directory containing vocabulary files.
          source_lang: Source language (may be required if not declared in the model).
          target_lang: Target language (may be required if not declared in the model).
          fixed_dictionary: Path to the fixed dictionary for multilingual models.
          no_default_special_tokens: Require all special tokens to be provided by the user
            (e.g. encoder end token, decoder start token).
          user_dir: Path to the user directory containing custom extensions.
        """
        self._model_path = model_path
        self._data_dir = data_dir
        self._fixed_dictionary = fixed_dictionary
        self._source_lang = source_lang
        self._target_lang = target_lang
        self._no_default_special_tokens = no_default_special_tokens
        self._user_dir = user_dir

    def _load(self):
        import fairseq
        import torch

        from fairseq import checkpoint_utils

        # Load user extensions first so custom architectures can be resolved.
        if self._user_dir:
            from fairseq.utils import import_user_module

            import_user_module(argparse.Namespace(user_dir=self._user_dir))

        with torch.no_grad():
            checkpoint = checkpoint_utils.load_checkpoint_to_cpu(self._model_path)
            # The configuration is stored under either "args" or "cfg".
            args = checkpoint["args"] or checkpoint["cfg"]["model"]

            # Point the configuration at the local data/vocabulary files.
            args.data = self._data_dir
            if self._fixed_dictionary is not None:
                args.fixed_dictionary = self._fixed_dictionary
            if hasattr(args, "lang_dict") and args.lang_dict:
                args.lang_dict = os.path.join(
                    self._data_dir, os.path.basename(args.lang_dict)
                )

            if self._source_lang is not None:
                args.source_lang = self._source_lang

            if self._target_lang is not None:
                args.target_lang = self._target_lang

            # Build the spec first so unsupported configs fail before the
            # (expensive) model build below.
            spec = _get_model_spec(args)

            task = fairseq.tasks.setup_task(args)
            model = fairseq.models.build_model(args, task)
            model.eval()
            model.load_state_dict(checkpoint["model"])

            if isinstance(spec, transformer_spec.TransformerDecoderModelSpec):
                # Decoder-only language model.
                set_transformer_decoder(
                    spec.decoder,
                    model.decoder,
                    with_encoder_attention=False,
                )

                spec.register_vocabulary(_get_vocab(task.dictionary))
                if not args.add_bos_token:
                    spec.config.bos_token = spec.config.eos_token

            else:
                # Encoder/decoder model.
                set_transformer_encoder(spec.encoder, model.encoder)
                set_transformer_decoder(spec.decoder, model.decoder)

                spec.register_source_vocabulary(_get_vocab(task.source_dictionary))
                spec.register_target_vocabulary(_get_vocab(task.target_dictionary))
                if self._no_default_special_tokens:
                    spec.config.decoder_start_token = None
                else:
                    spec.config.decoder_start_token = spec.config.eos_token
                spec.config.add_source_eos = True

        return spec
|
||||
|
||||
|
||||
def set_transformer_encoder(spec, module):
    """Load a Fairseq encoder module into the encoder spec."""
    set_input_layers(spec, module)
    for layer_spec, fairseq_layer in zip(spec.layer, module.layers):
        set_transformer_encoder_layer(layer_spec, fairseq_layer)
    # These norms only exist for some model configurations.
    if module.layer_norm is not None:
        set_layer_norm(spec.layer_norm, module.layer_norm)
    if module.layernorm_embedding is not None:
        set_layer_norm(spec.layernorm_embedding, module.layernorm_embedding)
|
||||
|
||||
|
||||
def set_transformer_decoder(spec, module, with_encoder_attention=True):
    """Load a Fairseq decoder module; cross-attention is optional."""
    set_input_layers(spec, module)
    set_linear(spec.projection, module.output_projection)
    for layer_spec, fairseq_layer in zip(spec.layer, module.layers):
        set_transformer_decoder_layer(
            layer_spec,
            fairseq_layer,
            with_encoder_attention=with_encoder_attention,
        )
    # Optional norms and input/output dimension projections.
    if module.layer_norm is not None:
        set_layer_norm(spec.layer_norm, module.layer_norm)
    if module.layernorm_embedding is not None:
        set_layer_norm(spec.layernorm_embedding, module.layernorm_embedding)
    if module.project_in_dim is not None:
        set_linear(spec.project_in, module.project_in_dim)
    if module.project_out_dim is not None:
        set_linear(spec.project_out, module.project_out_dim)
|
||||
|
||||
|
||||
def set_input_layers(spec, module):
    """Load token embeddings, position encodings, and the embedding scale."""
    set_position_encodings(spec.position_encodings, module.embed_positions)
    embeddings_spec = spec.embeddings
    # Source embeddings may be stored as a single-element list.
    if isinstance(embeddings_spec, list):
        embeddings_spec = embeddings_spec[0]
    set_embeddings(embeddings_spec, module.embed_tokens)
    spec.scale_embeddings = module.embed_scale
|
||||
|
||||
|
||||
def set_transformer_encoder_layer(spec, module):
    """Load one Fairseq encoder layer (self-attention and feed-forward)."""
    set_multi_head_attention(
        spec.self_attention, module.self_attn, self_attention=True
    )
    set_layer_norm(spec.self_attention.layer_norm, module.self_attn_layer_norm)
    set_ffn(spec.ffn, module)
|
||||
|
||||
|
||||
def set_transformer_decoder_layer(spec, module, with_encoder_attention=True):
    """Load one Fairseq decoder layer; cross-attention only when requested."""
    set_multi_head_attention(
        spec.self_attention, module.self_attn, self_attention=True
    )
    set_layer_norm(spec.self_attention.layer_norm, module.self_attn_layer_norm)
    if with_encoder_attention:
        set_multi_head_attention(spec.attention, module.encoder_attn)
        set_layer_norm(spec.attention.layer_norm, module.encoder_attn_layer_norm)
    set_ffn(spec.ffn, module)
|
||||
|
||||
|
||||
def set_ffn(spec, module):
    """Load the feed-forward block: its layer norm and both projections."""
    set_layer_norm(spec.layer_norm, module.final_layer_norm)
    for linear_spec, fairseq_linear in (
        (spec.linear_0, module.fc1),
        (spec.linear_1, module.fc2),
    ):
        set_linear(linear_spec, fairseq_linear)
|
||||
|
||||
|
||||
def set_multi_head_attention(spec, module, self_attention=False):
    """Load attention projections, fusing Q/K/V (or K/V) into one matrix."""
    if self_attention:
        # Fuse query, key, and value projections into spec.linear[0].
        qkv_specs = [common_spec.LinearSpec() for _ in range(3)]
        for proj_spec, proj_module in zip(
            qkv_specs, (module.q_proj, module.k_proj, module.v_proj)
        ):
            set_linear(proj_spec, proj_module)
        utils.fuse_linear(spec.linear[0], qkv_specs)
    else:
        # Cross-attention: the query stays separate; key/value are fused.
        set_linear(spec.linear[0], module.q_proj)
        kv_specs = [common_spec.LinearSpec() for _ in range(2)]
        for proj_spec, proj_module in zip(kv_specs, (module.k_proj, module.v_proj)):
            set_linear(proj_spec, proj_module)
        utils.fuse_linear(spec.linear[1], kv_specs)
    set_linear(spec.linear[-1], module.out_proj)
|
||||
|
||||
|
||||
def set_layer_norm(spec, module):
    """Copy LayerNorm affine parameters from the module into the spec."""
    spec.gamma, spec.beta = module.weight.numpy(), module.bias.numpy()
|
||||
|
||||
|
||||
def set_linear(spec, module):
    """Copy a linear layer's weight and, when one exists, its bias."""
    spec.weight = module.weight.numpy()
    bias = module.bias
    if bias is None:
        return
    spec.bias = bias.numpy()
|
||||
|
||||
|
||||
def set_embeddings(spec, module):
    """Copy the embedding weight matrix into the spec."""
    spec.weight = module.weight.numpy()
|
||||
|
||||
|
||||
def set_position_encodings(spec, module):
    """Copy position encodings, skipping entries up to the padding index."""
    import torch

    # torch.nn.Embedding modules store the table in `weight`; other
    # position encoding modules expose it as `weights`.
    if isinstance(module, torch.nn.Embedding):
        weight = module.weight
    else:
        weight = module.weights
    spec.encodings = weight.numpy()[module.padding_idx + 1 :]
|
||||
|
||||
|
||||
def main():
    """Command line entry point: convert a Fairseq model to CTranslate2."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--model_path", required=True, help="Model path.")
    parser.add_argument(
        "--data_dir",
        required=True,
        help="Data directory containing the source and target vocabularies.",
    )
    parser.add_argument(
        "--user_dir",
        help="Directory containing custom extensions.",
    )
    parser.add_argument(
        "--fixed_dictionary",
        help="Fixed dictionary for multilingual models.",
    )
    parser.add_argument(
        "--source_lang",
        help="Source language. This argument is used to find dictionary file from `data_dir`.",
    )
    parser.add_argument(
        "--target_lang",
        help="Target language. This argument is used to find dictionary file from `data_dir`.",
    )
    parser.add_argument(
        "--no_default_special_tokens",
        action="store_true",
        help=(
            "Require all special tokens to be provided by the user during inference, "
            "including the decoder start token."
        ),
    )
    # Shared conversion options (--output_dir, --quantization, ...).
    Converter.declare_arguments(parser)
    args = parser.parse_args()
    converter = FairseqConverter(
        args.model_path,
        args.data_dir,
        source_lang=args.source_lang,
        target_lang=args.target_lang,
        fixed_dictionary=args.fixed_dictionary,
        no_default_special_tokens=args.no_default_special_tokens,
        user_dir=args.user_dir,
    )
    converter.convert_from_args(args)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,315 @@
|
||||
import argparse
|
||||
import re
|
||||
|
||||
from typing import List
|
||||
|
||||
import numpy as np
|
||||
import yaml
|
||||
|
||||
from ctranslate2.converters import utils
|
||||
from ctranslate2.converters.converter import Converter
|
||||
from ctranslate2.specs import common_spec, transformer_spec
|
||||
|
||||
# Maps Marian --transformer-ffn-activation values to CTranslate2 activations.
_SUPPORTED_ACTIVATIONS = {
    "gelu": common_spec.Activation.GELUSigmoid,
    "relu": common_spec.Activation.RELU,
    "swish": common_spec.Activation.SWISH,
}

# Accepted --transformer-postprocess-emb values; "n" enables the embedding
# layer norm below (the other letters are presumably Marian's dropout flags —
# TODO confirm against Marian docs).
_SUPPORTED_POSTPROCESS_EMB = {"", "d", "n", "nd"}
|
||||
class MarianConverter(Converter):
    """Converts models trained with Marian."""

    def __init__(self, model_path: str, vocab_paths: List[str]):
        """Initializes the Marian converter.

        Arguments:
          model_path: Path to the Marian model (.npz file).
          vocab_paths: Paths to the vocabularies (.yml files).
        """
        self._model_path = model_path
        self._vocab_paths = vocab_paths

    def _load(self):
        """Builds the CTranslate2 model specification from the checkpoint.

        Returns:
          A fully populated ``transformer_spec.TransformerSpec``.
        """
        model = np.load(self._model_path)
        config = _get_model_config(model)
        vocabs = list(map(load_vocab, self._vocab_paths))

        activation = config["transformer-ffn-activation"]
        # "n" in the preprocess sequence means layer norm runs before each
        # block (pre-norm Transformer).
        pre_norm = "n" in config["transformer-preprocess"]
        postprocess_emb = config["transformer-postprocess-emb"]

        check = utils.ConfigurationChecker()
        check(config["type"] == "transformer", "Option --type must be 'transformer'")
        check(
            config["transformer-decoder-autoreg"] == "self-attention",
            "Option --transformer-decoder-autoreg must be 'self-attention'",
        )
        check(
            not config["transformer-no-projection"],
            "Option --transformer-no-projection is not supported",
        )
        check(
            activation in _SUPPORTED_ACTIVATIONS,
            "Option --transformer-ffn-activation %s is not supported "
            "(supported activations are: %s)"
            % (activation, ", ".join(_SUPPORTED_ACTIVATIONS.keys())),
        )
        check(
            postprocess_emb in _SUPPORTED_POSTPROCESS_EMB,
            "Option --transformer-postprocess-emb %s is not supported (supported values are: %s)"
            % (postprocess_emb, ", ".join(_SUPPORTED_POSTPROCESS_EMB)),
        )

        if pre_norm:
            check(
                config["transformer-preprocess"] == "n"
                and config["transformer-postprocess"] == "da"
                and config.get("transformer-postprocess-top", "") == "n",
                "Unsupported pre-norm Transformer architecture, expected the following "
                "combination of options: "
                "--transformer-preprocess n "
                "--transformer-postprocess da "
                "--transformer-postprocess-top n",
            )
        else:
            check(
                config["transformer-preprocess"] == ""
                and config["transformer-postprocess"] == "dan"
                and config.get("transformer-postprocess-top", "") == "",
                # Fixed typo in the message: "excepted" -> "expected".
                "Unsupported post-norm Transformer architecture, expected the following "
                "combination of options: "
                "--transformer-preprocess '' "
                "--transformer-postprocess dan "
                "--transformer-postprocess-top ''",
            )

        check.validate()

        # "last" means alignments come from the final decoder layer; Marian
        # layer indices are 1-based, CTranslate2's are 0-based.
        alignment_layer = config["transformer-guided-alignment-layer"]
        alignment_layer = -1 if alignment_layer == "last" else int(alignment_layer) - 1
        layernorm_embedding = "n" in postprocess_emb

        model_spec = transformer_spec.TransformerSpec.from_config(
            (config["enc-depth"], config["dec-depth"]),
            config["transformer-heads"],
            pre_norm=pre_norm,
            activation=_SUPPORTED_ACTIVATIONS[activation],
            alignment_layer=alignment_layer,
            alignment_heads=1,
            layernorm_embedding=layernorm_embedding,
        )
        set_transformer_spec(model_spec, model)
        # With a single vocabulary file, source and target share it.
        model_spec.register_source_vocabulary(vocabs[0])
        model_spec.register_target_vocabulary(vocabs[-1])
        model_spec.config.add_source_eos = True
        return model_spec
||||
|
||||
def _get_model_config(model):
    """Reads the YAML training configuration embedded in the Marian .npz file."""
    config = model["special:model.yml"]
    # Drop the trailing null terminator before decoding the raw bytes.
    config = config[:-1].tobytes()
    config = yaml.safe_load(config)
    return config
||||
|
||||
def load_vocab(path):
    """Parses a Marian YAML vocabulary and returns tokens sorted by index.

    pyyaml skips some entries in these files, so the parsing is done manually
    line by line. Both plain ``token: index`` lines and two-line complex key
    mappings (``? token`` followed by ``: index``) are supported.

    Raises:
      ValueError: If an index cannot be parsed as an integer.
    """
    entries = []
    pending_token = None
    pending_idx = None

    with open(path, encoding="utf-8") as vocab_file:
        for line_number, raw_line in enumerate(vocab_file, 1):
            line = raw_line.rstrip("\n\r")
            if not line:
                continue

            if line.startswith("? "):
                # Complex key mapping: the key is on its own line.
                pending_token = line[2:]
            elif pending_token is not None:
                # Complex key mapping: the value follows on the next line.
                pending_idx = line[2:]
            else:
                pending_token, pending_idx = line.rsplit(":", 1)

            if pending_token is not None:
                if pending_token.startswith('"') and pending_token.endswith('"'):
                    # Unescape characters and remove the surrounding quotes.
                    pending_token = re.sub(r"\\([^x])", r"\1", pending_token)
                    pending_token = pending_token[1:-1]
                    if pending_token.startswith("\\x"):
                        # Convert the \x digraph into the actual character.
                        pending_token = chr(int(pending_token[2:], base=16))
                elif pending_token.startswith("'") and pending_token.endswith("'"):
                    pending_token = pending_token[1:-1]
                    pending_token = pending_token.replace("''", "'")

            if pending_idx is not None:
                try:
                    pending_idx = int(pending_idx.strip())
                except ValueError as e:
                    raise ValueError(
                        "Unexpected format at line %d: '%s'" % (line_number, line)
                    ) from e

                entries.append((pending_idx, pending_token))
                pending_token = None
                pending_idx = None

    return [token for _, token in sorted(entries, key=lambda item: item[0])]
||||
|
||||
def set_transformer_spec(spec, weights):
    """Populates the full encoder-decoder spec from the checkpoint weights."""
    set_transformer_encoder(spec.encoder, weights, "encoder")
    set_transformer_decoder(spec.decoder, weights, "decoder")


def set_transformer_encoder(spec, weights, scope):
    """Loads the encoder embeddings, norms, and per-layer weights."""
    set_common_layers(spec, weights, scope)
    for i, layer_spec in enumerate(spec.layer):
        # Marian layer scopes are 1-based (encoder_l1, encoder_l2, ...).
        set_transformer_encoder_layer(layer_spec, weights, "%s_l%d" % (scope, i + 1))


def set_transformer_decoder(spec, weights, scope):
    """Loads the decoder weights, including the output projection."""
    spec.start_from_zero_embedding = True
    set_common_layers(spec, weights, scope)
    for i, layer_spec in enumerate(spec.layer):
        set_transformer_decoder_layer(layer_spec, weights, "%s_l%d" % (scope, i + 1))

    # The output projection may share its weight with the embeddings:
    # reuse_weight is used when no dedicated matrix exists in the checkpoint.
    set_linear(
        spec.projection,
        weights,
        "%s_ff_logit_out" % scope,
        reuse_weight=spec.embeddings.weight,
    )


def set_common_layers(spec, weights, scope):
    """Loads embeddings, position encodings, and embedding/top layer norms."""
    embeddings_specs = spec.embeddings
    if not isinstance(embeddings_specs, list):
        embeddings_specs = [embeddings_specs]

    set_embeddings(embeddings_specs[0], weights, scope)
    set_position_encodings(
        spec.position_encodings, weights, dim=embeddings_specs[0].weight.shape[1]
    )
    if hasattr(spec, "layernorm_embedding"):
        set_layer_norm(
            spec.layernorm_embedding,
            weights,
            "%s_emb" % scope,
            pre_norm=True,
        )
    if hasattr(spec, "layer_norm"):
        set_layer_norm(spec.layer_norm, weights, "%s_top" % scope)


def set_transformer_encoder_layer(spec, weights, scope):
    """Loads one encoder layer: feed-forward and self-attention blocks."""
    set_ffn(spec.ffn, weights, "%s_ffn" % scope)
    set_multi_head_attention(
        spec.self_attention, weights, "%s_self" % scope, self_attention=True
    )


def set_transformer_decoder_layer(spec, weights, scope):
    """Loads one decoder layer: feed-forward, self- and cross-attention."""
    set_ffn(spec.ffn, weights, "%s_ffn" % scope)
    set_multi_head_attention(
        spec.self_attention, weights, "%s_self" % scope, self_attention=True
    )
    set_multi_head_attention(spec.attention, weights, "%s_context" % scope)


def set_multi_head_attention(spec, weights, scope, self_attention=False):
    """Loads attention projections, fusing Q/K/V (self) or K/V (cross)."""
    split_layers = [common_spec.LinearSpec() for _ in range(3)]
    set_linear(split_layers[0], weights, scope, "q")
    set_linear(split_layers[1], weights, scope, "k")
    set_linear(split_layers[2], weights, scope, "v")

    if self_attention:
        # Self-attention: query, key, and value become a single projection.
        utils.fuse_linear(spec.linear[0], split_layers)
    else:
        # Cross-attention: the query stays separate; only key/value are fused.
        spec.linear[0].weight = split_layers[0].weight
        spec.linear[0].bias = split_layers[0].bias
        utils.fuse_linear(spec.linear[1], split_layers[1:])

    set_linear(spec.linear[-1], weights, scope, "o")
    set_layer_norm_auto(spec.layer_norm, weights, "%s_Wo" % scope)


def set_ffn(spec, weights, scope):
    """Loads the feed-forward block (layer norm + two linear layers)."""
    set_layer_norm_auto(spec.layer_norm, weights, "%s_ffn" % scope)
    set_linear(spec.linear_0, weights, scope, "1")
    set_linear(spec.linear_1, weights, scope, "2")


def set_layer_norm_auto(spec, weights, scope):
    """Loads layer norm weights, trying the pre-norm variable names first."""
    try:
        set_layer_norm(spec, weights, scope, pre_norm=True)
    except KeyError:
        set_layer_norm(spec, weights, scope)
||||
|
||||
def set_layer_norm(spec, weights, scope, pre_norm=False):
    """Loads the layer-norm scale and bias for *scope* into *spec*.

    When ``pre_norm`` is True, the "_pre" variable names are looked up.
    """
    if pre_norm:
        scale_key, bias_key = scope + "_ln_scale_pre", scope + "_ln_bias_pre"
    else:
        scale_key, bias_key = scope + "_ln_scale", scope + "_ln_bias"
    spec.gamma = weights[scale_key].squeeze()
    spec.beta = weights[bias_key].squeeze()
||||
|
||||
def set_linear(spec, weights, scope, suffix="", reuse_weight=None):
    """Loads a linear layer from the checkpoint.

    Matrices stored under "_W" are transposed before use; "_Wt" matrices are
    taken as is. When neither exists, *reuse_weight* (e.g. tied embeddings)
    is used. The bias, when present, is squeezed of singleton dimensions.
    """
    direct = weights.get("%s_W%s" % (scope, suffix))
    if direct is not None:
        spec.weight = direct.transpose()
    else:
        spec.weight = weights.get("%s_Wt%s" % (scope, suffix), reuse_weight)

    bias = weights.get("%s_b%s" % (scope, suffix))
    if bias is not None:
        spec.bias = bias.squeeze()
||||
|
||||
def set_embeddings(spec, weights, scope):
    """Loads the embedding table, falling back to the shared "Wemb" matrix."""
    weight = weights.get("%s_Wemb" % scope)
    if weight is None:
        weight = weights.get("Wemb")
    spec.weight = weight
||||
|
||||
def set_position_encodings(spec, weights, dim=None):
    """Loads position encodings, generating sinusoidal ones when absent.

    The sinusoidal table is only built when the checkpoint does not provide
    learned encodings ("Wpos"). The original code computed the table eagerly
    as the default argument of ``dict.get``, which was wasted work and
    required *dim* even when "Wpos" was present.
    """
    encodings = weights.get("Wpos")
    if encodings is None:
        encodings = _make_sinusoidal_position_encodings(dim)
    spec.encodings = encodings
||||
|
||||
def _make_sinusoidal_position_encodings(dim, num_positions=2048):
|
||||
positions = np.arange(num_positions)
|
||||
timescales = np.power(10000, 2 * (np.arange(dim) // 2) / dim)
|
||||
position_enc = np.expand_dims(positions, 1) / np.expand_dims(timescales, 0)
|
||||
table = np.zeros_like(position_enc)
|
||||
table[:, : dim // 2] = np.sin(position_enc[:, 0::2])
|
||||
table[:, dim // 2 :] = np.cos(position_enc[:, 1::2])
|
||||
return table
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for converting a Marian model."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--model_path", required=True, help="Path to the model .npz file."
    )
    parser.add_argument(
        "--vocab_paths",
        required=True,
        nargs="+",
        help="List of paths to the YAML vocabularies.",
    )
    # Adds the common conversion options (output directory, quantization, etc.).
    Converter.declare_arguments(parser)
    args = parser.parse_args()
    converter = MarianConverter(args.model_path, args.vocab_paths)
    converter.convert_from_args(args)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,95 @@
|
||||
import argparse
|
||||
import json
|
||||
import os
|
||||
|
||||
from ctranslate2.converters.converter import Converter
|
||||
from ctranslate2.specs import common_spec, model_spec, transformer_spec
|
||||
|
||||
|
||||
class OpenAIGPT2Converter(Converter):
    """Converts GPT-2 models from https://github.com/openai/gpt-2."""

    def __init__(self, model_dir: str):
        """Initializes the OpenAI GPT-2 converter.

        Arguments:
          model_dir: Path to the OpenAI GPT-2 model directory.
        """
        self._model_dir = model_dir

    def _load(self):
        """Builds a TransformerDecoderModelSpec from the TensorFlow checkpoint."""
        import tensorflow as tf

        reader = tf.train.load_checkpoint(self._model_dir)
        weights = {
            name: reader.get_tensor(name)
            for name in reader.get_variable_to_shape_map().keys()
        }

        with open(os.path.join(self._model_dir, "hparams.json")) as hparams_file:
            hparams = json.load(hparams_file)
        with open(os.path.join(self._model_dir, "encoder.json")) as vocab_file:
            vocab = json.load(vocab_file)
            # encoder.json maps token -> index; recover the index-ordered list.
            vocab = [
                token
                for token, index in sorted(vocab.items(), key=lambda item: item[1])
            ]

        spec = transformer_spec.TransformerDecoderModelSpec.from_config(
            hparams["n_layer"],
            hparams["n_head"],
            pre_norm=True,
            activation=common_spec.Activation.GELUTanh,
        )
        set_decoder(spec.decoder, weights, "model")
        # GPT-2 uses the same special token for unknown/BOS/EOS.
        spec.unk_token = "<|endoftext|>"
        spec.bos_token = "<|endoftext|>"
        spec.eos_token = "<|endoftext|>"
        spec.register_vocabulary(vocab)
        return spec
||||
|
||||
def set_decoder(spec, weights, scope):
    """Loads the GPT-2 decoder: embeddings, position table, layers, and norm."""
    spec.embeddings.weight = weights["%s/wte" % scope]
    spec.position_encodings.encodings = weights["%s/wpe" % scope]
    # GPT-2 does not scale embeddings before adding position encodings.
    spec.scale_embeddings = False
    # The output projection shares its weight with the token embeddings.
    spec.projection.weight = spec.embeddings.weight
    set_layer_norm(spec.layer_norm, weights, "%s/ln_f" % scope)
    for i, layer_spec in enumerate(spec.layer):
        set_layer(layer_spec, weights, "%s/h%d" % (scope, i))
||||
|
||||
def set_layer_norm(spec, weights, scope):
    """Loads the layer-norm gain ("g") and bias ("b") variables for *scope*."""
    spec.gamma = weights[scope + "/g"]
    spec.beta = weights[scope + "/b"]
||||
|
||||
def set_linear(spec, weights, scope):
    """Loads a GPT-2 conv1d kernel as a regular linear layer.

    The checkpoint stores the kernel with an extra singleton dimension, so it
    is squeezed and then transposed before being assigned.
    """
    kernel = weights["%s/w" % scope]
    spec.weight = kernel.squeeze().transpose()
    spec.bias = weights["%s/b" % scope]
||||
|
||||
def set_layer(spec, weights, scope):
    """Loads one GPT-2 block: self-attention and MLP with their layer norms."""
    set_layer_norm(spec.self_attention.layer_norm, weights, "%s/ln_1" % scope)
    # c_attn is the combined attention input projection stored by GPT-2.
    set_linear(spec.self_attention.linear[0], weights, "%s/attn/c_attn" % scope)
    set_linear(spec.self_attention.linear[1], weights, "%s/attn/c_proj" % scope)
    set_layer_norm(spec.ffn.layer_norm, weights, "%s/ln_2" % scope)
    set_linear(spec.ffn.linear_0, weights, "%s/mlp/c_fc" % scope)
    set_linear(spec.ffn.linear_1, weights, "%s/mlp/c_proj" % scope)
||||
|
||||
def main():
    """Command-line entry point for converting an OpenAI GPT-2 model."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--model_dir", required=True, help="Path to the model directory."
    )
    # Adds the common conversion options (output directory, quantization, etc.).
    Converter.declare_arguments(parser)
    args = parser.parse_args()
    converter = OpenAIGPT2Converter(args.model_dir)
    converter.convert_from_args(args)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,359 @@
|
||||
import argparse
|
||||
|
||||
from ctranslate2.converters import utils
|
||||
from ctranslate2.converters.converter import Converter
|
||||
from ctranslate2.specs import common_spec, transformer_spec
|
||||
|
||||
# Maps OpenNMT-py --pos_ffn_activation_fn values to CTranslate2 activations.
_SUPPORTED_ACTIVATIONS = {
    "gelu": common_spec.Activation.GELU,
    "fast_gelu": common_spec.Activation.GELUTanh,
    "relu": common_spec.Activation.RELU,
    "silu": common_spec.Activation.SWISH,
}

# Maps OpenNMT-py --feat_merge values to CTranslate2 embedding merge modes.
_SUPPORTED_FEATURES_MERGE = {
    "concat": common_spec.EmbeddingsMerge.CONCAT,
    "sum": common_spec.EmbeddingsMerge.ADD,
}


def check_opt(opt, num_source_embeddings):
    """Validates that the training options describe a convertible model.

    Arguments:
      opt: The OpenNMT-py options namespace stored in the checkpoint.
      num_source_embeddings: Number of source embeddings (word + features).

    Raises an error via ``check.validate()`` when an option is unsupported.
    """
    # --max_relative_positions encodes three mutually exclusive modes:
    # > 0 relative positions, -1 rotary embeddings, -2 ALiBi.
    with_relative_position = getattr(opt, "max_relative_positions", 0) > 0
    with_rotary = getattr(opt, "max_relative_positions", 0) == -1
    with_alibi = getattr(opt, "max_relative_positions", 0) == -2
    activation_fn = getattr(opt, "pos_ffn_activation_fn", "relu")
    feat_merge = getattr(opt, "feat_merge", "concat")
    self_attn_type = getattr(opt, "self_attn_type", "scaled-dot")

    check = utils.ConfigurationChecker()
    check(
        opt.encoder_type == opt.decoder_type
        and opt.decoder_type in {"transformer", "transformer_lm"},
        # Fixed message: the closing quote after transformer_lm was missing.
        "Options --encoder_type and --decoder_type must be"
        " 'transformer' or 'transformer_lm'",
    )
    check(
        self_attn_type == "scaled-dot",
        "Option --self_attn_type %s is not supported (supported values are: scaled-dot)"
        % self_attn_type,
    )
    check(
        activation_fn in _SUPPORTED_ACTIVATIONS,
        "Option --pos_ffn_activation_fn %s is not supported (supported activations are: %s)"
        % (activation_fn, ", ".join(_SUPPORTED_ACTIVATIONS.keys())),
    )
    check(
        opt.position_encoding != (with_relative_position or with_rotary or with_alibi),
        "Options --position_encoding and --max_relative_positions cannot be both enabled "
        "or both disabled",
    )
    check(
        num_source_embeddings == 1 or feat_merge in _SUPPORTED_FEATURES_MERGE,
        "Option --feat_merge %s is not supported (supported merge modes are: %s)"
        % (feat_merge, " ".join(_SUPPORTED_FEATURES_MERGE.keys())),
    )
    check.validate()
||||
|
||||
def _get_model_spec_seq2seq(
    opt, variables, src_vocabs, tgt_vocabs, num_source_embeddings
):
    """Creates a sequence-to-sequence model specification from the options."""
    with_relative_position = getattr(opt, "max_relative_positions", 0) > 0
    activation_fn = getattr(opt, "pos_ffn_activation_fn", "relu")
    feat_merge = getattr(opt, "feat_merge", "concat")

    # Return the first head of the last layer unless the model was trained with alignments.
    if getattr(opt, "lambda_align", 0) == 0:
        alignment_layer = -1
        alignment_heads = 1
    else:
        alignment_layer = opt.alignment_layer
        alignment_heads = opt.alignment_heads

    num_heads = getattr(opt, "heads", 8)

    model_spec = transformer_spec.TransformerSpec.from_config(
        (opt.enc_layers, opt.dec_layers),
        num_heads,
        with_relative_position=with_relative_position,
        activation=_SUPPORTED_ACTIVATIONS[activation_fn],
        alignment_layer=alignment_layer,
        alignment_heads=alignment_heads,
        num_source_embeddings=num_source_embeddings,
        embeddings_merge=_SUPPORTED_FEATURES_MERGE[feat_merge],
        multi_query_attention=getattr(opt, "multiquery", False),
    )

    model_spec.config.decoder_start_token = getattr(opt, "decoder_start_token", "<s>")

    set_transformer_spec(model_spec, variables)
    for src_vocab in src_vocabs:
        model_spec.register_source_vocabulary(src_vocab)
    for tgt_vocab in tgt_vocabs:
        model_spec.register_target_vocabulary(tgt_vocab)

    return model_spec


def _get_model_spec_lm(opt, variables, src_vocabs, tgt_vocabs, num_source_embeddings):
    """Creates a decoder-only (language model) specification from the options."""
    with_relative_position = getattr(opt, "max_relative_positions", 0) > 0
    # -1 selects rotary embeddings, -2 selects ALiBi.
    with_rotary = getattr(opt, "max_relative_positions", 0) == -1
    with_alibi = getattr(opt, "max_relative_positions", 0) == -2
    activation_fn = getattr(opt, "pos_ffn_activation_fn", "relu")
    num_heads = getattr(opt, "heads", 8)
    num_kv = getattr(opt, "num_kv", 0)
    if num_kv == num_heads or num_kv == 0:
        # Not grouped-query attention: let the spec use its default.
        num_kv = None
    rotary_dim = 0 if with_rotary else None
    rotary_interleave = getattr(opt, "rotary_interleave", True)
    # "silu" is paired with a gated feed-forward in OpenNMT-py checkpoints.
    ffn_glu = activation_fn == "silu"
    sliding_window = getattr(opt, "sliding_window", 0)

    model_spec = transformer_spec.TransformerDecoderModelSpec.from_config(
        opt.dec_layers,
        num_heads,
        activation=_SUPPORTED_ACTIVATIONS[activation_fn],
        ffn_glu=ffn_glu,
        with_relative_position=with_relative_position,
        alibi=with_alibi,
        rms_norm=opt.layer_norm == "rms",
        rotary_dim=rotary_dim,
        rotary_interleave=rotary_interleave,
        multi_query_attention=getattr(opt, "multiquery", False),
        num_heads_kv=num_kv,
        sliding_window=sliding_window,
    )

    model_spec.config.layer_norm_epsilon = getattr(opt, "norm_eps", 1e-6)

    set_transformer_decoder(
        model_spec.decoder,
        variables,
        with_encoder_attention=False,
    )

    for tgt_vocab in tgt_vocabs:
        model_spec.register_vocabulary(tgt_vocab)

    return model_spec
||||
|
||||
def get_vocabs(vocab):
    """Extracts the source and target vocabularies from a checkpoint.

    Three checkpoint layouts are handled: a dict of plain token lists
    (optionally with source features under "src_feats"), a dict of torchtext
    fields, and the legacy list-of-fields layout.

    Returns:
      A pair of lists ``(src_vocabs, tgt_vocabs)``.
    """
    if not (isinstance(vocab, dict) and "src" in vocab):
        # Compatibility with older models.
        return [vocab[0][1].itos], [vocab[1][1].itos]

    if isinstance(vocab["src"], list):
        src_vocabs = [vocab["src"]]
        tgt_vocabs = [vocab["tgt"]]

        src_feats = vocab.get("src_feats")
        if src_feats is not None:
            src_vocabs.extend(src_feats.values())
    else:
        src_vocabs = [field[1].vocab.itos for field in vocab["src"].fields]
        tgt_vocabs = [field[1].vocab.itos for field in vocab["tgt"].fields]

    return src_vocabs, tgt_vocabs
||||
|
||||
class OpenNMTPyConverter(Converter):
    """Converts models generated by OpenNMT-py."""

    def __init__(self, model_path: str):
        """Initializes the OpenNMT-py converter.

        Arguments:
          model_path: Path to the OpenNMT-py PyTorch model (.pt file).
        """
        self._model_path = model_path

    def _load(self):
        """Loads the checkpoint and builds the matching model specification."""
        import torch

        checkpoint = torch.load(self._model_path, map_location="cpu")

        src_vocabs, tgt_vocabs = get_vocabs(checkpoint["vocab"])

        check_opt(checkpoint["opt"], num_source_embeddings=len(src_vocabs))

        # Merge the generator weights into the main variable dict under a
        # "generator." prefix so the setters can address them uniformly.
        variables = checkpoint["model"]
        variables.update(
            {
                "generator.%s" % key: value
                for key, value in checkpoint["generator"].items()
            }
        )

        if checkpoint["opt"].decoder_type == "transformer_lm":
            return _get_model_spec_lm(
                checkpoint["opt"],
                variables,
                src_vocabs,
                tgt_vocabs,
                num_source_embeddings=len(src_vocabs),
            )
        else:
            return _get_model_spec_seq2seq(
                checkpoint["opt"],
                variables,
                src_vocabs,
                tgt_vocabs,
                num_source_embeddings=len(src_vocabs),
            )
||||
|
||||
def set_transformer_spec(spec, variables):
    """Populates the encoder-decoder spec from the checkpoint variables."""
    set_transformer_encoder(spec.encoder, variables)
    set_transformer_decoder(spec.decoder, variables)


def set_transformer_encoder(spec, variables):
    """Loads the encoder input layers, final norm, and per-layer weights."""
    set_input_layers(spec, variables, "encoder")
    set_layer_norm(spec.layer_norm, variables, "encoder.layer_norm")
    for i, layer in enumerate(spec.layer):
        set_transformer_encoder_layer(layer, variables, "encoder.transformer.%d" % i)


def set_transformer_decoder(spec, variables, with_encoder_attention=True):
    """Loads the decoder weights and the output projection.

    Arguments:
      with_encoder_attention: False for decoder-only (language) models.
    """
    set_input_layers(spec, variables, "decoder")
    set_layer_norm(spec.layer_norm, variables, "decoder.layer_norm")
    for i, layer in enumerate(spec.layer):
        set_transformer_decoder_layer(
            layer,
            variables,
            "decoder.transformer_layers.%d" % i,
            with_encoder_attention=with_encoder_attention,
        )

    try:
        set_linear(spec.projection, variables, "generator")
    except KeyError:
        # Compatibility when the generator was a nn.Sequential module.
        set_linear(spec.projection, variables, "generator.0")


def set_input_layers(spec, variables, scope):
    """Loads embeddings and (optional) position encodings for one side."""
    if hasattr(spec, "position_encodings"):
        set_position_encodings(
            spec.position_encodings,
            variables,
            "%s.embeddings.make_embedding.pe" % scope,
        )
    else:
        # See https://github.com/OpenNMT/OpenNMT-py/issues/1722
        spec.scale_embeddings = False

    embeddings_specs = spec.embeddings
    if not isinstance(embeddings_specs, list):
        embeddings_specs = [embeddings_specs]

    # One lookup table per source feature (emb_luts.0 is the word embedding).
    for i, embeddings_spec in enumerate(embeddings_specs):
        set_embeddings(
            embeddings_spec,
            variables,
            "%s.embeddings.make_embedding.emb_luts.%d" % (scope, i),
        )


def set_transformer_encoder_layer(spec, variables, scope):
    """Loads one encoder layer (feed-forward + self-attention)."""
    set_ffn(spec.ffn, variables, "%s.feed_forward" % scope)
    set_multi_head_attention(
        spec.self_attention,
        variables,
        "%s.self_attn" % scope,
        self_attention=True,
    )
    set_layer_norm(spec.self_attention.layer_norm, variables, "%s.layer_norm" % scope)


def set_transformer_decoder_layer(spec, variables, scope, with_encoder_attention=True):
    """Loads one decoder layer (feed-forward, self- and cross-attention)."""
    set_ffn(spec.ffn, variables, "%s.feed_forward" % scope)
    set_multi_head_attention(
        spec.self_attention,
        variables,
        "%s.self_attn" % scope,
        self_attention=True,
    )
    set_layer_norm(spec.self_attention.layer_norm, variables, "%s.layer_norm_1" % scope)
    if with_encoder_attention:
        set_multi_head_attention(spec.attention, variables, "%s.context_attn" % scope)
        set_layer_norm(spec.attention.layer_norm, variables, "%s.layer_norm_2" % scope)


def set_ffn(spec, variables, scope):
    """Loads the feed-forward block; w_3 is the extra projection of GLU variants."""
    set_layer_norm(spec.layer_norm, variables, "%s.layer_norm" % scope)
    set_linear(spec.linear_0, variables, "%s.w_1" % scope)
    set_linear(spec.linear_1, variables, "%s.w_2" % scope)
    if hasattr(spec, "linear_0_noact"):
        set_linear(spec.linear_0_noact, variables, "%s.w_3" % scope)


def set_multi_head_attention(spec, variables, scope, self_attention=False):
    """Loads attention projections, fusing Q/K/V (self) or K/V (cross)."""
    if self_attention:
        split_layers = [common_spec.LinearSpec() for _ in range(3)]
        set_linear(split_layers[0], variables, "%s.linear_query" % scope)
        set_linear(split_layers[1], variables, "%s.linear_keys" % scope)
        set_linear(split_layers[2], variables, "%s.linear_values" % scope)
        utils.fuse_linear(spec.linear[0], split_layers)
    else:
        set_linear(spec.linear[0], variables, "%s.linear_query" % scope)
        split_layers = [common_spec.LinearSpec() for _ in range(2)]
        set_linear(split_layers[0], variables, "%s.linear_keys" % scope)
        set_linear(split_layers[1], variables, "%s.linear_values" % scope)
        utils.fuse_linear(spec.linear[1], split_layers)
    set_linear(spec.linear[-1], variables, "%s.final_linear" % scope)
    if hasattr(spec, "relative_position_keys"):
        # Keys and values share the same relative position embedding table.
        spec.relative_position_keys = _get_variable(
            variables, "%s.relative_positions_embeddings.weight" % scope
        )
        spec.relative_position_values = spec.relative_position_keys


def set_layer_norm(spec, variables, scope):
    """Loads layer norm weights, supporting the legacy custom module names."""
    try:
        spec.gamma = _get_variable(variables, "%s.weight" % scope)
    except KeyError:
        # Compatibility with older models using a custom LayerNorm module.
        spec.gamma = _get_variable(variables, "%s.a_2" % scope)
        spec.beta = _get_variable(variables, "%s.b_2" % scope)
    try:
        spec.beta = _get_variable(variables, "%s.bias" % scope)
    except KeyError:
        # The bias variable is optional (absent for some norm variants).
        pass


def set_linear(spec, variables, scope):
    """Loads a linear layer; the bias is optional."""
    spec.weight = _get_variable(variables, "%s.weight" % scope)
    bias = variables.get("%s.bias" % scope)
    if bias is not None:
        spec.bias = bias


def set_embeddings(spec, variables, scope):
    """Loads an embedding table."""
    spec.weight = _get_variable(variables, "%s.weight" % scope)


def set_position_encodings(spec, variables, scope):
    """Loads precomputed position encodings, dropping singleton dimensions."""
    spec.encodings = _get_variable(variables, "%s.pe" % scope).squeeze()


def _get_variable(variables, name):
    """Returns the checkpoint variable *name* (raises KeyError if missing)."""
    return variables[name]
||||
|
||||
def main():
    """Command-line entry point for converting an OpenNMT-py model."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--model_path", required=True, help="Model path.")
    # Adds the common conversion options (output directory, quantization, etc.).
    Converter.declare_arguments(parser)
    args = parser.parse_args()
    OpenNMTPyConverter(args.model_path).convert_from_args(args)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,455 @@
|
||||
import argparse
|
||||
import copy
|
||||
import os
|
||||
|
||||
from typing import Optional, Union
|
||||
|
||||
from ctranslate2.converters import utils
|
||||
from ctranslate2.converters.converter import Converter
|
||||
from ctranslate2.specs import common_spec, transformer_spec
|
||||
|
||||
# Maps OpenNMT-tf activation function names (``activation.__name__``) to
# CTranslate2 activations.
_SUPPORTED_ACTIVATIONS = {
    "gelu": common_spec.Activation.GELUTanh,
    "relu": common_spec.Activation.RELU,
    "swish": common_spec.Activation.SWISH,
}
||||
|
||||
class OpenNMTTFConverter(Converter):
    """Converts OpenNMT-tf models."""

    @classmethod
    def from_config(
        cls,
        config: Union[str, dict],
        auto_config: bool = False,
        checkpoint_path: Optional[str] = None,
        model: Optional[str] = None,
    ):
        """Creates the converter from the configuration.

        Arguments:
          config: Path to the YAML configuration, or a dictionary with the loaded
            configuration.
          auto_config: Whether the model automatic configuration values should be used.
          checkpoint_path: Path to the checkpoint or checkpoint directory to load.
            If not set, the latest checkpoint from the model directory is loaded.
          model: If the model instance cannot be resolved from the model directory,
            this argument can be set to either the name of the model in the catalog
            or the path to the model configuration.

        Returns:
          A :class:`ctranslate2.converters.OpenNMTTFConverter` instance.

        Raises:
          RuntimeError: If no checkpoint could be restored.
        """
        from opennmt import config as config_util
        from opennmt.utils.checkpoint import Checkpoint

        if isinstance(config, str):
            config = config_util.load_config([config])
        else:
            # Copy so the caller's configuration dict is not mutated below.
            config = copy.deepcopy(config)

        if model is None:
            model = config_util.load_model(config["model_dir"])
        elif os.path.exists(model):
            model = config_util.load_model_from_file(model)
        else:
            model = config_util.load_model_from_catalog(model)

        if auto_config:
            config_util.merge_config(config, model.auto_config())

        data_config = config_util.try_prefix_paths(config["model_dir"], config["data"])
        model.initialize(data_config)

        checkpoint = Checkpoint.from_config(config, model)
        checkpoint_path = checkpoint.restore(checkpoint_path=checkpoint_path)
        if checkpoint_path is None:
            raise RuntimeError("No checkpoint was restored")

        # Materialize the variables so the weights can be read during conversion.
        model.create_variables()
        return cls(model)

    def __init__(self, model):
        """Initializes the converter.

        Arguments:
          model: An initialized and fully-built ``opennmt.models.Model`` instance.
        """
        self._model = model

    def _load(self):
        """Dispatches to the spec builder matching the model type."""
        import opennmt

        if isinstance(self._model, opennmt.models.LanguageModel):
            spec_builder = TransformerDecoderSpecBuilder()
        else:
            spec_builder = TransformerSpecBuilder()

        return spec_builder(self._model)
||||
|
||||
class TransformerSpecBuilder:
    """Builds a CTranslate2 Transformer model specification from an OpenNMT-tf
    Transformer model: validates that the configuration is supported, then
    copies the vocabularies and weights into the spec.
    """

    def __call__(self, model):
        """Converts ``model`` into a ``transformer_spec.TransformerSpec``.

        Arguments:
            model: An initialized and fully-built ``opennmt.models.Transformer``.

        Returns:
            A ``transformer_spec.TransformerSpec`` with weights loaded.

        Raises:
            ValueError: If the model configuration is not supported.
        """
        import opennmt

        # Accumulates unsupported-configuration reasons; validate() raises a
        # ValueError listing everything collected so far.
        check = utils.ConfigurationChecker()
        check(
            isinstance(model, opennmt.models.Transformer),
            "Only Transformer models are supported",
        )
        check.validate()

        check(
            isinstance(model.encoder, opennmt.encoders.SelfAttentionEncoder),
            "Parallel encoders are not supported",
        )
        check(
            isinstance(
                model.features_inputter,
                (opennmt.inputters.WordEmbedder, opennmt.inputters.ParallelInputter),
            ),
            "Source inputter must be a WordEmbedder or a ParallelInputter",
        )
        check.validate()

        # Inspect the first encoder layer; all layers share the same settings.
        mha = model.encoder.layers[0].self_attention.layer
        ffn = model.encoder.layers[0].ffn.layer
        with_relative_position = mha.maximum_relative_position is not None
        activation_name = ffn.inner.activation.__name__

        check(
            activation_name in _SUPPORTED_ACTIVATIONS,
            "Activation %s is not supported (supported activations are: %s)"
            % (activation_name, ", ".join(_SUPPORTED_ACTIVATIONS.keys())),
        )
        # Exactly one of the two position schemes must be active.
        check(
            with_relative_position != bool(model.encoder.position_encoder),
            "Relative position representation and position encoding cannot be both enabled "
            "or both disabled",
        )
        check(
            model.decoder.attention_reduction
            != opennmt.layers.MultiHeadAttentionReduction.AVERAGE_ALL_LAYERS,
            "Averaging all multi-head attention matrices is not supported",
        )

        source_inputters = _get_inputters(model.features_inputter)
        target_inputters = _get_inputters(model.labels_inputter)
        num_source_embeddings = len(source_inputters)
        if num_source_embeddings == 1:
            embeddings_merge = common_spec.EmbeddingsMerge.CONCAT
        else:
            # Multiple source embeddings: the configured reducer decides how
            # they are merged. Anything else is unsupported.
            reducer = model.features_inputter.reducer
            embeddings_merge = None
            if reducer is not None:
                if isinstance(reducer, opennmt.layers.ConcatReducer):
                    embeddings_merge = common_spec.EmbeddingsMerge.CONCAT
                elif isinstance(reducer, opennmt.layers.SumReducer):
                    embeddings_merge = common_spec.EmbeddingsMerge.ADD

            check(
                all(
                    isinstance(inputter, opennmt.inputters.WordEmbedder)
                    for inputter in source_inputters
                ),
                # Fixed grammar in the user-facing message ("must" -> "must be").
                "All source inputters must be WordEmbedders",
            )
            check(
                embeddings_merge is not None,
                "Unsupported embeddings reducer %s" % reducer,
            )

        alignment_layer = -1
        alignment_heads = 1
        if (
            model.decoder.attention_reduction
            == opennmt.layers.MultiHeadAttentionReduction.AVERAGE_LAST_LAYER
        ):
            # 0 selects averaging over all heads of the alignment layer.
            alignment_heads = 0

        check.validate()

        encoder_spec = transformer_spec.TransformerEncoderSpec(
            len(model.encoder.layers),
            model.encoder.layers[0].self_attention.layer.num_heads,
            pre_norm=model.encoder.layer_norm is not None,
            activation=_SUPPORTED_ACTIVATIONS[activation_name],
            num_source_embeddings=num_source_embeddings,
            embeddings_merge=embeddings_merge,
            relative_position=with_relative_position,
        )

        decoder_spec = transformer_spec.TransformerDecoderSpec(
            len(model.decoder.layers),
            model.decoder.layers[0].self_attention.layer.num_heads,
            pre_norm=model.decoder.layer_norm is not None,
            activation=_SUPPORTED_ACTIVATIONS[activation_name],
            relative_position=with_relative_position,
            alignment_layer=alignment_layer,
            alignment_heads=alignment_heads,
        )

        spec = transformer_spec.TransformerSpec(encoder_spec, decoder_spec)

        spec.config.add_source_bos = bool(source_inputters[0].mark_start)
        spec.config.add_source_eos = bool(source_inputters[0].mark_end)
        for inputter in source_inputters:
            spec.register_source_vocabulary(_load_vocab(inputter.vocabulary_file))
        for inputter in target_inputters:
            spec.register_target_vocabulary(_load_vocab(inputter.vocabulary_file))

        self.set_transformer_encoder(
            spec.encoder,
            model.encoder,
            model.features_inputter,
        )
        self.set_transformer_decoder(
            spec.decoder,
            model.decoder,
            model.labels_inputter,
        )

        return spec

    def set_transformer_encoder(self, spec, module, inputter):
        """Copies encoder embeddings, position encodings, and layer weights."""
        for embedding_spec, inputter in zip(spec.embeddings, _get_inputters(inputter)):
            self.set_embeddings(embedding_spec, inputter)
        if module.position_encoder is not None:
            self.set_position_encodings(
                spec.position_encodings,
                module.position_encoder,
            )

        for layer_spec, layer in zip(spec.layer, module.layers):
            self.set_multi_head_attention(
                layer_spec.self_attention,
                layer.self_attention,
                self_attention=True,
            )

            self.set_ffn(layer_spec.ffn, layer.ffn)

        # Final layer norm is only present for pre-norm Transformers.
        if module.layer_norm is not None:
            self.set_layer_norm(spec.layer_norm, module.layer_norm)

    def set_transformer_decoder(self, spec, module, inputter):
        """Copies decoder embeddings, layer weights, and the output projection."""
        self.set_embeddings(spec.embeddings, inputter)
        if module.position_encoder is not None:
            self.set_position_encodings(
                spec.position_encodings,
                module.position_encoder,
            )

        for layer_spec, layer in zip(spec.layer, module.layers):
            self.set_multi_head_attention(
                layer_spec.self_attention,
                layer.self_attention,
                self_attention=True,
            )

            # Cross-attention is absent for decoder-only (language) models.
            if layer.attention:
                self.set_multi_head_attention(
                    layer_spec.attention,
                    layer.attention[0],
                    self_attention=False,
                )

            self.set_ffn(layer_spec.ffn, layer.ffn)

        if module.layer_norm is not None:
            self.set_layer_norm(spec.layer_norm, module.layer_norm)

        self.set_linear(spec.projection, module.output_layer)

    def set_ffn(self, spec, module):
        """Copies the two feed-forward linear layers and their layer norm."""
        self.set_linear(spec.linear_0, module.layer.inner)
        self.set_linear(spec.linear_1, module.layer.outer)
        self.set_layer_norm_from_wrapper(spec.layer_norm, module)

    def set_multi_head_attention(self, spec, module, self_attention=False):
        """Copies one attention block, fusing Q/K/V projections as CTranslate2
        expects: a single fused QKV for self-attention, or Q and fused KV for
        cross-attention.
        """
        split_layers = [common_spec.LinearSpec() for _ in range(3)]
        self.set_linear(split_layers[0], module.layer.linear_queries)
        self.set_linear(split_layers[1], module.layer.linear_keys)
        self.set_linear(split_layers[2], module.layer.linear_values)

        if self_attention:
            utils.fuse_linear(spec.linear[0], split_layers)
            if module.layer.maximum_relative_position is not None:
                spec.relative_position_keys = (
                    module.layer.relative_position_keys.numpy()
                )
                spec.relative_position_values = (
                    module.layer.relative_position_values.numpy()
                )
        else:
            utils.fuse_linear(spec.linear[0], split_layers[:1])
            utils.fuse_linear(spec.linear[1], split_layers[1:])

        self.set_linear(spec.linear[-1], module.layer.linear_output)
        self.set_layer_norm_from_wrapper(spec.layer_norm, module)

    def set_layer_norm_from_wrapper(self, spec, module):
        """Copies the layer norm of a residual wrapper (pre-norm if present,
        otherwise post-norm)."""
        self.set_layer_norm(
            spec,
            (
                module.output_layer_norm
                if module.input_layer_norm is None
                else module.input_layer_norm
            ),
        )

    def set_layer_norm(self, spec, module):
        """Copies layer norm scale (gamma) and offset (beta)."""
        spec.gamma = module.gamma.numpy()
        spec.beta = module.beta.numpy()

    def set_linear(self, spec, module):
        """Copies a dense layer, transposing the kernel so the spec stores it
        in (output, input) order; copies the bias when present."""
        spec.weight = module.kernel.numpy()
        if not module.transpose:
            spec.weight = spec.weight.transpose()
        if module.bias is not None:
            spec.bias = module.bias.numpy()

    def set_embeddings(self, spec, module):
        """Copies an embedding matrix."""
        spec.weight = module.embedding.numpy()

    def set_position_encodings(self, spec, module):
        """Copies learned position embeddings, dropping the padding position.

        Sinusoidal encoders carry no weights, so nothing is copied for them.
        """
        import opennmt

        if isinstance(module, opennmt.layers.PositionEmbedder):
            spec.encodings = module.embedding.numpy()[1:]
||||
|
||||
class TransformerDecoderSpecBuilder(TransformerSpecBuilder):
    """Builds a decoder-only model spec from an OpenNMT-tf language model.

    Reuses the weight-copying helpers of :class:`TransformerSpecBuilder` but
    produces a ``TransformerDecoderModelSpec`` with a single vocabulary.
    """

    def __call__(self, model):
        import opennmt

        # Collect unsupported-configuration reasons; validate() raises if any.
        check = utils.ConfigurationChecker()
        check(
            isinstance(model.decoder, opennmt.decoders.SelfAttentionDecoder),
            "Only self-attention decoders are supported",
        )
        check.validate()

        # Inspect the first decoder layer; all layers share the same settings.
        mha = model.decoder.layers[0].self_attention.layer
        ffn = model.decoder.layers[0].ffn.layer
        activation_name = ffn.inner.activation.__name__

        check(
            activation_name in _SUPPORTED_ACTIVATIONS,
            "Activation %s is not supported (supported activations are: %s)"
            % (activation_name, ", ".join(_SUPPORTED_ACTIVATIONS.keys())),
        )
        check.validate()

        spec = transformer_spec.TransformerDecoderModelSpec.from_config(
            len(model.decoder.layers),
            mha.num_heads,
            pre_norm=model.decoder.layer_norm is not None,
            activation=_SUPPORTED_ACTIVATIONS[activation_name],
        )

        # Language models use the features inputter for both input and output.
        spec.register_vocabulary(_load_vocab(model.features_inputter.vocabulary_file))
        self.set_transformer_decoder(
            spec.decoder,
            model.decoder,
            model.features_inputter,
        )

        return spec
|
||||
|
||||
|
||||
def _get_inputters(inputter):
    """Returns the list of child inputters of a multi-inputter, or the single
    inputter wrapped in a list."""
    import opennmt

    if isinstance(inputter, opennmt.inputters.MultiInputter):
        return inputter.inputters
    return [inputter]
|
||||
|
||||
|
||||
def _load_vocab(vocab, unk_token="<unk>"):
    """Normalizes a vocabulary into a list of tokens.

    Arguments:
        vocab: An ``opennmt.data.Vocab`` instance, a list of tokens, or a path
            to a vocabulary file.
        unk_token: Unknown token appended to the vocabulary when missing.

    Returns:
        The list of tokens, guaranteed to contain :obj:`unk_token`.

    Raises:
        TypeError: If :obj:`vocab` has an unsupported type.
    """
    import opennmt

    if isinstance(vocab, opennmt.data.Vocab):
        tokens = list(vocab.words)
    elif isinstance(vocab, list):
        tokens = list(vocab)
    elif isinstance(vocab, str):
        tokens = opennmt.data.Vocab.from_file(vocab).words
    else:
        raise TypeError("Invalid vocabulary type")

    if unk_token not in tokens:
        tokens.append(unk_token)
    return tokens
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for converting an OpenNMT-tf model.

    Either a YAML configuration is given (--config), or the checkpoint and
    both vocabularies are given explicitly (--model_path, --src_vocab,
    --tgt_vocab) and a minimal configuration is synthesized from them.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument("--config", help="Path to the YAML configuration.")
    parser.add_argument(
        "--auto_config",
        action="store_true",
        help="Use the model automatic configuration values.",
    )
    parser.add_argument(
        "--model_path",
        help=(
            "Path to the checkpoint or checkpoint directory to load. If not set, "
            "the latest checkpoint from the model directory is loaded."
        ),
    )
    parser.add_argument(
        "--model_type",
        help=(
            "If the model instance cannot be resolved from the model directory, "
            "this argument can be set to either the name of the model in the catalog "
            "or the path to the model configuration."
        ),
    )
    parser.add_argument(
        "--src_vocab",
        help="Path to the source vocabulary (required if no configuration is set).",
    )
    parser.add_argument(
        "--tgt_vocab",
        help="Path to the target vocabulary (required if no configuration is set).",
    )
    # Adds the output options shared by all converters (declared on the base
    # Converter class).
    Converter.declare_arguments(parser)
    args = parser.parse_args()

    config = args.config
    if not config:
        if not args.model_path or not args.src_vocab or not args.tgt_vocab:
            raise ValueError(
                "Options --model_path, --src_vocab, --tgt_vocab are required "
                "when a configuration is not set"
            )

        # Build a minimal configuration equivalent to what a YAML file would
        # provide: a model directory and the two vocabularies.
        model_dir = (
            args.model_path
            if os.path.isdir(args.model_path)
            else os.path.dirname(args.model_path)
        )
        config = {
            "model_dir": model_dir,
            "data": {
                "source_vocabulary": args.src_vocab,
                "target_vocabulary": args.tgt_vocab,
            },
        }

    converter = OpenNMTTFConverter.from_config(
        config,
        auto_config=args.auto_config,
        checkpoint_path=args.model_path,
        model=args.model_type,
    )
    converter.convert_from_args(args)


if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,44 @@
|
||||
import argparse
|
||||
import os
|
||||
|
||||
import yaml
|
||||
|
||||
from ctranslate2.converters.marian import MarianConverter
|
||||
|
||||
|
||||
class OpusMTConverter(MarianConverter):
    """Converts models trained with OPUS-MT."""

    def __init__(self, model_dir: str):
        """Initializes the OPUS-MT converter.

        Arguments:
            model_dir: Path to the OPUS-MT model directory.
        """
        # decoder.yml describes the released model: it lists the Marian model
        # file(s) and the vocabulary file(s) relative to the directory.
        with open(
            os.path.join(model_dir, "decoder.yml"), encoding="utf-8"
        ) as decoder_file:
            decoder_config = yaml.safe_load(decoder_file)

        # Only the first listed model is converted.
        model_path = os.path.join(model_dir, decoder_config["models"][0])
        vocab_paths = [
            os.path.join(model_dir, path) for path in decoder_config["vocabs"]
        ]
        super().__init__(model_path, vocab_paths)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point for converting an OPUS-MT model directory."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter
    )
    parser.add_argument(
        "--model_dir", required=True, help="Path to the OPUS-MT model directory."
    )
    # Adds the output options shared by all converters (declared on the base
    # Converter class).
    OpusMTConverter.declare_arguments(parser)
    args = parser.parse_args()
    converter = OpusMTConverter(args.model_dir)
    converter.convert_from_args(args)


if __name__ == "__main__":
    main()
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,127 @@
|
||||
import numpy as np
|
||||
|
||||
|
||||
def fuse_linear(spec, layers):
    """Fuses several linear layers into a single one on ``spec``.

    Weights are concatenated along the output dimension. A fused bias is only
    created when at least one layer has a bias; layers without one contribute
    zeros of the matching output size.

    Arguments:
        spec: Target linear spec receiving ``weight`` (and possibly ``bias``).
        layers: Non-empty sequence of linear specs with ``weight``/``bias``.

    Raises:
        ValueError: If :obj:`layers` is empty.
    """
    if not layers:
        raise ValueError("Cannot fuse linear layers: at least one layer is required")

    # Dispatch on the array framework of the first weight.
    if isinstance(layers[0].weight, np.ndarray):
        concatenate, zeros = np.concatenate, np.zeros
    else:
        import torch

        concatenate, zeros = torch.cat, torch.zeros

    spec.weight = concatenate([layer.weight for layer in layers])

    # dtype of the first biased layer, if any layer has a bias at all.
    bias_dtype = next(
        (layer.bias.dtype for layer in layers if layer.has_bias()), None
    )
    if bias_dtype is None:
        return

    fused_bias = []
    for layer in layers:
        if layer.has_bias():
            fused_bias.append(layer.bias)
        else:
            fused_bias.append(zeros([layer.weight.shape[0]], dtype=bias_dtype))
    spec.bias = concatenate(fused_bias)
|
||||
|
||||
|
||||
def fuse_linear_prequant(spec, layers, axis):
    """Fuses pre-quantized linear layers by concatenating their quantization
    parameters (``weight``, ``weight_scale``, ``weight_zero``) along ``axis``.

    Arguments:
        spec: Target spec receiving the fused parameters.
        layers: Non-empty sequence of pre-quantized linear specs.
        axis: Concatenation axis.

    Raises:
        ValueError: If :obj:`layers` is empty.
    """
    if not layers:
        raise ValueError("Cannot fuse linear layers: at least one layer is required")

    # Dispatch on the array framework of the first weight.
    if isinstance(layers[0].weight, np.ndarray):
        concatenate = np.concatenate
    else:
        import torch

        concatenate = torch.cat

    for name in ("weight", "weight_scale", "weight_zero"):
        fused = concatenate([getattr(layer, name) for layer in layers], axis=axis)
        setattr(spec, name, fused)
|
||||
|
||||
|
||||
def permute_for_sliced_rotary(weight, num_heads, rotary_dim=None):
    """Permutes the weight to use the sliced rotary implementation.

    Without ``rotary_dim``, each head's rows are reinterpreted as
    (half, 2) pairs and the two halves are swapped into contiguous blocks via
    the reshape/swapaxes below. With ``rotary_dim``, only the first
    ``rotary_dim`` rows of each head are permuted (recursively, using the
    no-``rotary_dim`` path) and the rest are left untouched.

    Arguments:
        weight: 2-D projection weight of shape (num_heads * head_dim, input_dim).
        num_heads: Number of attention heads.
        rotary_dim: Number of rotary rows per head, or None to permute all rows.

    Returns:
        The permuted weight with the same 2-D shape as the input.
    """
    if rotary_dim is not None:
        # View as (head, head_dim, input_dim) to address per-head row slices.
        weight = weight.reshape(num_heads, weight.shape[0] // num_heads, -1)

        rotary_weight = weight[:, :rotary_dim]
        rotary_weight = permute_for_sliced_rotary(
            rotary_weight.reshape(num_heads * rotary_dim, -1), num_heads
        ).reshape(num_heads, rotary_dim, -1)

        # NOTE(review): this slice assignment writes through the reshaped
        # view, so for a contiguous NumPy input the caller's array may be
        # modified in place — confirm callers do not rely on the original.
        weight[:, :rotary_dim] = rotary_weight

        return weight.reshape(-1, weight.shape[-1])

    return (
        weight.reshape(num_heads, weight.shape[0] // num_heads // 2, 2, weight.shape[1])
        .swapaxes(1, 2)
        .reshape(weight.shape[0], weight.shape[1])
    )
|
||||
|
||||
|
||||
def smooth_activation(layer_norm, linear, activation_scales):
    """Applies the activation smoothing technique described in
    https://github.com/mit-han-lab/smoothquant.

    Rescales the preceding layer norm down and the linear weight up by the
    same per-channel factor, which is mathematically neutral but moves
    quantization difficulty from activations into weights. ``layer_norm`` and
    ``linear`` are modified in place.

    Arguments:
        layer_norm: Layer norm spec with ``gamma`` and ``beta``.
        linear: Linear spec whose ``weight`` columns match the norm channels.
        activation_scales: Per-channel maximum absolute activation values.
    """
    # Compute the scales in NumPy even for torch tensors; only the final
    # in-place updates go back through the original framework.
    if not isinstance(linear.weight, np.ndarray):
        linear_weight = linear.weight.numpy()
        activation_scales = activation_scales.numpy()
    else:
        linear_weight = linear.weight

    # Per-input-channel maximum absolute weight, floored to avoid div-by-zero.
    weight_scales = np.amax(np.absolute(linear_weight), axis=0)
    weight_scales = np.maximum(weight_scales, 1e-5)

    activation_scales = activation_scales.astype(weight_scales.dtype)

    # Balanced migration factor (alpha = 0.5): sqrt(act_max / weight_max).
    scales = np.sqrt(activation_scales / weight_scales)
    scales = np.maximum(scales, 1e-5)

    if not isinstance(linear.weight, np.ndarray):
        import torch

        scales = torch.from_numpy(scales)

    # Shrink the norm outputs and grow the weights by the same factor.
    layer_norm.gamma /= scales
    layer_norm.beta /= scales

    linear.weight *= scales.reshape(1, -1)
|
||||
|
||||
|
||||
def raise_unsupported(reasons):
    """Raises a ValueError listing every reason the model is unsupported.

    Arguments:
        reasons: Sequence of human-readable reason strings.

    Raises:
        ValueError: Always; the message contains one bullet per reason.
    """
    header = (
        "The model you are trying to convert is not supported by CTranslate2. "
        "We identified the following reasons:\n"
    )
    bullets = "".join("\n- " + reason for reason in reasons)
    raise ValueError(header + bullets)
|
||||
|
||||
|
||||
class ConfigurationChecker:
    """Accumulates unsupported-configuration reasons and reports them at once.

    Call the instance with (condition, message) pairs; falsy conditions record
    their message. ``validate()`` raises with every recorded reason so the
    user sees all problems in a single error.
    """

    def __init__(self):
        # Messages recorded for every failed condition, in call order.
        self._unsupported_reasons = []

    def __call__(self, assert_condition, error_message):
        """Records ``error_message`` when ``assert_condition`` is falsy."""
        if assert_condition:
            return
        self._unsupported_reasons.append(error_message)

    def validate(self):
        """Raises via ``raise_unsupported`` if any reason was recorded."""
        if self._unsupported_reasons:
            raise_unsupported(self._unsupported_reasons)
|
||||
585
venv/lib/python3.12/site-packages/ctranslate2/extensions.py
Normal file
585
venv/lib/python3.12/site-packages/ctranslate2/extensions.py
Normal file
@@ -0,0 +1,585 @@
|
||||
import asyncio
|
||||
import collections
|
||||
import itertools
|
||||
import queue
|
||||
import threading
|
||||
|
||||
from typing import AsyncIterable, Callable, Iterable, List, Optional, Union
|
||||
|
||||
from ctranslate2._ext import (
|
||||
GenerationResult,
|
||||
GenerationStepResult,
|
||||
Generator,
|
||||
ScoringResult,
|
||||
TranslationResult,
|
||||
Translator,
|
||||
)
|
||||
|
||||
|
||||
def register_extensions():
    """Registers additional attributes to compiled modules.

    Attaches the pure-Python streaming helpers defined in this module as
    methods on the compiled ``Translator`` and ``Generator`` classes.
    """
    translator_methods = {
        "translate_iterable": translator_translate_iterable,
        "score_iterable": translator_score_iterable,
        "generate_tokens": translator_generate_tokens,
    }
    generator_methods = {
        "generate_iterable": generator_generate_iterable,
        "score_iterable": generator_score_iterable,
        "generate_tokens": generator_generate_tokens,
        "async_generate_tokens": generator_async_generate_tokens,
    }
    for name, method in translator_methods.items():
        setattr(Translator, name, method)
    for name, method in generator_methods.items():
        setattr(Generator, name, method)
||||
|
||||
|
||||
def translator_translate_iterable(
    translator: Translator,
    source: Iterable[List[str]],
    target_prefix: Optional[Iterable[List[str]]] = None,
    max_batch_size: int = 32,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[TranslationResult]:
    """Translates an iterable of tokenized examples.

    Built on top of :meth:`ctranslate2.Translator.translate_batch` to
    efficiently translate an arbitrarily large stream of data: the input is
    processed as a stream (never fully materialized in memory), batches are
    prefetched asynchronously, examples are locally sorted by length, and
    translations run in parallel when the translator has multiple workers.

    Arguments:
        source: An iterable of tokenized source examples.
        target_prefix: An optional iterable of tokenized target prefixes.
        max_batch_size: The maximum batch size.
        batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
        **kwargs: Any translation options accepted by
            :meth:`ctranslate2.Translator.translate_batch`.

    Returns:
        A generator iterator over :class:`ctranslate2.TranslationResult` instances.

    Example:
        Efficiently translate a text file:

        .. code-block:: python

            tokenize_fn = lambda line: line.strip().split()
            detokenize_fn = lambda tokens: " ".join(tokens)

            with open("input.txt") as input_file:
                source = map(tokenize_fn, input_file)
                for result in translator.translate_iterable(source, max_batch_size=64):
                    print(detokenize_fn(result.hypotheses[0]))
    """
    # When given, the prefix stream is consumed in lockstep with the source.
    streams = [source] if target_prefix is None else [source, target_prefix]

    yield from _process_iterable(
        translator.translate_batch,
        streams,
        max_batch_size,
        batch_type,
        **kwargs,
    )
|
||||
|
||||
|
||||
def translator_score_iterable(
    translator: Translator,
    source: Iterable[List[str]],
    target: Iterable[List[str]],
    max_batch_size: int = 64,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[ScoringResult]:
    """Scores an iterable of tokenized source/target pairs.

    Built on top of :meth:`ctranslate2.Translator.score_batch` to efficiently
    score an arbitrarily large stream of data: the input is processed as a
    stream (never fully materialized in memory), batches are prefetched
    asynchronously, examples are locally sorted by length, and scoring runs in
    parallel when the translator has multiple workers.

    Arguments:
        source: An iterable of tokenized source examples.
        target: An iterable of tokenized target examples.
        max_batch_size: The maximum batch size.
        batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
        **kwargs: Any scoring options accepted by
            :meth:`ctranslate2.Translator.score_batch`.

    Returns:
        A generator iterator over :class:`ctranslate2.ScoringResult` instances.
    """
    # Source and target streams are consumed in lockstep.
    paired_streams = [source, target]
    yield from _process_iterable(
        translator.score_batch,
        paired_streams,
        max_batch_size,
        batch_type,
        **kwargs,
    )
|
||||
|
||||
|
||||
def generator_generate_iterable(
    generator: Generator,
    start_tokens: Iterable[List[str]],
    max_batch_size: int = 32,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[GenerationResult]:
    """Generates from an iterable of tokenized prompts.

    Built on top of :meth:`ctranslate2.Generator.generate_batch` to
    efficiently run generation on an arbitrarily large stream of data: the
    input is processed as a stream (never fully materialized in memory),
    batches are prefetched asynchronously, examples are locally sorted by
    length, and generations run in parallel when the generator has multiple
    workers.

    Arguments:
        start_tokens: An iterable of tokenized prompts.
        max_batch_size: The maximum batch size.
        batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
        **kwargs: Any generation options accepted by
            :meth:`ctranslate2.Generator.generate_batch`.

    Returns:
        A generator iterator over :class:`ctranslate2.GenerationResult` instances.
    """
    streams = [start_tokens]
    yield from _process_iterable(
        generator.generate_batch,
        streams,
        max_batch_size,
        batch_type,
        **kwargs,
    )
|
||||
|
||||
|
||||
def generator_score_iterable(
    generator: Generator,
    tokens: Iterable[List[str]],
    max_batch_size: int = 64,
    batch_type: str = "examples",
    **kwargs,
) -> Iterable[ScoringResult]:
    """Scores an iterable of tokenized examples.

    Built on top of :meth:`ctranslate2.Generator.score_batch` to efficiently
    score an arbitrarily large stream of data: the input is processed as a
    stream (never fully materialized in memory), batches are prefetched
    asynchronously, examples are locally sorted by length, and scoring runs in
    parallel when the generator has multiple workers.

    Arguments:
        tokens: An iterable of tokenized examples.
        max_batch_size: The maximum batch size.
        batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
        **kwargs: Any score options accepted by
            :meth:`ctranslate2.Generator.score_batch`.

    Returns:
        A generator iterator over :class:`ctranslate2.ScoringResult` instances.
    """
    streams = [tokens]
    yield from _process_iterable(
        generator.score_batch,
        streams,
        max_batch_size,
        batch_type,
        **kwargs,
    )
|
||||
|
||||
|
||||
def translator_generate_tokens(
    translator: Translator,
    source: List[str],
    target_prefix: Optional[List[str]] = None,
    *,
    max_decoding_length: int = 256,
    min_decoding_length: int = 1,
    sampling_topk: int = 1,
    sampling_topp: float = 1,
    sampling_temperature: float = 1,
    return_log_prob: bool = False,
    repetition_penalty: float = 1,
    no_repeat_ngram_size: int = 0,
    disable_unk: bool = False,
    suppress_sequences: Optional[List[List[str]]] = None,
    end_token: Optional[Union[str, List[str], List[int]]] = None,
    max_input_length: int = 1024,
    use_vmap: bool = False,
) -> Iterable[GenerationStepResult]:
    """Yields tokens as they are generated by the model.

    Arguments:
        source: Source tokens.
        target_prefix: Optional target prefix tokens.
        max_decoding_length: Maximum prediction length.
        min_decoding_length: Minimum prediction length.
        sampling_topk: Randomly sample predictions from the top K candidates.
        sampling_topp: Keep the most probable tokens whose cumulative probability
            exceeds this value.
        sampling_temperature: Sampling temperature to generate more random samples.
        return_log_prob: Include the token log probability in the result.
        repetition_penalty: Penalty applied to the score of previously generated
            tokens (set > 1 to penalize).
        no_repeat_ngram_size: Prevent repetitions of ngrams with this size
            (set 0 to disable).
        disable_unk: Disable the generation of the unknown token.
        suppress_sequences: Disable the generation of some sequences of tokens.
        end_token: Stop the decoding on one of these tokens (defaults to the
            model EOS token).
        max_input_length: Truncate inputs after this many tokens (set 0 to disable).
        use_vmap: Use the vocabulary mapping file saved in this model.

    Returns:
        A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
        This generation method is not compatible with beam search which requires
        a complete decoding.
    """
    # The single example is wrapped into a batch of one for the batch API.
    prefix_batch = None if target_prefix is None else [target_prefix]
    yield from _generate_tokens(
        translator.translate_batch,
        [source],
        prefix_batch,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        disable_unk=disable_unk,
        suppress_sequences=suppress_sequences,
        end_token=end_token,
        max_decoding_length=max_decoding_length,
        min_decoding_length=min_decoding_length,
        sampling_topk=sampling_topk,
        sampling_topp=sampling_topp,
        sampling_temperature=sampling_temperature,
        return_scores=return_log_prob,
        max_input_length=max_input_length,
        use_vmap=use_vmap,
    )
|
||||
|
||||
|
||||
def generator_generate_tokens(
    generator: Generator,
    prompt: Union[List[str], List[List[str]]],
    max_batch_size: int = 0,
    batch_type: str = "examples",
    *,
    max_length: int = 512,
    min_length: int = 0,
    sampling_topk: int = 1,
    sampling_topp: float = 1,
    sampling_temperature: float = 1,
    return_log_prob: bool = False,
    repetition_penalty: float = 1,
    no_repeat_ngram_size: int = 0,
    disable_unk: bool = False,
    suppress_sequences: Optional[List[List[str]]] = None,
    end_token: Optional[Union[str, List[str], List[int]]] = None,
    static_prompt: Optional[List[str]] = None,
    cache_static_prompt: bool = True,
    callback: Callable[[GenerationStepResult], bool] = None,
) -> Iterable[GenerationStepResult]:
    """Yields tokens as they are generated by the model.

    Arguments:
        prompt: Batch of start tokens. If the decoder starts from a special
            start token like <s>, this token should be added to this input.
        max_batch_size: The maximum batch size.
        batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
        max_length: Maximum generation length.
        min_length: Minimum generation length.
        sampling_topk: Randomly sample predictions from the top K candidates.
        sampling_topp: Keep the most probable tokens whose cumulative probability
            exceeds this value.
        sampling_temperature: Sampling temperature to generate more random samples.
        return_log_prob: Include the token log probability in the result.
        repetition_penalty: Penalty applied to the score of previously generated
            tokens (set > 1 to penalize).
        no_repeat_ngram_size: Prevent repetitions of ngrams with this size
            (set 0 to disable).
        disable_unk: Disable the generation of the unknown token.
        suppress_sequences: Disable the generation of some sequences of tokens.
        end_token: Stop the decoding on one of these tokens (defaults to the
            model EOS token).
        static_prompt: If the model expects a static prompt (a.k.a. system prompt)
            it can be set here to simplify the inputs and optionally cache the
            model state for this prompt to accelerate future generations.
        cache_static_prompt: Cache the model state after the static prompt and
            reuse it for future generations using the same static prompt.
        callback: Optional function that is called for each generated token when
            :obj:`beam_size` is 1. If the callback function returns ``True``,
            the decoding will stop for this batch index.

    Returns:
        A generator iterator over :class:`ctranslate2.GenerationStepResult` instances.

    Note:
        This generation method is not compatible with beam search which requires
        a complete decoding.
    """
    # A single flat token list is promoted to a batch of one example.
    if prompt and isinstance(prompt[0], str):
        prompt = [prompt]
    yield from _generate_tokens(
        generator.generate_batch,
        prompt,
        max_batch_size=max_batch_size,
        batch_type=batch_type,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        disable_unk=disable_unk,
        suppress_sequences=suppress_sequences,
        end_token=end_token,
        max_length=max_length,
        min_length=min_length,
        sampling_topk=sampling_topk,
        sampling_topp=sampling_topp,
        sampling_temperature=sampling_temperature,
        return_scores=return_log_prob,
        static_prompt=static_prompt,
        cache_static_prompt=cache_static_prompt,
        include_prompt_in_result=False,
        callback=callback,
    )
|
||||
|
||||
|
||||
async def generator_async_generate_tokens(
    generator: Generator,
    prompt: Union[List[str], List[List[str]]],
    max_batch_size: int = 0,
    batch_type: str = "examples",
    *,
    max_length: int = 512,
    min_length: int = 0,
    sampling_topk: int = 1,
    sampling_topp: float = 1,
    sampling_temperature: float = 1,
    return_log_prob: bool = False,
    repetition_penalty: float = 1,
    no_repeat_ngram_size: int = 0,
    disable_unk: bool = False,
    suppress_sequences: Optional[List[List[str]]] = None,
    end_token: Optional[Union[str, List[str], List[int]]] = None,
    static_prompt: Optional[List[str]] = None,
    cache_static_prompt: bool = True,
    callback: Optional[Callable[[GenerationStepResult], bool]] = None,
) -> AsyncIterable[GenerationStepResult]:
    """Yields tokens asynchronously as they are generated by the model.

    Arguments:
        generator: The generator instance running the model.
        prompt: Batch of start tokens. If the decoder starts from a
            special start token like <s>, this token should be added to this input.
        max_batch_size: The maximum batch size.
        batch_type: Whether :obj:`max_batch_size` is the number of "examples" or "tokens".
        max_length: Maximum generation length.
        min_length: Minimum generation length.
        sampling_topk: Randomly sample predictions from the top K candidates.
        sampling_topp: Keep the most probable tokens whose cumulative probability
            exceeds this value.
        sampling_temperature: Sampling temperature to generate more random samples.
        return_log_prob: Include the token log probability in the result.
        repetition_penalty: Penalty applied to the score of previously generated tokens
            (set > 1 to penalize).
        no_repeat_ngram_size: Prevent repetitions of ngrams with this size
            (set 0 to disable).
        disable_unk: Disable the generation of the unknown token.
        suppress_sequences: Disable the generation of some sequences of tokens.
        end_token: Stop the decoding on one of these tokens (defaults to the model
            EOS token).
        static_prompt: If the model expects a static prompt (a.k.a. system prompt)
            it can be set here to simplify the inputs and optionally cache the model
            state for this prompt to accelerate future generations.
        cache_static_prompt: Cache the model state after the static prompt and
            reuse it for future generations using the same static prompt.
        callback: Optional function that is called for each generated token when
            :obj:`beam_size` is 1. If the callback function returns ``True``, the
            decoding will stop for this batch index.

    Returns:
        An async generator iterator over :class:`ctranslate2.GenerationStepResult`
        instances.

    Note:
        This generation method is not compatible with beam search which requires
        a complete decoding.
    """
    # A single flat prompt is promoted to a batch of one example.
    if len(prompt) > 0 and isinstance(prompt[0], str):
        prompt = [prompt]
    async for step_result in AsyncGenerator(
        generator.generate_batch,
        prompt,
        max_batch_size=max_batch_size,
        batch_type=batch_type,
        repetition_penalty=repetition_penalty,
        no_repeat_ngram_size=no_repeat_ngram_size,
        disable_unk=disable_unk,
        suppress_sequences=suppress_sequences,
        end_token=end_token,
        max_length=max_length,
        min_length=min_length,
        sampling_topk=sampling_topk,
        sampling_topp=sampling_topp,
        sampling_temperature=sampling_temperature,
        return_scores=return_log_prob,
        static_prompt=static_prompt,
        cache_static_prompt=cache_static_prompt,
        include_prompt_in_result=False,
        callback=callback,
    ):
        yield step_result
|
||||
|
||||
|
||||
class AsyncGenerator:
    """Bridges the thread-backed :func:`_generate_tokens` generator into an
    async iterator by pumping its results through an :class:`asyncio.Queue`.

    NOTE(review): ``producer`` iterates a generator that performs blocking
    ``queue.Queue.get`` calls from inside the event loop; the per-item
    ``asyncio.sleep`` below is what lets the consumer make progress — confirm
    before refactoring.
    """

    def __init__(self, process_func, *args, **kwargs):
        # Channel between the producer coroutine and __anext__.
        self.queue = asyncio.Queue()
        # Set once iteration must stop (end of stream or cancellation).
        self.shutdown_event = threading.Event()
        self.iterator_task = None
        self.process_func = process_func
        self.args = args
        self.kwargs = kwargs

    async def producer(self):
        # Forward each result of the synchronous generator to the async queue.
        for step_result in _generate_tokens(
            self.process_func, *self.args, **self.kwargs
        ):
            await self.queue.put(step_result)
            await asyncio.sleep(0.0001)
            # The async sleep yields control back to the event loop; without
            # it the consumer never gets a chance to run.
            if self.shutdown_event.is_set():
                break
        await self.queue.put(None)  # Sentinel marking the end of the stream.

    def __aiter__(self):
        self.iterator_task = asyncio.create_task(self.producer())
        return self

    async def __anext__(self):
        if self.shutdown_event.is_set():
            raise StopAsyncIteration

        try:
            item = await self.queue.get()
            if item is None:  # Sentinel pushed by producer().
                self.shutdown_event.set()
                raise StopAsyncIteration
            return item
        except asyncio.CancelledError:
            # Cancellation of the consumer also stops the producer loop.
            self.shutdown_event.set()
            raise StopAsyncIteration
|
||||
|
||||
|
||||
def _generate_tokens(process_func, *args, **kwargs):
    """Runs ``process_func`` asynchronously and yields each generation step
    result pushed by the decoding callback.

    The callback runs in a worker thread; results cross into the calling
    thread through a queue. Closing this generator early signals the callback
    to stop the decoding.
    """
    step_results = queue.Queue()
    generator_closed = threading.Event()

    # Preserve a caller-provided callback so it still runs for every step.
    user_callback = kwargs.get("callback", None)
    if user_callback is None:
        user_callback = lambda step_result: False

    def _callback(step_result):
        user_callback_result = user_callback(step_result)
        step_results.put(step_result)

        # Returning True asks the decoder to stop this batch index.
        return generator_closed.is_set() or user_callback_result

    kwargs.update(
        {
            "asynchronous": True,
            # Step callbacks are only invoked with greedy search (beam_size 1).
            "beam_size": 1,
            "callback": _callback,
        }
    )

    async_results = process_func(*args, **kwargs)

    def _catch_exception():
        # Wait on the async job so an error raised by the model surfaces in
        # the consuming thread instead of being lost; None marks completion.
        try:
            for result in async_results:
                result.result()
        except Exception as e:
            step_results.put(e)
        step_results.put(None)

    thread = threading.Thread(target=_catch_exception, daemon=True)
    thread.start()

    while True:
        step_result = step_results.get()

        if step_result is None:
            break

        if isinstance(step_result, Exception):
            raise step_result

        try:
            yield step_result
        except GeneratorExit:
            # The caller closed the generator: tell the callback to stop.
            generator_closed.set()
            break

    # Wait for the job to terminate before exiting.
    thread.join()
|
||||
|
||||
|
||||
def _process_iterable(process_func, iterables, max_batch_size, batch_type, **kwargs):
    """Lazily runs :obj:`process_func` on batches read from parallel iterables.

    Arguments:
        process_func: Batch-processing function returning async result handles.
        iterables: One or more parallel input iterables; when there are several
            they are zipped together example by example.
        max_batch_size: Maximum batch size (must be >= 1).
        batch_type: Whether :obj:`max_batch_size` counts "examples" or "tokens".
        **kwargs: Additional options forwarded to :obj:`process_func`.

    Returns:
        A generator over the results, in input order.

    Raises:
        ValueError: If :obj:`max_batch_size` is not a positive integer.
    """
    if max_batch_size < 1:
        raise ValueError("max_batch_size must be >= 1")

    if len(iterables) == 1:
        iterable = iterables[0]
    else:
        iterable = itertools.zip_longest(*iterables)

    kwargs.update(
        {
            "max_batch_size": max_batch_size,
            "batch_type": batch_type,
            "asynchronous": True,
        }
    )

    # Read several batches ahead so the runtime has enough examples to form
    # efficient batches.
    read_batch_size = max_batch_size * 16 if max_batch_size > 1 else max_batch_size
    # Renamed from "queue" to avoid shadowing the stdlib queue module used
    # elsewhere in this file.
    pending = collections.deque()

    for streams in _batch_iterator(iterable, read_batch_size, batch_type):
        pending.extend(process_func(*streams, **kwargs))

        # Yield results that are already available to bound memory usage while
        # preserving the input order.
        while pending and pending[0].done():
            yield pending.popleft().result()

    while pending:
        yield pending.popleft().result()
|
||||
|
||||
|
||||
def _batch_iterator(iterable, batch_size, batch_type):
|
||||
streams = None
|
||||
cur_batch_size = 0
|
||||
|
||||
for example in iterable:
|
||||
if not isinstance(example, tuple):
|
||||
example = (example,)
|
||||
|
||||
if streams is None:
|
||||
streams = tuple([] for _ in example)
|
||||
for batch, element in zip(streams, example):
|
||||
if element is None and len(streams) > 1:
|
||||
raise ValueError("Input iterables do not have the same length")
|
||||
batch.append(element)
|
||||
|
||||
if batch_type == "examples":
|
||||
cur_batch_size += 1
|
||||
elif batch_type == "tokens":
|
||||
cur_batch_size += len(example[0])
|
||||
else:
|
||||
raise ValueError("Invalid batch type %s" % batch_type)
|
||||
|
||||
if cur_batch_size >= batch_size:
|
||||
yield streams
|
||||
streams = None
|
||||
cur_batch_size = 0
|
||||
|
||||
if streams is not None:
|
||||
yield streams
|
||||
45
venv/lib/python3.12/site-packages/ctranslate2/logging.py
Normal file
45
venv/lib/python3.12/site-packages/ctranslate2/logging.py
Normal file
@@ -0,0 +1,45 @@
|
||||
import logging
|
||||
|
||||
from ctranslate2 import _ext
|
||||
|
||||
# Mapping from the standard Python logging levels to the CTranslate2 C++ log
# levels exposed by the compiled extension.
_PYTHON_TO_CT2_LEVEL = {
    logging.CRITICAL: _ext.LogLevel.Critical,
    logging.ERROR: _ext.LogLevel.Error,
    logging.WARNING: _ext.LogLevel.Warning,
    logging.INFO: _ext.LogLevel.Info,
    logging.DEBUG: _ext.LogLevel.Debug,
    logging.NOTSET: _ext.LogLevel.Trace,
}

# Inverse mapping, used to report the current C++ log level as a Python level.
_CT2_TO_PYTHON_LEVEL = {v: k for k, v in _PYTHON_TO_CT2_LEVEL.items()}
|
||||
|
||||
|
||||
def set_log_level(level: int):
    """Sets the CTranslate2 logging level from a Python logging level.

    Arguments:
      level: A Python logging level (e.g. ``logging.INFO``).

    Raises:
      ValueError: If ``level`` is not a standard Python logging level.

    Example:

        >>> import logging
        >>> ctranslate2.set_log_level(logging.INFO)

    Note:
        The argument is a Python logging level for convenience, but this function
        controls the C++ logs of the library.
    """
    if level not in _PYTHON_TO_CT2_LEVEL:
        raise ValueError("Level %d is not a valid logging level" % level)
    _ext.set_log_level(_PYTHON_TO_CT2_LEVEL[level])
|
||||
|
||||
|
||||
def get_log_level() -> int:
    """Returns the current CTranslate2 logging level.

    Returns:
      The equivalent Python logging level.
    """
    return _CT2_TO_PYTHON_LEVEL[_ext.get_log_level()]
|
||||
@@ -0,0 +1,18 @@
|
||||
"""A collection of models which don't fit in the generic classes :class:`ctranslate2.Translator`
|
||||
and :class:`ctranslate2.Generator`.
|
||||
"""
|
||||
|
||||
try:
|
||||
from ctranslate2._ext import (
|
||||
Wav2Vec2,
|
||||
Wav2Vec2Bert,
|
||||
Whisper,
|
||||
WhisperGenerationResult,
|
||||
WhisperGenerationResultAsync,
|
||||
)
|
||||
except ImportError as e:
|
||||
# Allow using the Python package without the compiled extension.
|
||||
if "No module named" in str(e):
|
||||
pass
|
||||
else:
|
||||
raise
|
||||
Binary file not shown.
@@ -0,0 +1,18 @@
|
||||
from ctranslate2.specs.attention_spec import RotaryScalingType
|
||||
from ctranslate2.specs.common_spec import Activation, EmbeddingsMerge
|
||||
from ctranslate2.specs.model_spec import (
|
||||
LanguageModelSpec,
|
||||
LayerSpec,
|
||||
ModelSpec,
|
||||
SequenceToSequenceModelSpec,
|
||||
)
|
||||
from ctranslate2.specs.transformer_spec import (
|
||||
TransformerDecoderModelSpec,
|
||||
TransformerDecoderSpec,
|
||||
TransformerEncoderModelSpec,
|
||||
TransformerEncoderSpec,
|
||||
TransformerSpec,
|
||||
)
|
||||
from ctranslate2.specs.wav2vec2_spec import Wav2Vec2Spec
|
||||
from ctranslate2.specs.wav2vec2bert_spec import Wav2Vec2BertSpec
|
||||
from ctranslate2.specs.whisper_spec import WhisperSpec
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,96 @@
|
||||
import enum
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ctranslate2.specs import common_spec, model_spec
|
||||
|
||||
|
||||
# This enum should match the C++ equivalent in include/ctranslate2/layers/attention.h.
|
||||
class RotaryScalingType(enum.IntEnum):
    """RoPE scaling type.

    Values must match the C++ enum in include/ctranslate2/layers/attention.h.
    """

    Linear = 0
    Su = 1  # presumably the Su/LongRoPE-style scaling — TODO confirm
    Llama3 = 2
|
||||
|
||||
|
||||
class MultiHeadAttentionSpec(model_spec.LayerSpec):
    """Specification of a multi-head attention layer.

    Attributes are only declared for the features enabled by the constructor
    arguments; converters must then fill every non-optional attribute.
    """

    def __init__(
        self,
        self_attention=False,
        relative_position=False,
        relative_asymmetric_position=False,
        relative_attention_bias=False,
        rms_norm=False,
        rotary_dim=None,
        rotary_interleave=True,
        rotary_scaling_type=None,
        rotary_scaling_factor=1,
        rotary_base=10000,
        original_max_position_embeddings=0,
        max_position_embeddings=0,
        num_heads_kv=None,
        head_dim=None,
        sliding_window=None,
        qk_norm=False,
        qk_norm_rms=True,
    ):
        self.queries_scale = model_spec.OPTIONAL

        self.layer_norm = common_spec.LayerNormSpec(rms_norm=rms_norm)
        # Self-attention declares 2 linears, cross-attention 3 — presumably a
        # fused QKV projection plus output vs. separate Q and fused KV; the
        # exact split is defined by the converters, TODO confirm.
        self.linear = [
            common_spec.LinearSpec() for _ in range(2 if self_attention else 3)
        ]

        if qk_norm:
            # Separate normalization of queries and keys.
            self.q_norm = common_spec.LayerNormSpec(rms_norm=qk_norm_rms)
            self.k_norm = common_spec.LayerNormSpec(rms_norm=qk_norm_rms)

        if relative_position:
            self.relative_position_keys = None
            self.relative_position_values = None

        if relative_attention_bias:
            self.relative_attention_bias = None
            self.relative_attention_max_distance = None

        if relative_asymmetric_position:
            self.relative_asymmetric_position_keys = None
            self.relative_left_max_position = None
            self.relative_right_max_position = None

        # Scalar attributes are stored as fixed-width numpy scalars so they
        # serialize with an explicit dtype.
        if original_max_position_embeddings != 0:
            self.original_max_position_embeddings = np.dtype("int32").type(
                original_max_position_embeddings
            )
        if max_position_embeddings != 0:
            self.max_position_embeddings = np.dtype("int32").type(
                max_position_embeddings
            )

        if rotary_dim is not None:
            self.rotary_dim = np.dtype("int32").type(rotary_dim)
            self.rotary_interleave = rotary_interleave
            self.rotary_base = np.dtype("float32").type(rotary_base)

            if rotary_scaling_type is not None:
                self.rotary_scaling_type = np.dtype("int8").type(rotary_scaling_type)
                if rotary_scaling_type is RotaryScalingType.Linear:
                    self.rotary_scaling_factor = np.dtype("float32").type(
                        rotary_scaling_factor
                    )
                elif rotary_scaling_type is RotaryScalingType.Su:
                    # Per-dimension factors are filled later by the converter.
                    self.rotary_scaling_long_factor = None
                    self.rotary_scaling_short_factor = None
                elif rotary_scaling_type is RotaryScalingType.Llama3:
                    self.rotary_low_freq_factor = None
                    self.rotary_high_freq_factor = None

        if num_heads_kv is not None:
            # Grouped/multi-query attention: fewer KV heads than query heads.
            self.num_heads_kv = np.dtype("int32").type(num_heads_kv)

        if head_dim is not None:
            self.head_dim = np.dtype("int32").type(head_dim)

        if sliding_window is not None:
            self.sliding_window = np.dtype("int32").type(sliding_window)
|
||||
@@ -0,0 +1,66 @@
|
||||
import enum
|
||||
|
||||
from ctranslate2.specs import model_spec
|
||||
|
||||
|
||||
# This enum should match the C++ equivalent in include/ctranslate2/ops/activation.h.
|
||||
class Activation(enum.IntEnum):
    """Activation type.

    Values must match the C++ enum in include/ctranslate2/ops/activation.h.
    """

    RELU = 0
    GELUTanh = 1
    SWISH = 2
    GELU = 3
    GELUSigmoid = 4
    Tanh = 5
    Sigmoid = 6
|
||||
|
||||
|
||||
# This enum should match the C++ equivalent in include/ctranslate2/layers/common.h.
|
||||
class EmbeddingsMerge(enum.IntEnum):
    """Merge strategy for factors embeddings.

    Values must match the C++ enum in include/ctranslate2/layers/common.h.
    """

    CONCAT = 0
    ADD = 1
|
||||
|
||||
|
||||
class Quantization(enum.IntEnum):
    """Quantization type."""

    CT2 = 0  # Native CTranslate2 quantization.
    AWQ_GEMM = 1  # presumably AWQ weights in GEMM layout — TODO confirm
    AWQ_GEMV = 2  # presumably AWQ weights in GEMV layout — TODO confirm
|
||||
|
||||
|
||||
class LayerNormSpec(model_spec.LayerSpec):
    """Specification of a layer normalization (or RMSNorm) layer."""

    def __init__(self, rms_norm=False):
        # Scale weight, required in both variants.
        self.gamma = None
        if rms_norm:
            # RMSNorm has no bias; this flag is an optional extra.
            self.layer_norm_use_residual = model_spec.OPTIONAL
        else:
            # Standard LayerNorm additionally requires a bias.
            self.beta = None
|
||||
|
||||
|
||||
class LinearSpec(model_spec.LayerSpec):
    """Specification of a linear (dense) layer."""

    def __init__(self):
        self.weight = None  # Required weight matrix.
        self.weight_scale = model_spec.OPTIONAL  # Quantization scale.
        self.weight_zero = model_spec.OPTIONAL  # Quantization zero point.
        self.bias = model_spec.OPTIONAL

    def has_bias(self):
        """Returns True when a bias weight was set (the unset optional marker
        is a string)."""
        bias_is_unset_marker = isinstance(self.bias, str)
        return not bias_is_unset_marker
|
||||
|
||||
|
||||
class Conv1DSpec(model_spec.LayerSpec):
    """Specification of a 1D convolution layer."""

    def __init__(self):
        self.weight = None  # Required convolution kernel.
        self.weight_scale = model_spec.OPTIONAL  # Quantization scale.
        self.bias = model_spec.OPTIONAL
|
||||
|
||||
|
||||
class EmbeddingsSpec(model_spec.LayerSpec):
    """Specification of an embeddings layer."""

    def __init__(self):
        self.weight = None  # Required embeddings matrix.
        self.weight_scale = model_spec.OPTIONAL  # Quantization scale.
        self.multiply_by_sqrt_depth = model_spec.OPTIONAL
|
||||
@@ -0,0 +1,767 @@
|
||||
"""Specifications declare the expected variables layout of CTranslate2 models
|
||||
that do not load a computation graph. The model converter should make sure that
|
||||
each required variable of the specification is set.
|
||||
"""
|
||||
|
||||
import abc
|
||||
import ctypes
|
||||
import json
|
||||
import os
|
||||
import shutil
|
||||
import struct
|
||||
|
||||
from typing import Dict, List, Optional
|
||||
|
||||
import numpy as np
|
||||
|
||||
try:
|
||||
import torch
|
||||
|
||||
torch_is_available = True
|
||||
except ImportError:
|
||||
torch_is_available = False
|
||||
|
||||
OPTIONAL = "__optional"
|
||||
CURRENT_BINARY_VERSION = 6
|
||||
|
||||
ACCEPTED_MODEL_TYPES = (
|
||||
"int8",
|
||||
"int8_float32",
|
||||
"int8_float16",
|
||||
"int8_bfloat16",
|
||||
"int16",
|
||||
"float16",
|
||||
"bfloat16",
|
||||
"float32",
|
||||
)
|
||||
|
||||
SKIP_CREATING_ALIAS = ("rotary_scaling_long_factor", "rotary_scaling_short_factor")
|
||||
|
||||
|
||||
def _join_scope(scope, name):
|
||||
if not scope:
|
||||
return name
|
||||
return "%s/%s" % (scope, name)
|
||||
|
||||
|
||||
def _split_scope(scope):
    """Splits a "/"-separated scope path into its components."""
    return scope.split("/")
|
||||
|
||||
|
||||
def _parent_scope(scope):
|
||||
keys = _split_scope(scope)
|
||||
scope, attr = keys[:-1], keys[-1]
|
||||
return "/".join(scope), attr
|
||||
|
||||
|
||||
def visit_spec(spec, fn, scope=""):
    """Recursively visits a layer spec.

    Calls ``fn(spec, scope_name, value)`` for every leaf attribute, where
    scope_name is the "/"-joined path to the attribute. List attributes are
    visited with an index suffix on the name (e.g. "linear_0").
    """
    for name, value in list(spec.__dict__.items()):
        if name.startswith("_"):  # Private attributes are not part of the spec.
            continue
        if isinstance(value, list):
            for i, elem in enumerate(value):
                visit_spec(elem, fn, scope=_join_scope(scope, "%s_%d" % (name, i)))
        elif isinstance(value, LayerSpec):
            visit_spec(value, fn, scope=_join_scope(scope, name))
        else:
            fn(spec, _join_scope(scope, name), value)
|
||||
|
||||
|
||||
def index_spec(spec, index):
    """Returns the sub-spec or attribute at a "/"-separated scope path.

    A key like "linear_0" that is not a direct attribute is resolved as
    element 0 of the list attribute "linear".
    """
    if not index:
        return spec
    keys = _split_scope(index)
    for key in keys:
        try:
            spec = getattr(spec, key)
        except AttributeError:
            # "name_N" refers to element N of the list attribute "name".
            attr, index = key.rsplit("_", 1)
            spec = getattr(spec, attr)[int(index)]
    return spec
|
||||
|
||||
|
||||
class FrozenMeta(type):
    """Metaclass that marks instances as frozen once __init__ has run."""

    def __call__(self, *args, **kwargs):
        instance = super().__call__(*args, **kwargs)
        # After construction, FrozenAttr.__setattr__ rejects new attributes.
        instance._frozen = True
        return instance
|
||||
|
||||
|
||||
class FrozenAttr:
    """Mixin that forbids creating new attributes on a frozen instance.

    Existing attributes can still be reassigned; only new names are rejected.
    """

    def __setattr__(self, key, value):
        if hasattr(self, "_frozen") and not hasattr(self, key):
            raise AttributeError("Attribute %s does not exist" % key)
        super().__setattr__(key, value)
|
||||
|
||||
|
||||
class LayerSpec(FrozenAttr, metaclass=FrozenMeta):
    """A layer specification declares the weights that should be set by the converters."""

    def validate(self) -> None:
        """Verify that the required weights are set.

        Also normalizes every set value: floats/bools become fixed-width numpy
        scalars, strings become int8 byte arrays, and arrays/tensors are
        wrapped in Variable objects for serialization.

        Raises:
            ValueError: If a required weight is not set in the specification.
        """
        unset_attributes = []

        def _check(spec, name, value):
            if value is None:
                unset_attributes.append(name)
                return

            if isinstance(value, np.ndarray):
                # float64 is not a supported type.
                if value.dtype == np.float64:
                    value = value.astype(np.float32)
            elif isinstance(value, float):
                value = np.dtype("float32").type(value)
            elif isinstance(value, bool):
                # Convert bool to an integer type.
                value = np.dtype("int8").type(value)
            elif isinstance(value, str):
                # Non-marker strings are stored as UTF-8 bytes in an int8 array.
                if value != OPTIONAL:
                    value = np.frombuffer(value.encode("utf-8"), dtype=np.int8)

            if isinstance(value, np.ndarray) or isinstance(value, np.generic):
                value = NumpyVariable(value)
            elif torch_is_available and isinstance(value, torch.Tensor):
                value = PyTorchVariable(value)

            attr_name = _split_scope(name)[-1]
            setattr(spec, attr_name, value)

        self._visit(_check)

        if unset_attributes:
            raise ValueError(
                "Some required model attributes are not set:\n\n%s"
                % "\n".join(unset_attributes)
            )

    def variables(
        self,
        prefix: str = "",
        ordered: bool = False,
    ) -> Dict[str, np.ndarray]:
        """Recursively returns the weights from this layer and its children.

        Arguments:
            prefix: Prefix to prepend to all variable names.
            ordered: If set, an ordered list is returned instead.

        Returns:
            Dictionary mapping variables name to value.
        """
        var = {}

        def _register_var(spec, name, value):
            # Unset optional attributes are not variables.
            if isinstance(value, str) and value == OPTIONAL:
                return
            var[_join_scope(prefix, name)] = value

        self._visit(_register_var)
        if ordered:
            return list(sorted(var.items(), key=lambda x: x[0]))
        return var

    def _alias_variables(self):
        """Find duplicate variables in spec and create aliases."""
        # When a variable is duplicated, keep the version that comes first in
        # the alphabetical order and alias the others.
        variables = self.variables(ordered=True)
        for name, value in reversed(variables):
            for other_name, other_value in variables:
                # Only consider names that sort strictly before this one.
                if name == other_name:
                    break
                # Because variables can be transformed on load (e.g. transposed),
                # we use an element-wise equality check.
                scope, attr_name = _parent_scope(name)
                if (
                    not value.is_scalar()
                    and value.equal(other_value)
                    and attr_name not in SKIP_CREATING_ALIAS
                ):
                    # Replace variable value by the alias name.
                    spec = index_spec(self, scope)
                    setattr(spec, attr_name, other_name)
                    break

    def _quantize(self, quantization):
        """Possibly quantizes the variable of the layer."""
        if quantization is not None and quantization not in ACCEPTED_MODEL_TYPES:
            raise ValueError(
                "%s is not a valid quantization type. Accepted types are: %s"
                % (quantization, ", ".join(ACCEPTED_MODEL_TYPES))
            )

        def _quantize(spec, name, value):
            if not isinstance(value, Variable) or value.is_scalar():
                return

            key = _split_scope(name)[-1]
            scale = None
            # A weight is quantizable when the spec declares a matching
            # "<name>_scale" attribute.
            is_quantizable = hasattr(spec, "%s_scale" % key)
            is_convertible = value.dtype in ("float32", "float16", "bfloat16")

            if is_quantizable:
                if quantization == "int16":
                    value = value.to("float32").numpy()
                    # Represent the value with 10 bits so the multiplication is 20 bits
                    # and 12 bits are left for accumulation.
                    scale = np.float32(2**10 / np.amax(np.absolute(value)))
                    value *= scale
                    value = np.rint(value)
                    value = np.clip(
                        value, np.iinfo(np.int16).min, np.iinfo(np.int16).max
                    )
                    value = value.astype(np.int16)
                    scale = NumpyVariable(scale)
                    value = NumpyVariable(value)
                elif quantization in (
                    "int8",
                    "int8_float32",
                    "int8_float16",
                    "int8_bfloat16",
                ):
                    value = value.to("float32").numpy()
                    # For conv1d layer we need to reshape to 2D before calculating scale
                    old_shape = None
                    if len(value.shape) == 3:
                        old_shape = value.shape
                        value = value.reshape(value.shape[0], -1)
                    # Per-row scales; rows of all zeros keep a neutral scale.
                    amax = np.amax(np.absolute(value), axis=1)
                    amax[amax == 0] = 127.0
                    scale = 127.0 / amax
                    value *= np.expand_dims(scale, 1)
                    value = np.rint(value)
                    value = value.astype(np.int8)
                    # reshape back to old shape
                    if old_shape:
                        value = value.reshape(old_shape)
                    scale = NumpyVariable(scale)
                    value = NumpyVariable(value)
                elif quantization in ("float16", "bfloat16", "float32"):
                    value = value.to(quantization)

            elif is_convertible:
                # Non-quantizable float weights are converted to the target
                # compute float type.
                if quantization in ("float16", "int8_float16"):
                    value = value.to("float16")
                elif quantization in ("bfloat16", "int8_bfloat16"):
                    value = value.to("bfloat16")
                elif quantization in ("float32", "int16", "int8_float32"):
                    value = value.to("float32")

            setattr(spec, key, value)
            if scale is not None:
                setattr(spec, "%s_scale" % key, scale)

        self._visit(_quantize)

    def optimize(self, quantization: Optional[str] = None) -> None:
        """Recursively applies some optimizations to this layer:

        * Alias variables with the same shape and value.
        * Quantize weights.

        Arguments:
            quantization: Weight quantization scheme (possible values are: int8, int8_float32,
                int8_float16, int8_bfloat16, int16, float16, bfloat16, float32).
        """
        self._alias_variables()
        self._quantize(quantization)

    def _visit(self, fn):
        """Recursively visits this layer and its children."""
        visit_spec(self, fn)
|
||||
|
||||
|
||||
def _dtype_to_type_id(object_dtype):
|
||||
# Order should match the DataType enum in include/ctranslate2/types.h
|
||||
dtypes = ("float32", "int8", "int16", "int32", "float16", "bfloat16")
|
||||
try:
|
||||
return dtypes.index(object_dtype)
|
||||
except ValueError:
|
||||
raise ValueError(
|
||||
"%s is not in list of supported dtypes: %s"
|
||||
% (object_dtype, ", ".join(dtypes))
|
||||
)
|
||||
|
||||
|
||||
class ModelConfig(FrozenAttr, metaclass=FrozenMeta):
    """Base class for model configurations."""

    def __init__(self, **kwargs):
        """Initializes the configuration with a set of parameters."""
        for key, value in kwargs.items():
            setattr(self, key, value)

    def to_dict(self):
        """Returns the configuration as a dictionary."""
        return {
            key: value
            for key, value in self.__dict__.items()
            if not key.startswith("_")
        }

    def add_attribute(self, key, value):
        # Writes __dict__ directly to bypass the FrozenAttr freeze, allowing
        # new attributes to be added after construction.
        self.__dict__[key] = value

    def save_as_json(self, path):
        """Saves the configuration as a JSON file."""
        with open(path, "w", encoding="utf-8") as config_file:
            json.dump(
                self.to_dict(),
                config_file,
                indent=2,
                sort_keys=True,
            )
            # json.dump does not write a trailing newline.
            config_file.write("\n")
|
||||
|
||||
|
||||
class ModelSpec(LayerSpec):
    """The top level layer specification."""

    def __init__(self):
        """Initializes the model specification."""
        self._config = self.get_default_config()
        # Extra files (e.g. vocabularies) copied into the model directory.
        self._files = {}

    @property
    def name(self):
        """The name of the model specification."""
        raise NotImplementedError()

    @property
    def revision(self):
        """The model specification revision.

        This value is incremented each time the weights layout of the model is
        changed (e.g. a weight is renamed).
        """
        return 1

    @property
    def config(self):
        """The model configuration."""
        return self._config

    def get_default_config(self):
        """Returns the default configuration used by this model."""
        return None

    def register_file(self, path: str, filename: Optional[str] = None) -> None:
        """Registers a file to be saved in the model directory.

        Raises:
            ValueError: If the file does not exist or the name is already taken.
        """
        if not os.path.isfile(path):
            raise ValueError("File %s does not exist" % path)
        if filename is None:
            filename = os.path.basename(path)
        if filename in self._files:
            raise ValueError("A file with name %s was already registered" % filename)
        self._files[filename] = path

    def save(self, output_dir: str) -> None:
        """Saves this model on disk.

        Arguments:
          output_dir: Output directory where the model is saved.

        Raises:
            RuntimeError: If a registered file already exists in the output directory.
        """
        self._serialize(os.path.join(output_dir, "model.bin"))
        if self._config is not None:
            self._config.save_as_json(os.path.join(output_dir, "config.json"))

        for filename, path in self._files.items():
            destination = os.path.join(output_dir, filename)
            if os.path.exists(destination):
                raise RuntimeError(
                    "File %s already exists in the model directory" % destination
                )
            shutil.copy(path, destination)

    def _serialize(self, path):
        """Serializes the model variables.

        Binary layout: version, spec name, revision, then each variable
        (name, shape, dtype id, byte size, raw bytes), then the aliases.
        """
        variables = []
        aliases = []
        # Aliases were stored as strings by _alias_variables().
        for variable in self.variables(ordered=True):
            if isinstance(variable[1], str):
                aliases.append(variable)
            else:
                variables.append(variable)

        with open(path, "wb") as model:

            def _write_string(string):
                # Length-prefixed, NUL-terminated UTF-8 string.
                model.write(struct.pack("H", len(string) + 1))
                model.write(string.encode("utf-8"))
                model.write(struct.pack("B", 0))

            model.write(struct.pack("I", CURRENT_BINARY_VERSION))
            _write_string(self.name)
            model.write(struct.pack("I", self.revision))
            model.write(struct.pack("I", len(variables)))
            for name, value in variables:
                _write_string(name)
                model.write(struct.pack("B", len(value.shape)))
                for dim in value.shape:
                    model.write(struct.pack("I", dim))
                model.write(struct.pack("B", _dtype_to_type_id(value.dtype)))
                model.write(struct.pack("I", value.num_bytes()))
                model.write(value.to_bytes())
            model.write(struct.pack("I", len(aliases)))
            for alias, variable_name in aliases:
                _write_string(alias)
                _write_string(variable_name)
|
||||
|
||||
|
||||
def _flatten_vocabularies(vocabularies):
|
||||
for name, vocabulary in vocabularies.items():
|
||||
if len(vocabulary) == 1:
|
||||
yield name, vocabulary[0]
|
||||
else:
|
||||
for i, vocab in enumerate(vocabulary):
|
||||
yield "%s_%d" % (name, i + 1), vocab
|
||||
|
||||
|
||||
class SequenceToSequenceModelConfig(ModelConfig):
    """Configuration for sequence-to-sequence models."""

    def __init__(
        self,
        unk_token: str = "<unk>",
        bos_token: str = "<s>",
        eos_token: str = "</s>",
        decoder_start_token: Optional[str] = "<s>",
        add_source_bos: bool = False,
        add_source_eos: bool = False,
        **kwargs,
    ):
        """Initializes the configuration for sequence-to-sequence models.

        Args:
          unk_token: The unknown token.
          bos_token: The start of sentence token.
          eos_token: The end of sentence token.
          decoder_start_token: The decoder start token. If ``None``, the token should
            be passed by the user in the target prefix.
          add_source_bos: If ``True``, ``bos_token`` will be automatically added to
            the source input.
          add_source_eos: If ``True``, ``eos_token`` will be automatically added to
            the source input.
          **kwargs: Additional configuration.
        """
        # All options are forwarded to ModelConfig, which stores them as
        # attributes and serializes them to config.json.
        super().__init__(
            unk_token=unk_token,
            bos_token=bos_token,
            eos_token=eos_token,
            decoder_start_token=decoder_start_token,
            add_source_bos=add_source_bos,
            add_source_eos=add_source_eos,
            **kwargs,
        )
|
||||
|
||||
|
||||
class SequenceToSequenceModelSpec(ModelSpec):
    """Base specification for sequence to sequence models."""

    def __init__(self):
        """Initializes a sequence to sequence model specification."""
        super().__init__()
        # One list of token lists per side; most models register exactly one
        # vocabulary per side, but multiple source vocabularies are supported.
        self._vocabularies = {"source": [], "target": []}

    def get_default_config(self):
        return SequenceToSequenceModelConfig()

    @abc.abstractmethod
    def get_source_vocabulary_size(self):
        """Returns the source vocabulary size expected by the model."""
        raise NotImplementedError()

    @abc.abstractmethod
    def get_target_vocabulary_size(self):
        """Returns the target vocabulary size expected by the model."""
        raise NotImplementedError()

    def register_source_vocabulary(self, tokens: List[str]) -> None:
        """Registers a source vocabulary of tokens.

        Arguments:
            tokens: List of source tokens.
        """
        self._vocabularies["source"].append(tokens)

    def register_target_vocabulary(self, tokens: List[str]) -> None:
        """Registers a target vocabulary of tokens.

        Arguments:
            tokens: List of target tokens.
        """
        self._vocabularies["target"].append(tokens)

    def register_vocabulary_mapping(self, path: str) -> None:
        """Registers a vocabulary mapping file.

        Arguments:
            path: Path to the vocabulary mapping file.
        """
        self.register_file(path, "vmap.txt")

    def validate(self) -> None:
        """Checks that registered vocabularies match the model's expectations."""
        super().validate()

        expected = {
            "source": self.get_source_vocabulary_size(),
            "target": self.get_target_vocabulary_size(),
        }

        for name, sizes in expected.items():
            # A model may expect a single vocabulary (int) or several (list).
            if not isinstance(sizes, list):
                sizes = [sizes]
            registered = self._vocabularies[name]
            if len(registered) != len(sizes):
                raise ValueError(
                    "Incorrect number of %s vocabularies: %d registered, but expected %d"
                    % (name, len(registered), len(sizes))
                )
            for index, (tokens, expected_size) in enumerate(zip(registered, sizes)):
                if len(tokens) != expected_size:
                    raise ValueError(
                        "%s vocabulary %d has size %d but the model expected a vocabulary "
                        "of size %d"
                        % (name.capitalize(), index, len(tokens), expected_size)
                    )

    def save(self, output_dir: str) -> None:
        """Saves the vocabularies, then delegates to the base implementation."""
        flattened = dict(_flatten_vocabularies(self._vocabularies))
        token_lists = list(flattened.values())
        # Collapse identical source/target vocabularies into a single shared file.
        if all(tokens == token_lists[0] for tokens in token_lists):
            flattened = {"shared": token_lists[0]}

        for name, tokens in flattened.items():
            _save_vocabulary(output_dir, "%s_vocabulary" % name, tokens)

        # Save the rest of the model.
        super().save(output_dir)
|
||||
|
||||
|
||||
class LanguageModelConfig(ModelConfig):
    """Configuration for language models."""

    def __init__(
        self,
        unk_token: str = "<unk>",
        bos_token: str = "<s>",
        eos_token: str = "</s>",
        **kwargs,
    ):
        """Initializes the configuration for language models.

        Args:
            unk_token: The unknown token.
            bos_token: The start of sentence token.
            eos_token: The end of sentence token.
            **kwargs: Additional configuration.
        """
        # Forward the named options together with any extra configuration.
        # Duplicate keys between the two dicts raise TypeError, matching the
        # behavior of passing explicit keyword arguments alongside **kwargs.
        options = {
            "unk_token": unk_token,
            "bos_token": bos_token,
            "eos_token": eos_token,
        }
        super().__init__(**options, **kwargs)
|
||||
|
||||
|
||||
class LanguageModelSpec(ModelSpec):
    """Base specification for language models."""

    def __init__(self):
        """Initializes a language model specification."""
        super().__init__()
        # Single token list, replaced (not appended to) by register_vocabulary.
        self._vocabulary = []

    def get_default_config(self):
        return LanguageModelConfig()

    @abc.abstractmethod
    def get_vocabulary_size(self):
        """Returns the vocabulary size expected by the model."""
        raise NotImplementedError()

    def register_vocabulary(self, tokens: List[str]) -> None:
        """Registers the vocabulary of tokens.

        Arguments:
            tokens: List of tokens.
        """
        # Copy the input so later caller-side mutations do not leak in.
        self._vocabulary = list(tokens)

    def validate(self) -> None:
        """Checks that the registered vocabulary matches the model's expectation."""
        super().validate()

        expected_size = self.get_vocabulary_size()
        actual_size = len(self._vocabulary)
        if actual_size != expected_size:
            raise ValueError(
                "Vocabulary has size %d but the model expected a vocabulary of size %d"
                % (actual_size, expected_size)
            )

    def save(self, output_dir: str) -> None:
        """Saves the vocabulary, then delegates to the base implementation."""
        _save_vocabulary(output_dir, "vocabulary", self._vocabulary)

        # Save the rest of the model.
        super().save(output_dir)
|
||||
|
||||
|
||||
def _save_vocabulary(output_dir, name, tokens):
|
||||
vocabulary_path = os.path.join(output_dir, "%s.json" % name)
|
||||
|
||||
with open(vocabulary_path, "w", encoding="utf-8") as vocabulary_file:
|
||||
json.dump(tokens, vocabulary_file, indent=2)
|
||||
|
||||
|
||||
class Variable(abc.ABC):
    """Abstract base class for model variables.

    Concrete subclasses wrap a specific array/tensor type and implement the
    backend-specific hooks (``_to``, ``_equal``, ``numpy``, ``num_bytes``,
    ``to_bytes``).
    """

    @property
    @abc.abstractmethod
    def shape(self) -> List[int]:
        """Shape of the variable."""
        raise NotImplementedError()

    def is_scalar(self) -> bool:
        """Returns ``True`` when the variable has no dimensions."""
        return not self.shape

    @property
    @abc.abstractmethod
    def dtype(self) -> str:
        """Data type name of the variable."""
        raise NotImplementedError()

    def to(self, dtype: str) -> "Variable":
        """Converts the variable to *dtype*, returning ``self`` if already there."""
        return self if dtype == self.dtype else self._to(dtype)

    @abc.abstractmethod
    def numpy(self) -> np.ndarray:
        """Returns the variable as a NumPy array."""
        raise NotImplementedError()

    def equal(self, other) -> bool:
        """Returns ``True`` when *other* has the same concrete type and content."""
        if type(self) is not type(other):
            return False
        return self._equal(other)

    @abc.abstractmethod
    def num_bytes(self) -> int:
        """Size of the variable data in bytes."""
        raise NotImplementedError()

    @abc.abstractmethod
    def to_bytes(self) -> bytes:
        """Serializes the variable data to raw bytes."""
        raise NotImplementedError()

    @abc.abstractmethod
    def _to(self, dtype: str) -> "Variable":
        raise NotImplementedError()

    @abc.abstractmethod
    def _equal(self, other) -> bool:
        raise NotImplementedError()
|
||||
|
||||
|
||||
class NumpyVariable(Variable):
    """Model variable backed by a NumPy array."""

    def __init__(self, array):
        self.array = array

    @property
    def shape(self) -> List[int]:
        return self.array.shape

    @property
    def dtype(self) -> str:
        return self.array.dtype.name

    def numpy(self) -> np.ndarray:
        return self.array

    def num_bytes(self) -> int:
        return self.array.nbytes

    def to_bytes(self) -> bytes:
        return self.array.tobytes()

    def _to(self, dtype: str) -> Variable:
        # NumPy has no native bfloat16, so route the conversion through torch.
        if dtype == "bfloat16":
            if not torch_is_available:
                raise RuntimeError(
                    "Converting to bfloat16 requires torch to be installed"
                )
            return PyTorchVariable.from_numpy(self.array).to(dtype)

        self.array = self.array.astype(np.dtype(dtype))
        return self

    def _equal(self, other) -> bool:
        mine = self.array
        theirs = other.array
        if mine is theirs:
            return True
        # Cheap mismatch checks before the full element-wise comparison.
        if mine.dtype != theirs.dtype or mine.shape != theirs.shape:
            return False
        if mine.flat[0] != theirs.flat[0]:
            return False
        return np.array_equal(mine, theirs)
|
||||
|
||||
|
||||
class PyTorchVariable(Variable):
    """Model variable as a PyTorch tensor."""

    def __init__(self, tensor):
        # Unwrap nn.Parameter to its underlying tensor.
        if isinstance(tensor, torch.nn.Parameter):
            tensor = tensor.data

        # A contiguous layout is required because to_bytes() reads the raw
        # memory at data_ptr() sequentially.
        self.tensor = tensor.contiguous()

    @classmethod
    def from_numpy(cls, array):
        # torch.from_numpy shares memory with the source array (no copy).
        tensor = torch.from_numpy(array)
        return cls(tensor)

    @property
    def shape(self) -> List[int]:
        return list(self.tensor.shape)

    @property
    def dtype(self) -> str:
        # e.g. "torch.float16" -> "float16"
        return str(self.tensor.dtype).replace("torch.", "")

    def numpy(self) -> np.ndarray:
        return self.tensor.detach().numpy()

    def num_bytes(self) -> int:
        return self.tensor.numel() * self.tensor.element_size()

    def to_bytes(self) -> bytes:
        # Read the raw tensor memory in chunks of at most 2**31 - 1 bytes —
        # presumably to stay under a 32-bit size limit in the underlying
        # ctypes/CPython machinery for very large tensors (TODO confirm).
        max_size = 2**31 - 1
        num_bytes = self.num_bytes()
        output = b""
        offset = 0
        while num_bytes > 0:
            chunk_size = max_size if num_bytes > max_size else num_bytes
            chunk = ctypes.string_at(self.tensor.data_ptr() + offset, chunk_size)
            output += chunk
            offset += chunk_size
            num_bytes -= chunk_size
        return output

    def _to(self, dtype: str) -> Variable:
        # Map the dtype name (e.g. "bfloat16") to the torch dtype object.
        dtype = getattr(torch, dtype)
        self.tensor = self.tensor.to(dtype)
        return self

    def _equal(self, other) -> bool:
        a = self.tensor
        b = other.tensor
        # torch.equal requires matching dtypes, hence the explicit check first.
        return a is b or (a.dtype == b.dtype and torch.equal(a, b))
|
||||
@@ -0,0 +1,714 @@
|
||||
"""Declares specification of the Transformer model."""
|
||||
|
||||
from typing import Optional, Tuple, Union
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ctranslate2.specs import attention_spec, common_spec, model_spec
|
||||
|
||||
|
||||
class TransformerEncoderSpec(model_spec.LayerSpec):
    # NOTE(review): attribute assignment order below presumably matches the
    # order in which the model serializer visits variables — confirm before
    # reordering any of these assignments.
    def __init__(
        self,
        num_layers: int,
        num_heads: int,
        pre_norm: bool = True,
        no_final_norm: bool = False,
        activation: common_spec.Activation = common_spec.Activation.RELU,
        num_source_embeddings: int = 1,
        embeddings_merge: common_spec.EmbeddingsMerge = common_spec.EmbeddingsMerge.CONCAT,
        layernorm_embedding: bool = False,
        relative_position: bool = False,
        relative_attention_bias: bool = False,
        ffn_glu: bool = False,
        rms_norm: bool = False,
        multi_query_attention: bool = False,
    ):
        """Initializes a Transformer encoder specification.

        Args:
            num_layers: Number of layers.
            num_heads: Number of attention heads.
            pre_norm: Enable the pre-norm Transformer architecture.
            no_final_norm: Disable the final layer norm in the pre-norm architecture.
            activation: Activation to apply in the feed-forward network.
            num_source_embeddings: Number of source embeddings.
            embeddings_merge: When :obj:`num_source_embeddings` > 1, specify how the
              embeddings are merged.
            layernorm_embedding: Apply layer normalization after the embedding layer.
            relative_position: Use relative position representations in the self-attention
              layers as described in https://arxiv.org/abs/1803.02155.
            relative_attention_bias: Use relative attention bias in the self-attention
              layers as described in the T5 paper https://arxiv.org/abs/1910.10683.
            ffn_glu: Use gated linear units in the FFN layers as described in
              https://arxiv.org/abs/2002.05202.
            rms_norm: Use the root mean square layer normalization.
            multi_query_attention: Use multi-query attention.
        """
        self.multi_query_attention = multi_query_attention
        # Scalar attributes are stored as fixed-width NumPy scalars.
        self.num_heads = np.dtype("int16").type(num_heads)
        self.pre_norm = pre_norm
        self.activation = np.dtype("int8").type(activation)
        self.embeddings_merge = np.dtype("int8").type(embeddings_merge)
        self.embeddings = [
            common_spec.EmbeddingsSpec() for _ in range(num_source_embeddings)
        ]
        self.scale_embeddings = True
        # Absolute position encodings are only used when no relative scheme
        # is enabled.
        if not relative_position and not relative_attention_bias:
            self.position_encodings = PositionEncoderSpec()
        if pre_norm and not no_final_norm:
            self.layer_norm = common_spec.LayerNormSpec(rms_norm=rms_norm)
        if layernorm_embedding:
            self.layernorm_embedding = common_spec.LayerNormSpec(rms_norm=rms_norm)
        self.layer = [
            TransformerEncoderLayerSpec(
                relative_position=relative_position,
                relative_attention_bias=relative_attention_bias,
                ffn_glu=ffn_glu,
                rms_norm=rms_norm,
                # Multi-query attention uses a single key/value head.
                num_heads_kv=1 if multi_query_attention else None,
            )
            for _ in range(num_layers)
        ]
|
||||
|
||||
|
||||
class TransformerDecoderSpec(model_spec.LayerSpec):
    # NOTE(review): attribute assignment order below presumably matches the
    # order in which the model serializer visits variables — confirm before
    # reordering any of these assignments.
    def __init__(
        self,
        num_layers: int,
        num_heads: int,
        pre_norm: bool = True,
        activation: common_spec.Activation = common_spec.Activation.RELU,
        layernorm_embedding: bool = False,
        with_encoder_attention: bool = True,
        no_final_norm: bool = False,
        project_in_out: bool = False,
        relative_position: bool = False,
        relative_attention_bias: bool = False,
        alignment_layer: int = -1,
        alignment_heads: int = 1,
        ffn_glu: bool = False,
        rms_norm: bool = False,
        alibi: bool = False,
        alibi_use_positive_positions: bool = False,
        scale_alibi: bool = False,
        rotary_dim: Optional[int] = None,
        rotary_interleave: bool = True,
        rotary_scaling_type: Optional[attention_spec.RotaryScalingType] = None,
        rotary_scaling_factor: float = 1,
        rotary_base: float = 10000,
        original_max_position_embeddings: int = 0,
        max_position_embeddings: int = 0,
        parallel_residual: bool = False,
        shared_layer_norm: bool = False,
        pre_post_layer_norm: bool = False,
        multi_query_attention: bool = False,
        num_heads_kv: Optional[int] = None,
        head_dim: Optional[int] = None,
        sliding_window: Optional[int] = None,
        quant_type: Optional[common_spec.Quantization] = None,
        quant_group_size: Optional[int] = None,
        quant_bits: Optional[int] = None,
        qk_norm: Optional[bool] = False,
    ):
        """Initializes a Transformer decoder specification.

        Args:
            num_layers: Number of layers.
            num_heads: Number of attention heads.
            pre_norm: Enable the pre-norm Transformer architecture.
            activation: Activation to apply in the feed-forward network.
            layernorm_embedding: Apply layer normalization after the embedding layer.
            with_encoder_attention: Enable the encoder attention sublayers.
            no_final_norm: Disable the final layer norm in the pre-norm architecture.
            project_in_out: Add linear transformations after the embedding layer and before
              the final layer.
            relative_position: Use relative position representations in the self-attention
              layers as described in https://arxiv.org/abs/1803.02155.
            relative_attention_bias: Use relative attention bias in the self-attention
              layers as described in the T5 paper https://arxiv.org/abs/1910.10683.
            alignment_layer: Layer index selected for alignment.
            alignment_heads: Number of attention heads selected for alignment.
            ffn_glu: Use gated linear units in the FFN layers as described in
              https://arxiv.org/abs/2002.05202.
            rms_norm: Use the root mean square layer normalization.
            alibi: Use attention with linear biases.
            alibi_use_positive_positions: Use positive positions in the ALiBi definition.
            scale_alibi: Apply the dot product scale factor to ALiBi.
            rotary_dim: Apply rotary embeddings to these first N dimensions. If 0, rotary
              embeddings are applied to all dimensions.
            rotary_interleave: Interleave the head dimensions when rotary embeddings are applied.
              Otherwise the head dimensions are sliced in half.
            rotary_scaling_type: Type of RoPE scaling.
            rotary_scaling_factor: Factor used in the RoPE scaling.
            rotary_base: The base period of the rotary embeddings.
            original_max_position_embeddings: The original max position embeddings
              for Su rope embeddings
            max_position_embeddings: The max position embeddings for Su rope embeddings
            parallel_residual: Use parallel residual connections in each layer block, as used
              by the GPT-J and GPT-NeoX models.
            shared_layer_norm: When using parallel residual, share the input and post
              attention layer norms.
            pre_post_layer_norm: Add post layer norm for each pre norm layer
            multi_query_attention: Use multi-query attention (alias for num_heads_kv=1).
            num_heads_kv: Number of attention heads for the key and value.
            sliding_window: Max sequence length to retain in KV Cache.
            quant_type: quantization type used (like awq... for lower bit quantization)
            quant_group_size: group size of the lower bit quantization
            quant_bits: number of bit of the quantization (ex: 4bit)
            qk_norm: Forwarded to the attention layers; presumably enables
              query/key normalization (see attention_spec — TODO confirm).
        """

        self._config = dict()
        # The GPT-J style parallel-residual block only exists in a pre-norm,
        # decoder-only configuration.
        if parallel_residual:
            if not pre_norm:
                raise ValueError("The GPT-J block expects a pre-norm architecture")
            if with_encoder_attention:
                raise ValueError("The GPT-J block does not have cross attention")

        if multi_query_attention:
            if num_heads_kv is not None and num_heads_kv != 1:
                raise ValueError(
                    "Enabling multi_query_attention implies num_heads_kv=1"
                )
            num_heads_kv = 1

        # Grouped-query attention (1 < num_heads_kv < num_heads) is not
        # supported in cross-attention layers.
        if with_encoder_attention and num_heads_kv not in (None, 1, num_heads):
            raise ValueError(
                "num_heads_kv=%d is not supported in the cross-attention layers"
                % num_heads_kv
            )

        # Scalar attributes are stored as fixed-width NumPy scalars.
        self.num_heads = np.dtype("int16").type(num_heads)
        self.pre_norm = pre_norm
        self.activation = np.dtype("int8").type(activation)
        self.alignment_layer = np.dtype("int16").type(alignment_layer)
        self.alignment_heads = np.dtype("int16").type(alignment_heads)
        self.embeddings = common_spec.EmbeddingsSpec()
        self.scale_embeddings = True
        self.scale_outputs = model_spec.OPTIONAL
        self.alibi = alibi
        self.alibi_use_positive_positions = alibi_use_positive_positions
        self.scale_alibi = scale_alibi
        if sliding_window is not None:
            self.sliding_window = np.dtype("int32").type(sliding_window)
        # Absolute position encodings are only used when no other positional
        # scheme (relative, ALiBi, rotary) is enabled.
        if (
            not relative_position
            and not relative_attention_bias
            and not alibi
            and rotary_dim is None
        ):
            self.position_encodings = PositionEncoderSpec()
        if pre_norm and not no_final_norm:
            self.layer_norm = common_spec.LayerNormSpec(rms_norm=rms_norm)
        if layernorm_embedding:
            self.layernorm_embedding = common_spec.LayerNormSpec(rms_norm=rms_norm)
        self.projection = common_spec.LinearSpec()
        self.layer = [
            TransformerDecoderLayerSpec(
                with_encoder_attention=with_encoder_attention,
                relative_position=relative_position,
                relative_attention_bias=relative_attention_bias,
                ffn_glu=ffn_glu,
                rms_norm=rms_norm,
                rotary_dim=rotary_dim,
                rotary_interleave=rotary_interleave,
                rotary_scaling_type=rotary_scaling_type,
                rotary_scaling_factor=rotary_scaling_factor,
                rotary_base=rotary_base,
                original_max_position_embeddings=original_max_position_embeddings,
                max_position_embeddings=max_position_embeddings,
                parallel_residual=parallel_residual,
                shared_layer_norm=shared_layer_norm,
                pre_post_layer_norm=pre_post_layer_norm,
                num_heads_kv=num_heads_kv,
                head_dim=head_dim,
                sliding_window=sliding_window,
                qk_norm=qk_norm,
            )
            for _ in range(num_layers)
        ]
        self.start_from_zero_embedding = False
        # NOTE(review): when num_heads_kv is None (the default) the comparison
        # `None != num_heads` is True, so this config flag is set even without
        # MQA/GQA — looks suspicious; confirm intended semantics downstream
        # before changing.
        self._config["multi_query_attention"] = multi_query_attention or (
            num_heads_kv != num_heads
        )

        if project_in_out:
            self.project_in = common_spec.LinearSpec()
            self.project_out = common_spec.LinearSpec()

        if quant_type is not None:
            self._config["quantization_type"] = quant_type
            self._config["quantization_bits"] = quant_bits
            self._config["quantization_group_size"] = quant_group_size

    @property
    def config(self):
        # Extra model configuration collected during construction; merged into
        # the model config by the enclosing model spec.
        return self._config
|
||||
|
||||
|
||||
class TransformerEncoderLayerSpec(model_spec.LayerSpec):
    """Specification of one Transformer encoder layer: a self-attention
    sublayer followed by a feed-forward sublayer."""

    def __init__(
        self,
        relative_position=False,
        relative_attention_bias=False,
        ffn_glu=False,
        rms_norm=False,
        num_heads_kv=None,
        sliding_window=None,
    ):
        self.self_attention = attention_spec.MultiHeadAttentionSpec(
            self_attention=True,
            relative_position=relative_position,
            relative_attention_bias=relative_attention_bias,
            rms_norm=rms_norm,
            num_heads_kv=num_heads_kv,
            sliding_window=sliding_window,
        )
        self.ffn = FeedForwardSpec(glu=ffn_glu, rms_norm=rms_norm)
|
||||
|
||||
|
||||
class TransformerDecoderLayerSpec(model_spec.LayerSpec):
    """Specification of one Transformer decoder layer: self-attention,
    optional cross-attention, and a feed-forward sublayer, with layer-norm
    placement variants (parallel residual, pre/post layer norm)."""

    def __init__(
        self,
        with_encoder_attention=True,
        relative_position=False,
        relative_attention_bias=False,
        ffn_glu=False,
        rms_norm=False,
        rotary_dim=None,
        rotary_interleave=True,
        rotary_scaling_type=None,
        rotary_scaling_factor=1,
        rotary_base=10000,
        original_max_position_embeddings=0,
        max_position_embeddings=0,
        parallel_residual=False,
        shared_layer_norm=False,
        pre_post_layer_norm=False,
        num_heads_kv=None,
        head_dim=None,
        sliding_window=None,
        qk_norm=False,
    ):
        self.self_attention = attention_spec.MultiHeadAttentionSpec(
            self_attention=True,
            relative_position=relative_position,
            relative_attention_bias=relative_attention_bias,
            rms_norm=rms_norm,
            rotary_dim=rotary_dim,
            rotary_interleave=rotary_interleave,
            rotary_scaling_type=rotary_scaling_type,
            rotary_scaling_factor=rotary_scaling_factor,
            rotary_base=rotary_base,
            original_max_position_embeddings=original_max_position_embeddings,
            max_position_embeddings=max_position_embeddings,
            num_heads_kv=num_heads_kv,
            head_dim=head_dim,
            sliding_window=sliding_window,
            qk_norm=qk_norm,
        )

        # Cross-attention over the encoder output (absent in decoder-only models).
        if with_encoder_attention:
            self.attention = attention_spec.MultiHeadAttentionSpec(
                rms_norm=rms_norm,
                num_heads_kv=num_heads_kv,
                sliding_window=sliding_window,
                qk_norm=qk_norm,
            )

        self.ffn = FeedForwardSpec(glu=ffn_glu, rms_norm=rms_norm)

        # NOTE(review): parallel_residual and pre_post_layer_norm are
        # presumably mutually exclusive — if both were set, the second pair of
        # delattr calls below would raise AttributeError since "layer_norm"
        # was already removed. Confirm callers never enable both.
        if parallel_residual:
            if shared_layer_norm:
                self.shared_layer_norm = common_spec.LayerNormSpec()
            else:
                self.input_layer_norm = common_spec.LayerNormSpec()
                self.post_attention_layer_norm = common_spec.LayerNormSpec()

            # The block-level norms above replace the per-sublayer norms.
            delattr(self.self_attention, "layer_norm")
            delattr(self.ffn, "layer_norm")

        if pre_post_layer_norm:
            self.input_layer_norm = common_spec.LayerNormSpec(rms_norm=rms_norm)
            self.post_attention_layer_norm = common_spec.LayerNormSpec(
                rms_norm=rms_norm
            )
            self.pre_feedforward_layer_norm = common_spec.LayerNormSpec(
                rms_norm=rms_norm
            )
            self.post_feedforward_layer_norm = common_spec.LayerNormSpec(
                rms_norm=rms_norm
            )

            # The block-level norms above replace the per-sublayer norms.
            delattr(self.self_attention, "layer_norm")
            delattr(self.ffn, "layer_norm")
|
||||
|
||||
|
||||
class FeedForwardSpec(model_spec.LayerSpec):
    """Specification of a feed-forward sublayer: a layer norm and two linear
    transformations, plus an extra no-activation projection when ``glu`` is
    enabled (gated linear unit variants)."""

    def __init__(self, glu=False, rms_norm=False):
        self.layer_norm = common_spec.LayerNormSpec(rms_norm=rms_norm)
        self.linear_0 = common_spec.LinearSpec()
        self.linear_1 = common_spec.LinearSpec()
        if glu:
            self.linear_0_noact = common_spec.LinearSpec()
|
||||
|
||||
|
||||
class PositionEncoderSpec(model_spec.LayerSpec):
    """Specification of a position encoder.

    The ``encodings`` variable is optional: when absent, the encodings are
    presumably generated at load time (e.g. sinusoidal) rather than stored
    with the model — confirm against the runtime loader.
    """

    def __init__(self):
        self.encodings = model_spec.OPTIONAL
|
||||
|
||||
|
||||
class TransformerConfig(model_spec.SequenceToSequenceModelConfig):
    """Configuration for Transformer models."""

    def __init__(self, layer_norm_epsilon: Optional[float] = None, **kwargs):
        """Initializes the configuration for Transformer models.

        Args:
            layer_norm_epsilon: The layer norm epsilon value.
            **kwargs: Additional configuration.
        """
        # Merge the named option into the extra configuration before
        # forwarding everything to the base class.
        kwargs["layer_norm_epsilon"] = layer_norm_epsilon
        super().__init__(**kwargs)
|
||||
|
||||
|
||||
class TransformerSpec(model_spec.SequenceToSequenceModelSpec):
    """Describes a Transformer model.

    The specification is invariant to hidden dimensions but requires to
    explicitly set the number of layers and attention heads.
    """

    def __init__(
        self, encoder: TransformerEncoderSpec, decoder: TransformerDecoderSpec
    ):
        """Initializes a Transformer model specification.

        Args:
            encoder: The encoder specification.
            decoder: The decoder specification.

        Raises:
            TypeError: If ``encoder`` or ``decoder`` is not of the expected
              specification type.
        """
        if not isinstance(encoder, TransformerEncoderSpec):
            raise TypeError("encoder argument must be a TransformerEncoderSpec")
        if not isinstance(decoder, TransformerDecoderSpec):
            raise TypeError("decoder argument must be a TransformerDecoderSpec")

        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        # Propagate the encoder's MQA flag into the model configuration.
        self._config.add_attribute(
            "multi_query_attention", self.encoder.multi_query_attention
        )

    @classmethod
    def from_config(
        cls,
        num_layers: Union[int, Tuple[int, int]],
        num_heads: int,
        with_relative_position: bool = False,
        pre_norm: bool = True,
        no_final_norm: bool = False,
        activation: common_spec.Activation = common_spec.Activation.RELU,
        alignment_layer: int = -1,
        alignment_heads: int = 1,
        num_source_embeddings: int = 1,
        embeddings_merge: common_spec.EmbeddingsMerge = common_spec.EmbeddingsMerge.CONCAT,
        layernorm_embedding: bool = False,
        relative_attention_bias: bool = False,
        ffn_glu: bool = False,
        rms_norm: bool = False,
        multi_query_attention: bool = False,
    ):
        """Creates a Transformer model specification.

        Args:
            num_layers: Number of encoder and decoder layers, or a 2-tuple if the
              number is different.
            num_heads: Number of attention heads.
            with_relative_position: Use relative position representations in the self-attention
              layers as described in https://arxiv.org/abs/1803.02155.
            pre_norm: Enable the pre-norm Transformer architecture.
            no_final_norm: Disable the final layer norm in the pre-norm architecture.
            activation: Activation to apply in the feed-forward network.
            alignment_layer: Layer index selected for alignment.
            alignment_heads: Number of attention heads selected for alignment.
            num_source_embeddings: Number of source embeddings.
            embeddings_merge: When :obj:`num_source_embeddings` > 1, specify how the
              embeddings are merged.
            layernorm_embedding: Apply layer normalization after the embedding layer.
            relative_attention_bias: Use relative attention bias in the self-attention
              layers as described in the T5 paper https://arxiv.org/abs/1910.10683.
            ffn_glu: Use gated linear units in the FFN layer as described in
              https://arxiv.org/abs/2002.05202.
            rms_norm: Use the root mean square layer normalization.
            multi_query_attention: Use multi-query attention.
        """
        # A single int means the same depth on both sides.
        if isinstance(num_layers, (list, tuple)):
            num_encoder_layers, num_decoder_layers = num_layers
        else:
            num_encoder_layers, num_decoder_layers = num_layers, num_layers

        encoder = TransformerEncoderSpec(
            num_encoder_layers,
            num_heads,
            pre_norm=pre_norm,
            no_final_norm=no_final_norm,
            activation=activation,
            num_source_embeddings=num_source_embeddings,
            embeddings_merge=embeddings_merge,
            layernorm_embedding=layernorm_embedding,
            relative_position=with_relative_position,
            relative_attention_bias=relative_attention_bias,
            ffn_glu=ffn_glu,
            rms_norm=rms_norm,
            multi_query_attention=multi_query_attention,
        )

        decoder = TransformerDecoderSpec(
            num_decoder_layers,
            num_heads,
            pre_norm=pre_norm,
            no_final_norm=no_final_norm,
            activation=activation,
            layernorm_embedding=layernorm_embedding,
            relative_position=with_relative_position,
            relative_attention_bias=relative_attention_bias,
            alignment_layer=alignment_layer,
            alignment_heads=alignment_heads,
            ffn_glu=ffn_glu,
            rms_norm=rms_norm,
            multi_query_attention=multi_query_attention,
        )

        return cls(encoder, decoder)

    @property
    def name(self):
        # Identifier of this specification in the serialized model.
        return "TransformerSpec"

    @property
    def revision(self):
        # Bumped whenever the serialized layout of this spec changes.
        return 7

    def get_default_config(self):
        return TransformerConfig()

    def get_source_vocabulary_size(self):
        # One size per source embedding table (rows of the embedding matrix).
        return [spec.weight.shape[0] for spec in self.encoder.embeddings]

    def get_target_vocabulary_size(self):
        return self.decoder.embeddings.weight.shape[0]
|
||||
|
||||
|
||||
class TransformerDecoderModelConfig(model_spec.LanguageModelConfig):
    """Configuration for Transformer decoder models."""

    def __init__(self, layer_norm_epsilon: Optional[float] = None, **kwargs):
        """Initializes the configuration for Transformer decoder models.

        Args:
            layer_norm_epsilon: The layer norm epsilon value.
            **kwargs: Additional configuration.
        """
        # Merge the named option into the extra configuration before
        # forwarding everything to the base class.
        kwargs["layer_norm_epsilon"] = layer_norm_epsilon
        super().__init__(**kwargs)
|
||||
|
||||
|
||||
class TransformerDecoderModelSpec(model_spec.LanguageModelSpec):
    """Describes a Transformer decoder model (e.g. GPT-2)."""

    def __init__(self, decoder: TransformerDecoderSpec):
        """Initializes a Transformer decoder model specification.

        Args:
          decoder: The decoder specification.

        Raises:
          TypeError: If ``decoder`` is not a ``TransformerDecoderSpec``.
        """
        if not isinstance(decoder, TransformerDecoderSpec):
            raise TypeError("decoder argument must be a TransformerDecoderSpec")

        super().__init__()
        self.decoder = decoder
        # Promote the decoder's configuration entries to the model-level
        # configuration so they are saved with the model.
        for key, value in self.decoder.config.items():
            self._config.add_attribute(key, value)

    @classmethod
    def from_config(
        cls,
        num_layers: int,
        num_heads: int,
        pre_norm: bool = True,
        activation: common_spec.Activation = common_spec.Activation.RELU,
        layernorm_embedding: bool = False,
        no_final_norm: bool = False,
        project_in_out: bool = False,
        with_relative_position: bool = False,
        ffn_glu: bool = False,
        rms_norm: bool = False,
        alibi: bool = False,
        alibi_use_positive_positions: bool = False,
        scale_alibi: bool = False,
        rotary_dim: Optional[int] = None,
        rotary_interleave: bool = True,
        rotary_scaling_type: Optional[attention_spec.RotaryScalingType] = None,
        rotary_scaling_factor: float = 1,
        rotary_base: float = 10000,
        original_max_position_embeddings: int = 0,
        max_position_embeddings: int = 0,
        parallel_residual: bool = False,
        shared_layer_norm: bool = False,
        pre_post_layer_norm: bool = False,
        multi_query_attention: bool = False,
        num_heads_kv: Optional[int] = None,
        head_dim: Optional[int] = None,
        sliding_window: Optional[int] = None,
        quant_type: Optional[common_spec.Quantization] = None,
        quant_group_size: Optional[int] = None,
        quant_bits: Optional[int] = None,
        qk_norm: Optional[bool] = False,
    ):
        """Creates a Transformer decoder model specification.

        Args:
          num_layers: Number of decoder layers.
          num_heads: Number of attention heads.
          pre_norm: Enable the pre-norm Transformer architecture.
          activation: Activation to apply in the feed-forward network.
          layernorm_embedding: Apply layer normalization after the embedding layer.
          no_final_norm: Do not apply layer normalization after the last decoder block.
          project_in_out: Add a linear layer after the embedding layer and another one
            before the final output projection.
          with_relative_position: Enable relative position representations modules.
          ffn_glu: Use gated linear units in the FFN layers as described in
            https://arxiv.org/abs/2002.05202.
          rms_norm: Use the root mean square layer normalization.
          alibi: Use attention with linear biases.
          alibi_use_positive_positions: Use positive positions in the ALiBi definition.
          scale_alibi: Apply the dot product scale factor to ALiBi.
          rotary_dim: Apply rotary embeddings to these first N dimensions. If 0, rotary
            embeddings are applied to all dimensions.
          rotary_interleave: Interleave the head dimensions when rotary embeddings are applied.
            Otherwise the head dimensions are sliced in half.
          rotary_scaling_type: Type of RoPE scaling.
          rotary_scaling_factor: Factor used in the RoPE scaling.
          rotary_base: The base period of the rotary embeddings.
          original_max_position_embeddings: The original max position embeddings
            for Su rope embeddings.
          max_position_embeddings: The max position embeddings for Su rope embeddings.
          parallel_residual: Use parallel residual connections in each layer block, as used
            by the GPT-J and GPT-NeoX models.
          shared_layer_norm: When using parallel residual, share the input and post
            attention layer norms.
          pre_post_layer_norm: Add a post layer norm for each pre norm layer.
          multi_query_attention: Use multi-query attention (alias for num_heads_kv=1).
          num_heads_kv: Number of attention heads for the key and value.
          head_dim: Dimension of each attention head.
          sliding_window: Max sequence length to retain in the KV cache.
          quant_type: Quantization type used (like awq... for lower bit quantization).
          quant_group_size: Group size of the lower bit quantization.
          quant_bits: Number of bits of the quantization (ex: 4bit).
          qk_norm: Apply normalization to the query and key projections
            (assumed from the parameter name — confirm against
            ``TransformerDecoderSpec``).
        """
        decoder = TransformerDecoderSpec(
            num_layers,
            num_heads,
            pre_norm=pre_norm,
            activation=activation,
            layernorm_embedding=layernorm_embedding,
            # Decoder-only model: no cross-attention over encoder states.
            with_encoder_attention=False,
            no_final_norm=no_final_norm,
            project_in_out=project_in_out,
            relative_position=with_relative_position,
            ffn_glu=ffn_glu,
            rms_norm=rms_norm,
            alibi=alibi,
            alibi_use_positive_positions=alibi_use_positive_positions,
            scale_alibi=scale_alibi,
            rotary_dim=rotary_dim,
            rotary_interleave=rotary_interleave,
            rotary_scaling_type=rotary_scaling_type,
            rotary_scaling_factor=rotary_scaling_factor,
            rotary_base=rotary_base,
            original_max_position_embeddings=original_max_position_embeddings,
            max_position_embeddings=max_position_embeddings,
            parallel_residual=parallel_residual,
            shared_layer_norm=shared_layer_norm,
            pre_post_layer_norm=pre_post_layer_norm,
            multi_query_attention=multi_query_attention,
            num_heads_kv=num_heads_kv,
            head_dim=head_dim,
            sliding_window=sliding_window,
            quant_type=quant_type,
            quant_group_size=quant_group_size,
            quant_bits=quant_bits,
            qk_norm=qk_norm,
        )

        return cls(decoder)

    @property
    def name(self):
        # Serialized spec name; intentionally differs from the Python class
        # name (same pattern as the other model specs in this package).
        return "TransformerDecoderSpec"

    @property
    def revision(self):
        # Bump when the serialized layout of this spec changes.
        return 8

    def get_default_config(self):
        """Returns the default configuration for this model type."""
        return TransformerDecoderModelConfig()

    def get_vocabulary_size(self):
        """Returns the vocabulary size, read from the embedding table shape."""
        return self.decoder.embeddings.weight.shape[0]
|
||||
|
||||
|
||||
class TransformerEncoderModelConfig(model_spec.LanguageModelConfig):
    """Configuration for Transformer encoder models."""

    def __init__(self, layer_norm_epsilon: Optional[float] = None, **kwargs):
        """Initializes the configuration for Transformer encoder models.

        Args:
          layer_norm_epsilon: The layer norm epsilon value.
          **kwargs: Additional configuration, forwarded unchanged to the base
            language model configuration.
        """
        # Pass layer_norm_epsilon explicitly so it is always present in the
        # saved configuration, even when left as None.
        super().__init__(layer_norm_epsilon=layer_norm_epsilon, **kwargs)
|
||||
|
||||
|
||||
class TransformerEncoderModelSpec(model_spec.LanguageModelSpec):
    """Describes a Transformer encoder model (e.g. BERT)."""

    def __init__(
        self,
        encoder: TransformerEncoderSpec,
        pooling_layer: bool = False,
        pooling_activation: common_spec.Activation = common_spec.Activation.Tanh,
    ):
        """Initializes a Transformer encoder model specification.

        Args:
          encoder: The encoder specification.
          pooling_layer: Add the pooling layer.
          pooling_activation: The activation to apply after the pooling layer.

        Raises:
          TypeError: If ``encoder`` is not a ``TransformerEncoderSpec``.
        """
        if not isinstance(encoder, TransformerEncoderSpec):
            raise TypeError("encoder argument must be a TransformerEncoderSpec")

        super().__init__()
        self.encoder = encoder
        # Expose the encoder's attention layout in the saved model config.
        self._config.add_attribute(
            "multi_query_attention", self.encoder.multi_query_attention
        )

        # The pooler attributes only exist when the pooling layer is enabled;
        # absent attributes are simply not serialized.
        if pooling_layer:
            self.pooler_dense = common_spec.LinearSpec()
            # The activation enum is stored as an int8 scalar, presumably so
            # the value is serialized with the spec rather than kept as a
            # Python enum.
            self.pooler_activation = np.dtype("int8").type(pooling_activation)

    @property
    def name(self):
        # Serialized spec name; intentionally differs from the Python class
        # name (same pattern as TransformerDecoderModelSpec).
        return "TransformerEncoderSpec"

    @property
    def revision(self):
        # Bump when the serialized layout of this spec changes.
        return 1

    def get_default_config(self):
        """Returns the default configuration for this model type."""
        return TransformerEncoderModelConfig()

    def get_vocabulary_size(self):
        """Returns the vocabulary size, read from the first embedding table."""
        return self.encoder.embeddings[0].weight.shape[0]
|
||||
@@ -0,0 +1,72 @@
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ctranslate2.specs import common_spec, model_spec, transformer_spec
|
||||
|
||||
|
||||
class Wav2Vec2Config(model_spec.ModelConfig):
    """Configuration for the Wav2Vec2 model."""

    def __init__(self):
        # This model exposes no configuration attributes; the base-class
        # initializer is deliberately not invoked, matching the original
        # behavior. NOTE(review): confirm the base class needs no setup.
        pass
|
||||
|
||||
|
||||
class Wav2Vec2Spec(model_spec.LanguageModelSpec):
    """Describes a Wav2Vec2 speech model."""

    def __init__(
        self,
        feat_layers,
        num_layers,
        num_heads,
        return_hidden,
        vocab_size,
    ):
        """Initializes the Wav2Vec2 model specification.

        Args:
          feat_layers: Number of convolutional feature-extraction layers.
          num_layers: Number of Transformer encoder layers.
          num_heads: Number of attention heads per encoder layer.
          vocab_size: Size of the output vocabulary.
          return_hidden: When true, the LM head is omitted from the encoder.
        """
        super().__init__()
        # Stored as a NumPy int16 scalar attribute on the spec.
        self.vocab_size = np.dtype("int16").type(vocab_size)
        self.encoder = Wav2Vec2EncoderSpec(
            feat_layers,
            num_layers,
            num_heads,
            return_hidden,
        )

    @property
    def name(self):
        # Serialized spec name.
        return "Wav2Vec2Spec"

    @property
    def revision(self):
        # Bump when the serialized layout of this spec changes.
        return 3

    def get_default_config(self):
        """Returns the default (empty) configuration for this model."""
        return Wav2Vec2Config()

    def get_vocabulary_size(self):
        """Returns the vocabulary size as a plain int."""
        # NOTE(review): self.vocab_size is created as a NumPy int16 scalar,
        # which has no .numpy() method; this call only works if conversion
        # replaces the attribute with a tensor-like object first — verify.
        return int(self.vocab_size.numpy())
|
||||
|
||||
|
||||
class Wav2Vec2LayerNormConvLayer(model_spec.LayerSpec):
    """A feature-extraction layer: 1D convolution followed by layer norm."""

    def __init__(self):
        self.conv = common_spec.Conv1DSpec()
        self.layer_norm = common_spec.LayerNormSpec()
|
||||
|
||||
|
||||
class Wav2Vec2PosEmbedConvLayer(model_spec.LayerSpec):
    """Convolutional positional embedding layer (a single 1D convolution)."""

    def __init__(self):
        self.conv = common_spec.Conv1DSpec()
|
||||
|
||||
|
||||
class Wav2Vec2EncoderSpec(model_spec.LayerSpec):
    """Describes the Wav2Vec2 encoder: convolutional feature extractor,
    feature projection, positional embedding, and Transformer layer stack."""

    def __init__(self, feat_layers, num_layers, num_heads, return_hidden):
        """Initializes the encoder specification.

        Args:
          feat_layers: Total number of convolutional feature-extraction layers
            (the first one is stored separately in ``feat_layer0``).
          num_layers: Number of Transformer encoder layers.
          num_heads: Number of attention heads per encoder layer.
          return_hidden: When true, omit the LM head so the encoder returns
            hidden states instead of vocabulary logits.
        """
        # Stored as a NumPy int16 scalar attribute on the spec.
        self.num_heads = np.dtype("int16").type(num_heads)
        # First feature layer kept separate from the remaining feat_layers - 1.
        self.feat_layer0 = Wav2Vec2LayerNormConvLayer()
        # Idiom fix: use "_" for the unused loop variable (was "i").
        self.feat_layer = [Wav2Vec2LayerNormConvLayer() for _ in range(feat_layers - 1)]
        self.fp_layer_norm = common_spec.LayerNormSpec()
        self.fp_projection = common_spec.LinearSpec()
        self.pos_conv_embed = Wav2Vec2PosEmbedConvLayer()
        self.layer_norm = common_spec.LayerNormSpec()
        self.layer = [
            transformer_spec.TransformerEncoderLayerSpec() for _ in range(num_layers)
        ]
        # The LM head only exists when logits are requested; absent attributes
        # are simply not serialized.
        if not return_hidden:
            self.lm_head = common_spec.LinearSpec()
|
||||
@@ -0,0 +1,97 @@
|
||||
import numpy as np
|
||||
|
||||
from ctranslate2.specs import attention_spec, common_spec, model_spec
|
||||
|
||||
|
||||
class Wav2Vec2BertConfig(model_spec.ModelConfig):
    """Configuration for the Wav2Vec2Bert model."""

    def __init__(self):
        # This model exposes no configuration attributes; the base-class
        # initializer is deliberately not invoked, matching the original
        # behavior. NOTE(review): confirm the base class needs no setup.
        pass
|
||||
|
||||
|
||||
class Wav2Vec2BertSpec(model_spec.LanguageModelSpec):
    """Describes a Wav2Vec2-BERT speech model."""

    def __init__(
        self,
        num_hidden_layers,
        num_adapter_layers,
        vocab_size,
        return_hidden,
    ):
        """Initializes the Wav2Vec2-BERT model specification.

        Args:
          num_hidden_layers: Number of encoder (Conformer-style) layers.
          num_adapter_layers: Number of adapter layers.
          vocab_size: Size of the output vocabulary.
          return_hidden: When true, the LM head is omitted from the encoder.
        """
        super().__init__()
        # Stored as a NumPy int16 scalar attribute on the spec.
        self.vocab_size = np.dtype("int16").type(vocab_size)
        # Bug fix: the original passed (num_adapter_layers, num_hidden_layers)
        # positionally, which is the reverse of
        # Wav2Vec2BertEncoderSpec.__init__(num_hidden_layers,
        # num_adapter_layers, return_hidden). Keyword arguments make the
        # binding explicit and correct.
        self.encoder = Wav2Vec2BertEncoderSpec(
            num_hidden_layers=num_hidden_layers,
            num_adapter_layers=num_adapter_layers,
            return_hidden=return_hidden,
        )

    @property
    def name(self):
        # Serialized spec name.
        return "Wav2Vec2BertSpec"

    @property
    def revision(self):
        # Bump when the serialized layout of this spec changes.
        return 1

    def get_default_config(self):
        """Returns the default (empty) configuration for this model."""
        return Wav2Vec2BertConfig()

    def get_vocabulary_size(self):
        """Returns the vocabulary size as a plain int."""
        # NOTE(review): self.vocab_size is created as a NumPy int16 scalar,
        # which has no .numpy() method; this call only works if conversion
        # replaces the attribute with a tensor-like object first — verify.
        return int(self.vocab_size.numpy())
|
||||
|
||||
|
||||
class Wav2Vec2BertFeedForwardSpec(model_spec.LayerSpec):
    """Feed-forward sub-layer: two linear projections, optionally gated."""

    def __init__(self, glu=False, rms_norm=False):
        # NOTE(review): rms_norm is accepted but never used; confirm whether
        # it should select a normalization spec or can be dropped by callers.
        self.linear_0 = common_spec.LinearSpec()
        self.linear_1 = common_spec.LinearSpec()
        if glu:
            # Extra projection without activation — the name suggests it is
            # the non-activated half of a gated linear unit.
            self.linear_0_noact = common_spec.LinearSpec()
|
||||
|
||||
|
||||
class EncoderSpec(model_spec.LayerSpec):
    """One Wav2Vec2-BERT encoder block.

    Layout: feed-forward 1, self-attention, convolution module,
    feed-forward 2, final layer norm — resembling a Conformer-style block.
    """

    def __init__(self):
        self.enc_ffn1_layer_norm = common_spec.LayerNormSpec()
        self.enc_ffn1 = Wav2Vec2BertFeedForwardSpec()
        self.enc_attn_layer_norm = common_spec.LayerNormSpec()
        self.enc_attn = attention_spec.MultiHeadAttentionSpec(
            self_attention=True,
            relative_asymmetric_position=True,
        )
        # The attention spec bundles its own layer norm; delete it since this
        # block declares enc_attn_layer_norm separately, so no weight is
        # expected for the nested one.
        del self.enc_attn.layer_norm
        self.enc_conv_layer_norm = common_spec.LayerNormSpec()
        # Pointwise/depthwise convolutions have no bias in this architecture;
        # deleting the bias attributes removes them from the expected weights.
        self.enc_conv_pointwise_conv1 = common_spec.Conv1DSpec()
        del self.enc_conv_pointwise_conv1.bias
        self.enc_conv_depthwise_conv = common_spec.Conv1DSpec()
        del self.enc_conv_depthwise_conv.bias
        self.enc_conv_depthwise_layer_norm = common_spec.LayerNormSpec()
        self.enc_conv_pointwise_conv2 = common_spec.Conv1DSpec()
        del self.enc_conv_pointwise_conv2.bias
        self.enc_ffn2_layer_norm = common_spec.LayerNormSpec()
        self.enc_ffn2 = Wav2Vec2BertFeedForwardSpec()
        self.enc_final_layer_norm = common_spec.LayerNormSpec()
|
||||
|
||||
|
||||
class AdapterSpec(model_spec.LayerSpec):
    """One Wav2Vec2-BERT adapter block: residual conv, self-attention with its
    own conv, and a feed-forward module, each preceded by a layer norm."""

    def __init__(self):
        self.adpt_residual_layer_norm = common_spec.LayerNormSpec()
        self.adpt_residual_conv = common_spec.Conv1DSpec()
        self.adpt_attn_layer_norm = common_spec.LayerNormSpec()
        self.adpt_attn_conv = common_spec.Conv1DSpec()
        self.adpt_attn_layer = attention_spec.MultiHeadAttentionSpec(
            self_attention=True,
            relative_asymmetric_position=False,
        )
        # The attention spec bundles its own layer norm; delete it since this
        # block declares adpt_attn_layer_norm separately, so no weight is
        # expected for the nested one.
        del self.adpt_attn_layer.layer_norm
        self.adpt_ffn_layer_norm = common_spec.LayerNormSpec()
        self.adpt_ffn = Wav2Vec2BertFeedForwardSpec()
|
||||
|
||||
|
||||
class Wav2Vec2BertEncoderSpec(model_spec.LayerSpec):
    """Describes the full Wav2Vec2-BERT encoder: feature projection, encoder
    layer stack, adapter layer stack, and optional LM head."""

    def __init__(self, num_hidden_layers, num_adapter_layers, return_hidden):
        # NOTE(review): confirm callers pass num_hidden_layers and
        # num_adapter_layers in this exact order — the two counts are easy to
        # swap at positional call sites.
        self.fp_layer_norm = common_spec.LayerNormSpec()
        self.fp_projection = common_spec.LinearSpec()
        self.encoder_layers = [EncoderSpec() for _ in range(num_hidden_layers)]
        self.adapter_layers = [AdapterSpec() for _ in range(num_adapter_layers)]
        # The LM head only exists when logits are requested; absent attributes
        # are simply not serialized.
        if not return_hidden:
            self.lm_head = common_spec.LinearSpec()
|
||||
@@ -0,0 +1,77 @@
|
||||
from typing import List, Optional, Tuple
|
||||
|
||||
import numpy as np
|
||||
|
||||
from ctranslate2.specs import common_spec, model_spec, transformer_spec
|
||||
|
||||
|
||||
class WhisperConfig(model_spec.ModelConfig):
    """Configuration for the Whisper model."""

    def __init__(
        self,
        suppress_ids: Optional[List[int]] = None,
        suppress_ids_begin: Optional[List[int]] = None,
        lang_ids: Optional[List[int]] = None,
        alignment_heads: Optional[List[Tuple[int, int]]] = None,
    ):
        """Initializes the Whisper configuration.

        Args:
          suppress_ids: Token IDs to suppress at every decoding step.
          suppress_ids_begin: Token IDs to suppress at the first decoding step.
          lang_ids: Token IDs of the language tokens.
          alignment_heads: (layer, head) pairs used for alignment.
        """
        options = {
            "suppress_ids": suppress_ids,
            "suppress_ids_begin": suppress_ids_begin,
            "lang_ids": lang_ids,
            "alignment_heads": alignment_heads,
        }
        super().__init__(**options)
|
||||
|
||||
|
||||
class WhisperSpec(model_spec.LanguageModelSpec):
    """Describes a Whisper model."""

    def __init__(
        self,
        num_encoder_layers,
        num_encoder_heads,
        num_decoder_layers,
        num_decoder_heads,
    ):
        """Initializes the model specification.

        Args:
          num_encoder_layers: The number of encoder layers.
          num_encoder_heads: The number of encoder attention heads.
          num_decoder_layers: The number of decoder layers.
          num_decoder_heads: The number of decoder attention heads.
        """
        super().__init__()
        self.encoder = WhisperEncoderSpec(num_encoder_layers, num_encoder_heads)
        # The decoder reuses the generic Transformer decoder spec with GELU
        # activations.
        self.decoder = transformer_spec.TransformerDecoderSpec(
            num_decoder_layers,
            num_decoder_heads,
            activation=common_spec.Activation.GELU,
        )
        # Disable the embedding scaling usually applied by the Transformer
        # decoder (presumably Whisper does not scale its embeddings — verify).
        self.decoder.scale_embeddings = False

    @property
    def name(self):
        # Serialized spec name.
        return "WhisperSpec"

    @property
    def revision(self):
        # Bump when the serialized layout of this spec changes.
        return 3

    def get_default_config(self):
        """Returns the default configuration for this model."""
        return WhisperConfig()

    def get_vocabulary_size(self):
        """Returns the vocabulary size, read from the embedding table shape."""
        return self.decoder.embeddings.weight.shape[0]
|
||||
|
||||
|
||||
class WhisperEncoderSpec(model_spec.LayerSpec):
    """Describes the Whisper audio encoder: two 1D convolutions, fixed
    position encodings, and a Transformer encoder layer stack."""

    def __init__(self, num_layers, num_heads):
        """Initializes the encoder specification.

        Args:
          num_layers: Number of Transformer encoder layers.
          num_heads: Number of attention heads per layer.
        """
        # Stored as a NumPy int16 scalar attribute on the spec.
        self.num_heads = np.dtype("int16").type(num_heads)
        self.conv1 = common_spec.Conv1DSpec()
        self.conv2 = common_spec.Conv1DSpec()
        self.position_encodings = transformer_spec.PositionEncoderSpec()
        self.layer_norm = common_spec.LayerNormSpec()
        self.layer = [
            transformer_spec.TransformerEncoderLayerSpec() for _ in range(num_layers)
        ]
|
||||
3
venv/lib/python3.12/site-packages/ctranslate2/version.py
Normal file
3
venv/lib/python3.12/site-packages/ctranslate2/version.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""Version information."""
|
||||
|
||||
__version__ = "4.6.2"
|
||||
Reference in New Issue
Block a user