Skip to content

Commit 355a91c

Browse files
committed
Update Ovis2 configuration and processing classes for improved documentation
1 parent 7305a22 commit 355a91c

File tree

3 files changed

+27
-22
lines changed

3 files changed

+27
-22
lines changed

src/transformers/models/ovis2/configuration_ovis2.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,9 @@
33

44

55
class Ovis2VisionConfig(PretrainedConfig):
6-
r"""
6+
r"""This is the configuration class to store the configuration of a [`Ovis2VisionModel`]. It is used to instantiate a
7+
Ovis2VisionModel model according to the specified arguments, defining the model architecture. Instantiating a configuration
8+
with the defaults will yield a similar configuration to that of Ovis2.
79
810
Args:
911
hidden_size (`int`, *optional*, defaults to 1024):
@@ -102,7 +104,7 @@ class Ovis2Config(PretrainedConfig):
102104
Configuration objects inherit from [`PretrainedConfig`] and can be used to control the model outputs. Read the
103105
documentation from [`PretrainedConfig`] for more information.
104106
105-
[thisisiron/Ovis2-1B-hf](https://huggingface.co/thisisiron/Ovis2-1B-hf)
107+
e.g. [thisisiron/Ovis2-1B-hf](https://huggingface.co/thisisiron/Ovis2-1B-hf)
106108
107109
Args:
108110
vision_config (`Union[AutoConfig, dict]`, *optional*, defaults to `Ovis2VisionConfig`):
@@ -124,7 +126,7 @@ class Ovis2Config(PretrainedConfig):
124126
>>> # Initializing a Ovis2 style configuration
125127
>>> configuration = Ovis2Config()
126128
127-
>>> # Initializing a model from the Qwen2-VL-7B style configuration
129+
>>> # Initializing a model from the Ovis2-2B style configuration
128130
>>> model = Ovis2ForConditionalGeneration(configuration)
129131
130132
>>> # Accessing the model configuration

src/transformers/models/ovis2/convert_ovis2_weights_to_hf.py

Lines changed: 15 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
from transformers import (
1010
AutoModelForCausalLM,
1111
AutoModelForVision2Seq,
12+
AutoProcessor,
1213
AutoTokenizer,
1314
)
1415
from transformers.models.ovis2.configuration_ovis2 import Ovis2Config, Ovis2VisionConfig
@@ -96,17 +97,15 @@ def create_tokenizer(model_name_or_path, save_dir):
9697
Returns:
9798
The configured tokenizer
9899
"""
99-
if model_name_or_path:
100-
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, return_token_type_ids=False)
101-
tokenizer.model_max_length = CONTEXT_LENGTH
102-
tokenizer.add_special_tokens(
103-
{"additional_special_tokens": SPECIAL_TOKENS},
104-
replace_additional_special_tokens=False,
105-
)
106-
else:
107-
tokenizer = AutoTokenizer.from_pretrained("./ovisv2_hf/tokenizer_ovisv2", return_token_type_ids=False)
100+
tokenizer = AutoTokenizer.from_pretrained(model_name_or_path, return_token_type_ids=False)
101+
tokenizer.model_max_length = CONTEXT_LENGTH
102+
tokenizer.add_special_tokens(
103+
{"additional_special_tokens": SPECIAL_TOKENS},
104+
replace_additional_special_tokens=False,
105+
)
108106
tokenizer.chat_template = CHAT_TEMPLATE
109-
tokenizer.save_pretrained(save_dir)
107+
setattr(tokenizer, "image_token", "<IMG_ATOM>")
108+
setattr(tokenizer, "image_token_id", tokenizer.convert_tokens_to_ids(tokenizer.image_token))
110109
return tokenizer
111110

112111

@@ -124,9 +123,6 @@ def create_image_processor(save_dir):
124123
crop_to_patches=True,
125124
size={"height": 448, "width": 448},
126125
)
127-
128-
image_processor.save_pretrained(save_dir)
129-
print(f"Image processor saved to {save_dir}")
130126
return image_processor
131127

132128

@@ -315,14 +311,15 @@ def main():
315311
save_dir=args.save_dir,
316312
)
317313

314+
os.makedirs(args.save_dir, exist_ok=True)
315+
318316
# Convert and save the model
319317
model = convert_model(model_name_or_path=args.model_name_or_path)
318+
model.save_pretrained(args.save_dir)
320319

321-
# Save the model and processor
322-
os.makedirs(args.save_dir, exist_ok=True)
320+
# Save the processor
323321
processor = Ovis2Processor(tokenizer=tokenizer, image_processor=image_processor, chat_template=CHAT_TEMPLATE)
324322
processor.save_pretrained(args.save_dir)
325-
model.save_pretrained(args.save_dir)
326323

327324
# Push to hub if requested
328325
if args.push_to_hub:
@@ -338,6 +335,8 @@ def main():
338335
.to("cuda:0")
339336
)
340337

338+
processor = AutoProcessor.from_pretrained(args.save_dir)
339+
341340
messages = [
342341
{
343342
"role": "user",

src/transformers/models/ovis2/processing_ovis2.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,8 +36,7 @@ class Ovis2Processor(ProcessorMixin):
3636
image_token (`str`, *optional*, defaults to `"<image>"`):
3737
Special token used to denote image location.
3838
image_seq_length (`int`, *optional*, defaults to 256):
39-
The maximum sequence length for image tokens.
40-
Shoudl be same as in model's config
39+
The number of image tokens to be used for each image in the input.
4140
"""
4241

4342
attributes = ["image_processor", "tokenizer"]
@@ -60,7 +59,12 @@ def __init__(
6059
):
6160
self.image_seq_length = image_seq_length
6261
self.image_token = tokenizer.image_token if hasattr(tokenizer, "image_token") else image_token
63-
super().__init__(image_processor, tokenizer, chat_template=chat_template)
62+
self.image_token_id = (
63+
tokenizer.image_token_id
64+
if getattr(tokenizer, "image_token_id", None)
65+
else tokenizer.convert_tokens_to_ids(self.image_token)
66+
)
67+
super().__init__(image_processor, tokenizer, chat_template=chat_template, **kwargs)
6468

6569
def __call__(
6670
self,

0 commit comments

Comments (0)