99from transformers import (
1010 AutoModelForCausalLM ,
1111 AutoModelForVision2Seq ,
12+ AutoProcessor ,
1213 AutoTokenizer ,
1314)
1415from transformers .models .ovis2 .configuration_ovis2 import Ovis2Config , Ovis2VisionConfig
@@ -96,17 +97,15 @@ def create_tokenizer(model_name_or_path, save_dir):
9697 Returns:
9798 The configured tokenizer
9899 """
99- if model_name_or_path :
100- tokenizer = AutoTokenizer .from_pretrained (model_name_or_path , return_token_type_ids = False )
101- tokenizer .model_max_length = CONTEXT_LENGTH
102- tokenizer .add_special_tokens (
103- {"additional_special_tokens" : SPECIAL_TOKENS },
104- replace_additional_special_tokens = False ,
105- )
106- else :
107- tokenizer = AutoTokenizer .from_pretrained ("./ovisv2_hf/tokenizer_ovisv2" , return_token_type_ids = False )
100+ tokenizer = AutoTokenizer .from_pretrained (model_name_or_path , return_token_type_ids = False )
101+ tokenizer .model_max_length = CONTEXT_LENGTH
102+ tokenizer .add_special_tokens (
103+ {"additional_special_tokens" : SPECIAL_TOKENS },
104+ replace_additional_special_tokens = False ,
105+ )
108106 tokenizer .chat_template = CHAT_TEMPLATE
109- tokenizer .save_pretrained (save_dir )
107+ setattr (tokenizer , "image_token" , "<IMG_ATOM>" )
108+ setattr (tokenizer , "image_token_id" , tokenizer .convert_tokens_to_ids (tokenizer .image_token ))
110109 return tokenizer
111110
112111
@@ -124,9 +123,6 @@ def create_image_processor(save_dir):
124123 crop_to_patches = True ,
125124 size = {"height" : 448 , "width" : 448 },
126125 )
127-
128- image_processor .save_pretrained (save_dir )
129- print (f"Image processor saved to { save_dir } " )
130126 return image_processor
131127
132128
@@ -315,14 +311,15 @@ def main():
315311 save_dir = args .save_dir ,
316312 )
317313
314+ os .makedirs (args .save_dir , exist_ok = True )
315+
318316 # Convert and save the model
319317 model = convert_model (model_name_or_path = args .model_name_or_path )
318+ model .save_pretrained (args .save_dir )
320319
321- # Save the model and processor
322- os .makedirs (args .save_dir , exist_ok = True )
320+ # Save the processor
323321 processor = Ovis2Processor (tokenizer = tokenizer , image_processor = image_processor , chat_template = CHAT_TEMPLATE )
324322 processor .save_pretrained (args .save_dir )
325- model .save_pretrained (args .save_dir )
326323
327324 # Push to hub if requested
328325 if args .push_to_hub :
@@ -338,6 +335,8 @@ def main():
338335 .to ("cuda:0" )
339336 )
340337
338+ processor = AutoProcessor .from_pretrained (args .save_dir )
339+
341340 messages = [
342341 {
343342 "role" : "user" ,
0 commit comments