prithivMLmods commited on
Commit
50bfc9d
·
verified ·
1 Parent(s): e0bd93e

Upload folder using huggingface_hub

Browse files
checkpoint-1190/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "Child 0-12",
8
+ "1": "Teenager 13-20",
9
+ "2": "Adult 21-44",
10
+ "3": "Middle Age 45-64",
11
+ "4": "Aged 65+"
12
+ },
13
+ "initializer_factor": 1.0,
14
+ "label2id": {
15
+ "Adult 21-44": 2,
16
+ "Aged 65+": 4,
17
+ "Child 0-12": 0,
18
+ "Middle Age 45-64": 3,
19
+ "Teenager 13-20": 1
20
+ },
21
+ "logit_scale_init_value": 2.6592,
22
+ "model_type": "metaclip_2",
23
+ "problem_type": "single_label_classification",
24
+ "projection_dim": 384,
25
+ "text_config": {
26
+ "attention_dropout": 0.0,
27
+ "dtype": "float32",
28
+ "eos_token_id": 2,
29
+ "hidden_act": "gelu",
30
+ "hidden_size": 384,
31
+ "initializer_factor": 1.0,
32
+ "initializer_range": 0.02,
33
+ "intermediate_size": 1536,
34
+ "layer_norm_eps": 1e-05,
35
+ "max_position_embeddings": 77,
36
+ "model_type": "metaclip_2_text_model",
37
+ "num_attention_heads": 6,
38
+ "num_hidden_layers": 12,
39
+ "projection_dim": 384,
40
+ "vocab_size": 901629
41
+ },
42
+ "transformers_version": "4.57.1",
43
+ "vision_config": {
44
+ "attention_dropout": 0.0,
45
+ "dtype": "float32",
46
+ "hidden_act": "gelu",
47
+ "hidden_size": 384,
48
+ "image_size": 224,
49
+ "initializer_factor": 1.0,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 1536,
52
+ "layer_norm_eps": 1e-05,
53
+ "model_type": "metaclip_2_vision_model",
54
+ "num_attention_heads": 6,
55
+ "num_channels": 3,
56
+ "num_hidden_layers": 12,
57
+ "patch_size": 16,
58
+ "projection_dim": 384
59
+ }
60
+ }
checkpoint-1190/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07352e52a9d04b444234bc7fdce0937588d811fff4603c9cc18776c1853f67bf
3
+ size 86695548
checkpoint-1190/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4875411e8c0d535b1fd85d0436643875ff02e1595ec14850e9ab405da8a0c943
3
+ size 173507339
checkpoint-1190/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-1190/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc0fea880db68c60449840e725448b65c06c7b8576a88e765d1284ec552448ac
3
+ size 14645
checkpoint-1190/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f75c72ecb4036b4fce96fdb2ffc5536718b0f3642ed55affa47d7eef31d9fbb
3
+ size 1465
checkpoint-1190/trainer_state.json ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1190,
3
+ "best_metric": 0.35589027404785156,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-1190",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1190,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.8403361344537815,
14
+ "grad_norm": 36.58893585205078,
15
+ "learning_rate": 1.6929914529914532e-05,
16
+ "loss": 0.8451,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.8110012620950778,
22
+ "eval_loss": 0.4803926944732666,
23
+ "eval_model_preparation_time": 0.0159,
24
+ "eval_runtime": 184.4364,
25
+ "eval_samples_per_second": 103.103,
26
+ "eval_steps_per_second": 12.888,
27
+ "step": 595
28
+ },
29
+ {
30
+ "epoch": 1.680672268907563,
31
+ "grad_norm": 25.577220916748047,
32
+ "learning_rate": 1.3511111111111112e-05,
33
+ "loss": 0.5616,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.8604859066049643,
39
+ "eval_loss": 0.35589027404785156,
40
+ "eval_model_preparation_time": 0.0159,
41
+ "eval_runtime": 182.6442,
42
+ "eval_samples_per_second": 104.115,
43
+ "eval_steps_per_second": 13.014,
44
+ "step": 1190
45
+ }
46
+ ],
47
+ "logging_steps": 500,
48
+ "max_steps": 2975,
49
+ "num_input_tokens_seen": 0,
50
+ "num_train_epochs": 5,
51
+ "save_steps": 500,
52
+ "stateful_callbacks": {
53
+ "TrainerControl": {
54
+ "args": {
55
+ "should_epoch_stop": false,
56
+ "should_evaluate": false,
57
+ "should_log": false,
58
+ "should_save": true,
59
+ "should_training_stop": false
60
+ },
61
+ "attributes": {}
62
+ }
63
+ },
64
+ "total_flos": 7.416809333563392e+17,
65
+ "train_batch_size": 32,
66
+ "trial_name": null,
67
+ "trial_params": null
68
+ }
checkpoint-1190/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4ead4878e2e55fe3a4fe39438d2aaee6578e9b7eb9c1e0638d6c33b6fd4199
3
+ size 5777
checkpoint-1785/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "Child 0-12",
8
+ "1": "Teenager 13-20",
9
+ "2": "Adult 21-44",
10
+ "3": "Middle Age 45-64",
11
+ "4": "Aged 65+"
12
+ },
13
+ "initializer_factor": 1.0,
14
+ "label2id": {
15
+ "Adult 21-44": 2,
16
+ "Aged 65+": 4,
17
+ "Child 0-12": 0,
18
+ "Middle Age 45-64": 3,
19
+ "Teenager 13-20": 1
20
+ },
21
+ "logit_scale_init_value": 2.6592,
22
+ "model_type": "metaclip_2",
23
+ "problem_type": "single_label_classification",
24
+ "projection_dim": 384,
25
+ "text_config": {
26
+ "attention_dropout": 0.0,
27
+ "dtype": "float32",
28
+ "eos_token_id": 2,
29
+ "hidden_act": "gelu",
30
+ "hidden_size": 384,
31
+ "initializer_factor": 1.0,
32
+ "initializer_range": 0.02,
33
+ "intermediate_size": 1536,
34
+ "layer_norm_eps": 1e-05,
35
+ "max_position_embeddings": 77,
36
+ "model_type": "metaclip_2_text_model",
37
+ "num_attention_heads": 6,
38
+ "num_hidden_layers": 12,
39
+ "projection_dim": 384,
40
+ "vocab_size": 901629
41
+ },
42
+ "transformers_version": "4.57.1",
43
+ "vision_config": {
44
+ "attention_dropout": 0.0,
45
+ "dtype": "float32",
46
+ "hidden_act": "gelu",
47
+ "hidden_size": 384,
48
+ "image_size": 224,
49
+ "initializer_factor": 1.0,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 1536,
52
+ "layer_norm_eps": 1e-05,
53
+ "model_type": "metaclip_2_vision_model",
54
+ "num_attention_heads": 6,
55
+ "num_channels": 3,
56
+ "num_hidden_layers": 12,
57
+ "patch_size": 16,
58
+ "projection_dim": 384
59
+ }
60
+ }
checkpoint-1785/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31e46998bf31cf598ec9e64a99020bb32261f3c05c3f58218c2c4cd3866ef827
3
+ size 86695548
checkpoint-1785/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a3f26a2d4a8b8002521d2986ed0cd612062f6e54b864a4a54c4c8780c42e154f
3
+ size 173507339
checkpoint-1785/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-1785/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60b99619fd48f4c73b844833968383dc2aa4ede042c19b949f9a572817307f66
3
+ size 14645
checkpoint-1785/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0107ac088db75354f166f3d8151158e80817f1e001de3d620e5df182bb7aa957
3
+ size 1465
checkpoint-1785/trainer_state.json ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1785,
3
+ "best_metric": 0.26440638303756714,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-1785",
5
+ "epoch": 3.0,
6
+ "eval_steps": 500,
7
+ "global_step": 1785,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.8403361344537815,
14
+ "grad_norm": 36.58893585205078,
15
+ "learning_rate": 1.6929914529914532e-05,
16
+ "loss": 0.8451,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.8110012620950778,
22
+ "eval_loss": 0.4803926944732666,
23
+ "eval_model_preparation_time": 0.0159,
24
+ "eval_runtime": 184.4364,
25
+ "eval_samples_per_second": 103.103,
26
+ "eval_steps_per_second": 12.888,
27
+ "step": 595
28
+ },
29
+ {
30
+ "epoch": 1.680672268907563,
31
+ "grad_norm": 25.577220916748047,
32
+ "learning_rate": 1.3511111111111112e-05,
33
+ "loss": 0.5616,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.8604859066049643,
39
+ "eval_loss": 0.35589027404785156,
40
+ "eval_model_preparation_time": 0.0159,
41
+ "eval_runtime": 182.6442,
42
+ "eval_samples_per_second": 104.115,
43
+ "eval_steps_per_second": 13.014,
44
+ "step": 1190
45
+ },
46
+ {
47
+ "epoch": 2.5210084033613445,
48
+ "grad_norm": 22.501468658447266,
49
+ "learning_rate": 1.0092307692307693e-05,
50
+ "loss": 0.4402,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.8958245687841817,
56
+ "eval_loss": 0.26440638303756714,
57
+ "eval_model_preparation_time": 0.0159,
58
+ "eval_runtime": 185.7803,
59
+ "eval_samples_per_second": 102.357,
60
+ "eval_steps_per_second": 12.795,
61
+ "step": 1785
62
+ }
63
+ ],
64
+ "logging_steps": 500,
65
+ "max_steps": 2975,
66
+ "num_input_tokens_seen": 0,
67
+ "num_train_epochs": 5,
68
+ "save_steps": 500,
69
+ "stateful_callbacks": {
70
+ "TrainerControl": {
71
+ "args": {
72
+ "should_epoch_stop": false,
73
+ "should_evaluate": false,
74
+ "should_log": false,
75
+ "should_save": true,
76
+ "should_training_stop": false
77
+ },
78
+ "attributes": {}
79
+ }
80
+ },
81
+ "total_flos": 1.1125214000345088e+18,
82
+ "train_batch_size": 32,
83
+ "trial_name": null,
84
+ "trial_params": null
85
+ }
checkpoint-1785/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4ead4878e2e55fe3a4fe39438d2aaee6578e9b7eb9c1e0638d6c33b6fd4199
3
+ size 5777
checkpoint-2380/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "Child 0-12",
8
+ "1": "Teenager 13-20",
9
+ "2": "Adult 21-44",
10
+ "3": "Middle Age 45-64",
11
+ "4": "Aged 65+"
12
+ },
13
+ "initializer_factor": 1.0,
14
+ "label2id": {
15
+ "Adult 21-44": 2,
16
+ "Aged 65+": 4,
17
+ "Child 0-12": 0,
18
+ "Middle Age 45-64": 3,
19
+ "Teenager 13-20": 1
20
+ },
21
+ "logit_scale_init_value": 2.6592,
22
+ "model_type": "metaclip_2",
23
+ "problem_type": "single_label_classification",
24
+ "projection_dim": 384,
25
+ "text_config": {
26
+ "attention_dropout": 0.0,
27
+ "dtype": "float32",
28
+ "eos_token_id": 2,
29
+ "hidden_act": "gelu",
30
+ "hidden_size": 384,
31
+ "initializer_factor": 1.0,
32
+ "initializer_range": 0.02,
33
+ "intermediate_size": 1536,
34
+ "layer_norm_eps": 1e-05,
35
+ "max_position_embeddings": 77,
36
+ "model_type": "metaclip_2_text_model",
37
+ "num_attention_heads": 6,
38
+ "num_hidden_layers": 12,
39
+ "projection_dim": 384,
40
+ "vocab_size": 901629
41
+ },
42
+ "transformers_version": "4.57.1",
43
+ "vision_config": {
44
+ "attention_dropout": 0.0,
45
+ "dtype": "float32",
46
+ "hidden_act": "gelu",
47
+ "hidden_size": 384,
48
+ "image_size": 224,
49
+ "initializer_factor": 1.0,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 1536,
52
+ "layer_norm_eps": 1e-05,
53
+ "model_type": "metaclip_2_vision_model",
54
+ "num_attention_heads": 6,
55
+ "num_channels": 3,
56
+ "num_hidden_layers": 12,
57
+ "patch_size": 16,
58
+ "projection_dim": 384
59
+ }
60
+ }
checkpoint-2380/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ae7c9ede706184b084d89f9fe58b48d90f1520d35aa2ea83f086844db8dbf90
3
+ size 86695548
checkpoint-2380/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67c11a111f4b77c50309b584bb9d179e57794465003ddc8fed0e3ddf7ff59ca4
3
+ size 173507339
checkpoint-2380/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-2380/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac69cff8b26e2632aa36cfe5c915c1da831540a43a12793fac26cffeffd7f018
3
+ size 14645
checkpoint-2380/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a9b29c1e14bde64c335f10467cf6285f0db73e33a154c94e6426b6cc6072b53
3
+ size 1465
checkpoint-2380/trainer_state.json ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2380,
3
+ "best_metric": 0.15674497187137604,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-2380",
5
+ "epoch": 4.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2380,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.8403361344537815,
14
+ "grad_norm": 36.58893585205078,
15
+ "learning_rate": 1.6929914529914532e-05,
16
+ "loss": 0.8451,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.8110012620950778,
22
+ "eval_loss": 0.4803926944732666,
23
+ "eval_model_preparation_time": 0.0159,
24
+ "eval_runtime": 184.4364,
25
+ "eval_samples_per_second": 103.103,
26
+ "eval_steps_per_second": 12.888,
27
+ "step": 595
28
+ },
29
+ {
30
+ "epoch": 1.680672268907563,
31
+ "grad_norm": 25.577220916748047,
32
+ "learning_rate": 1.3511111111111112e-05,
33
+ "loss": 0.5616,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.8604859066049643,
39
+ "eval_loss": 0.35589027404785156,
40
+ "eval_model_preparation_time": 0.0159,
41
+ "eval_runtime": 182.6442,
42
+ "eval_samples_per_second": 104.115,
43
+ "eval_steps_per_second": 13.014,
44
+ "step": 1190
45
+ },
46
+ {
47
+ "epoch": 2.5210084033613445,
48
+ "grad_norm": 22.501468658447266,
49
+ "learning_rate": 1.0092307692307693e-05,
50
+ "loss": 0.4402,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.8958245687841817,
56
+ "eval_loss": 0.26440638303756714,
57
+ "eval_model_preparation_time": 0.0159,
58
+ "eval_runtime": 185.7803,
59
+ "eval_samples_per_second": 102.357,
60
+ "eval_steps_per_second": 12.795,
61
+ "step": 1785
62
+ },
63
+ {
64
+ "epoch": 3.361344537815126,
65
+ "grad_norm": 27.099328994750977,
66
+ "learning_rate": 6.673504273504275e-06,
67
+ "loss": 0.3172,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 4.0,
72
+ "eval_accuracy": 0.9453092132940681,
73
+ "eval_loss": 0.15674497187137604,
74
+ "eval_model_preparation_time": 0.0159,
75
+ "eval_runtime": 181.8977,
76
+ "eval_samples_per_second": 104.542,
77
+ "eval_steps_per_second": 13.068,
78
+ "step": 2380
79
+ }
80
+ ],
81
+ "logging_steps": 500,
82
+ "max_steps": 2975,
83
+ "num_input_tokens_seen": 0,
84
+ "num_train_epochs": 5,
85
+ "save_steps": 500,
86
+ "stateful_callbacks": {
87
+ "TrainerControl": {
88
+ "args": {
89
+ "should_epoch_stop": false,
90
+ "should_evaluate": false,
91
+ "should_log": false,
92
+ "should_save": true,
93
+ "should_training_stop": false
94
+ },
95
+ "attributes": {}
96
+ }
97
+ },
98
+ "total_flos": 1.4833618667126784e+18,
99
+ "train_batch_size": 32,
100
+ "trial_name": null,
101
+ "trial_params": null
102
+ }
checkpoint-2380/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4ead4878e2e55fe3a4fe39438d2aaee6578e9b7eb9c1e0638d6c33b6fd4199
3
+ size 5777
checkpoint-2975/config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "Child 0-12",
8
+ "1": "Teenager 13-20",
9
+ "2": "Adult 21-44",
10
+ "3": "Middle Age 45-64",
11
+ "4": "Aged 65+"
12
+ },
13
+ "initializer_factor": 1.0,
14
+ "label2id": {
15
+ "Adult 21-44": 2,
16
+ "Aged 65+": 4,
17
+ "Child 0-12": 0,
18
+ "Middle Age 45-64": 3,
19
+ "Teenager 13-20": 1
20
+ },
21
+ "logit_scale_init_value": 2.6592,
22
+ "model_type": "metaclip_2",
23
+ "problem_type": "single_label_classification",
24
+ "projection_dim": 384,
25
+ "text_config": {
26
+ "attention_dropout": 0.0,
27
+ "dtype": "float32",
28
+ "eos_token_id": 2,
29
+ "hidden_act": "gelu",
30
+ "hidden_size": 384,
31
+ "initializer_factor": 1.0,
32
+ "initializer_range": 0.02,
33
+ "intermediate_size": 1536,
34
+ "layer_norm_eps": 1e-05,
35
+ "max_position_embeddings": 77,
36
+ "model_type": "metaclip_2_text_model",
37
+ "num_attention_heads": 6,
38
+ "num_hidden_layers": 12,
39
+ "projection_dim": 384,
40
+ "vocab_size": 901629
41
+ },
42
+ "transformers_version": "4.57.1",
43
+ "vision_config": {
44
+ "attention_dropout": 0.0,
45
+ "dtype": "float32",
46
+ "hidden_act": "gelu",
47
+ "hidden_size": 384,
48
+ "image_size": 224,
49
+ "initializer_factor": 1.0,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 1536,
52
+ "layer_norm_eps": 1e-05,
53
+ "model_type": "metaclip_2_vision_model",
54
+ "num_attention_heads": 6,
55
+ "num_channels": 3,
56
+ "num_hidden_layers": 12,
57
+ "patch_size": 16,
58
+ "projection_dim": 384
59
+ }
60
+ }
checkpoint-2975/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8636d6e9ceb085bddd789514fd5d76530eca6981e91ec63c07e9e9c0a3f1c6
3
+ size 86695548
checkpoint-2975/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd9ed98480bfcae67203f6be416f9657396e05e2674bcf5ea3d66c5c80dc3404
3
+ size 173507339
checkpoint-2975/preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
checkpoint-2975/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73068c589ce4d6fd20a35518dde8f3d307e57b652e65f960e80544ca67a4bf93
3
+ size 14645
checkpoint-2975/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a4dc6c6efc1e511bd1933b9a99504a13b1009dcfe1068d8dec69ec4ace54d08
3
+ size 1465
checkpoint-2975/trainer_state.json ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 2975,
3
+ "best_metric": 0.12348528951406479,
4
+ "best_model_checkpoint": "metaclip-2-image-classification/checkpoint-2975",
5
+ "epoch": 5.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2975,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.8403361344537815,
14
+ "grad_norm": 36.58893585205078,
15
+ "learning_rate": 1.6929914529914532e-05,
16
+ "loss": 0.8451,
17
+ "step": 500
18
+ },
19
+ {
20
+ "epoch": 1.0,
21
+ "eval_accuracy": 0.8110012620950778,
22
+ "eval_loss": 0.4803926944732666,
23
+ "eval_model_preparation_time": 0.0159,
24
+ "eval_runtime": 184.4364,
25
+ "eval_samples_per_second": 103.103,
26
+ "eval_steps_per_second": 12.888,
27
+ "step": 595
28
+ },
29
+ {
30
+ "epoch": 1.680672268907563,
31
+ "grad_norm": 25.577220916748047,
32
+ "learning_rate": 1.3511111111111112e-05,
33
+ "loss": 0.5616,
34
+ "step": 1000
35
+ },
36
+ {
37
+ "epoch": 2.0,
38
+ "eval_accuracy": 0.8604859066049643,
39
+ "eval_loss": 0.35589027404785156,
40
+ "eval_model_preparation_time": 0.0159,
41
+ "eval_runtime": 182.6442,
42
+ "eval_samples_per_second": 104.115,
43
+ "eval_steps_per_second": 13.014,
44
+ "step": 1190
45
+ },
46
+ {
47
+ "epoch": 2.5210084033613445,
48
+ "grad_norm": 22.501468658447266,
49
+ "learning_rate": 1.0092307692307693e-05,
50
+ "loss": 0.4402,
51
+ "step": 1500
52
+ },
53
+ {
54
+ "epoch": 3.0,
55
+ "eval_accuracy": 0.8958245687841817,
56
+ "eval_loss": 0.26440638303756714,
57
+ "eval_model_preparation_time": 0.0159,
58
+ "eval_runtime": 185.7803,
59
+ "eval_samples_per_second": 102.357,
60
+ "eval_steps_per_second": 12.795,
61
+ "step": 1785
62
+ },
63
+ {
64
+ "epoch": 3.361344537815126,
65
+ "grad_norm": 27.099328994750977,
66
+ "learning_rate": 6.673504273504275e-06,
67
+ "loss": 0.3172,
68
+ "step": 2000
69
+ },
70
+ {
71
+ "epoch": 4.0,
72
+ "eval_accuracy": 0.9453092132940681,
73
+ "eval_loss": 0.15674497187137604,
74
+ "eval_model_preparation_time": 0.0159,
75
+ "eval_runtime": 181.8977,
76
+ "eval_samples_per_second": 104.542,
77
+ "eval_steps_per_second": 13.068,
78
+ "step": 2380
79
+ },
80
+ {
81
+ "epoch": 4.201680672268908,
82
+ "grad_norm": 28.719318389892578,
83
+ "learning_rate": 3.2547008547008553e-06,
84
+ "loss": 0.2164,
85
+ "step": 2500
86
+ },
87
+ {
88
+ "epoch": 5.0,
89
+ "eval_accuracy": 0.9559318468657972,
90
+ "eval_loss": 0.12348528951406479,
91
+ "eval_model_preparation_time": 0.0159,
92
+ "eval_runtime": 186.47,
93
+ "eval_samples_per_second": 101.979,
94
+ "eval_steps_per_second": 12.747,
95
+ "step": 2975
96
+ }
97
+ ],
98
+ "logging_steps": 500,
99
+ "max_steps": 2975,
100
+ "num_input_tokens_seen": 0,
101
+ "num_train_epochs": 5,
102
+ "save_steps": 500,
103
+ "stateful_callbacks": {
104
+ "TrainerControl": {
105
+ "args": {
106
+ "should_epoch_stop": false,
107
+ "should_evaluate": false,
108
+ "should_log": false,
109
+ "should_save": true,
110
+ "should_training_stop": true
111
+ },
112
+ "attributes": {}
113
+ }
114
+ },
115
+ "total_flos": 1.854202333390848e+18,
116
+ "train_batch_size": 32,
117
+ "trial_name": null,
118
+ "trial_params": null
119
+ }
checkpoint-2975/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4ead4878e2e55fe3a4fe39438d2aaee6578e9b7eb9c1e0638d6c33b6fd4199
3
+ size 5777
config.json ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "MetaClip2ForImageClassification"
4
+ ],
5
+ "dtype": "float32",
6
+ "id2label": {
7
+ "0": "Child 0-12",
8
+ "1": "Teenager 13-20",
9
+ "2": "Adult 21-44",
10
+ "3": "Middle Age 45-64",
11
+ "4": "Aged 65+"
12
+ },
13
+ "initializer_factor": 1.0,
14
+ "label2id": {
15
+ "Adult 21-44": 2,
16
+ "Aged 65+": 4,
17
+ "Child 0-12": 0,
18
+ "Middle Age 45-64": 3,
19
+ "Teenager 13-20": 1
20
+ },
21
+ "logit_scale_init_value": 2.6592,
22
+ "model_type": "metaclip_2",
23
+ "problem_type": "single_label_classification",
24
+ "projection_dim": 384,
25
+ "text_config": {
26
+ "attention_dropout": 0.0,
27
+ "dtype": "float32",
28
+ "eos_token_id": 2,
29
+ "hidden_act": "gelu",
30
+ "hidden_size": 384,
31
+ "initializer_factor": 1.0,
32
+ "initializer_range": 0.02,
33
+ "intermediate_size": 1536,
34
+ "layer_norm_eps": 1e-05,
35
+ "max_position_embeddings": 77,
36
+ "model_type": "metaclip_2_text_model",
37
+ "num_attention_heads": 6,
38
+ "num_hidden_layers": 12,
39
+ "projection_dim": 384,
40
+ "vocab_size": 901629
41
+ },
42
+ "transformers_version": "4.57.1",
43
+ "vision_config": {
44
+ "attention_dropout": 0.0,
45
+ "dtype": "float32",
46
+ "hidden_act": "gelu",
47
+ "hidden_size": 384,
48
+ "image_size": 224,
49
+ "initializer_factor": 1.0,
50
+ "initializer_range": 0.02,
51
+ "intermediate_size": 1536,
52
+ "layer_norm_eps": 1e-05,
53
+ "model_type": "metaclip_2_vision_model",
54
+ "num_attention_heads": 6,
55
+ "num_channels": 3,
56
+ "num_hidden_layers": 12,
57
+ "patch_size": 16,
58
+ "projection_dim": 384
59
+ }
60
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ff8636d6e9ceb085bddd789514fd5d76530eca6981e91ec63c07e9e9c0a3f1c6
3
+ size 86695548
preprocessor_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": {
3
+ "height": 224,
4
+ "width": 224
5
+ },
6
+ "do_center_crop": true,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "image_mean": [
12
+ 0.48145466,
13
+ 0.4578275,
14
+ 0.40821073
15
+ ],
16
+ "image_processor_type": "CLIPImageProcessor",
17
+ "image_std": [
18
+ 0.26862954,
19
+ 0.26130258,
20
+ 0.27577711
21
+ ],
22
+ "processor_class": "CLIPProcessor",
23
+ "resample": 3,
24
+ "rescale_factor": 0.00392156862745098,
25
+ "size": {
26
+ "height": 224,
27
+ "width": 224
28
+ }
29
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce4ead4878e2e55fe3a4fe39438d2aaee6578e9b7eb9c1e0638d6c33b6fd4199
3
+ size 5777