Spaces:
Paused
Paused
| random_seed: 1 | |
| WANDB: | |
| project: StructDiffusion | |
| save_dir: ${base_dirs.wandb_dir} | |
| name: conditional_pose_diffusion | |
| DATASET: | |
| data_root: ${base_dirs.data} | |
| vocab_dir: ${base_dirs.data}/type_vocabs_coarse.json | |
| # important: settings in the MODEL section reference or depend on these flags | |
| use_virtual_structure_frame: True | |
| ignore_distractor_objects: True | |
| ignore_rgb: True | |
| # the following are determined by the dataset | |
| max_num_target_objects: 7 | |
| max_num_distractor_objects: 5 | |
| max_num_shape_parameters: 5 | |
| # set to zero because they are not used for now | |
| max_num_rearrange_features: 0 | |
| max_num_anchor_features: 0 | |
| num_pts: 1024 | |
| filter_num_moved_objects_range: | |
| data_augmentation: False | |
| DATALOADER: | |
| batch_size: 64 | |
| num_workers: 8 | |
| pin_memory: True | |
| MODEL: | |
| # transformer encoder | |
| encoder_input_dim: 256 | |
| num_attention_heads: 8 | |
| encoder_hidden_dim: 512 | |
| encoder_dropout: 0.0 | |
| encoder_activation: relu | |
| encoder_num_layers: 8 | |
| # output head | |
| structure_dropout: 0 | |
| object_dropout: 0 | |
| # pc encoder | |
| ignore_rgb: ${DATASET.ignore_rgb} | |
| pc_emb_dim: 256 | |
| posed_pc_emb_dim: 80 | |
| # pose encoder | |
| pose_emb_dim: 80 | |
| # language | |
| word_emb_dim: 160 | |
| # diffusion step | |
| time_emb_dim: 80 | |
| # sequence embeddings | |
| # max_seq_size = max_num_target_objects (+ max_num_distractor_objects if ignore_distractor_objects is False) | |
| max_seq_size: 7 | |
| max_token_type_size: 4 | |
| seq_pos_emb_dim: 8 | |
| seq_type_emb_dim: 8 | |
| # virtual frame | |
| use_virtual_structure_frame: ${DATASET.use_virtual_structure_frame} | |
| NOISE_SCHEDULE: | |
| timesteps: 200 | |
| LOSS: | |
| type: huber | |
| OPTIMIZER: | |
| lr: 0.0001 | |
| weight_decay: 0 #0.0001 | |
| # lr_restart: 3000 | |
| # warmup: 10 | |
| TRAINER: | |
| max_epochs: 200 | |
| gradient_clip_val: 1.0 | |
| gpus: 1 | |
| deterministic: False | |
| # enable_progress_bar: False |