In this work, we introduce DreamWaltz-G, a novel learning framework for text-driven 3D avatar creation and expressive whole-body animation.
The core of this framework lies in the proposed Skeleton-guided Score Distillation and Hybrid 3D Gaussian Avatar Representation. Specifically, the proposed skeleton-guided score distillation integrates skeleton controls from 3D human templates into 2D diffusion models, enhancing the consistency of SDS supervision in terms of view and human pose. This facilitates the generation of high-quality avatars, mitigating issues such as multiple faces, extra limbs, and blurring. The proposed hybrid 3D Gaussian avatar representation builds on the efficient 3D Gaussians, combining neural implicit fields and parameterized 3D meshes to enable real-time rendering, stable SDS optimization, and expressive animation.
Extensive experiments demonstrate that DreamWaltz-G is highly effective in generating and animating 3D avatars, outperforming existing methods in both visual quality and animation expressiveness. Our framework further supports diverse applications, including human video reenactment and multi-subject scene composition.
% DreamWaltz-G arXiv preprint. The eprint field holds only the bare arXiv
% identifier; archivePrefix and primaryClass supply the "arXiv" label and the
% subject class, so eprint-aware styles can build the label and link themselves.
@misc{huang2024dreamwaltz-g,
  author        = {Huang, Yukun and Wang, Jianan and Zeng, Ailing and Zha, Zheng-Jun and Zhang, Lei and Liu, Xihui},
  title         = {{DreamWaltz-G}: Expressive {3D} {Gaussian} Avatars from Skeleton-Guided {2D} Diffusion},
  year          = {2024},
  eprint        = {2409.17145},
  archivePrefix = {arXiv},
  primaryClass  = {cs.CV},
}
% DreamWaltz conference paper. The citation key says 2024 while the year field
% is 2023; the key is kept unchanged so existing citations keep resolving.
@inproceedings{huang2024dreamwaltz,
  author    = {Huang, Yukun and Wang, Jianan and Zeng, Ailing and Cao, He and Qi, Xianbiao and Shi, Yukai and Zha, Zheng-Jun and Zhang, Lei},
  title     = {{DreamWaltz}: Make a Scene with Complex {3D} Animatable Avatars},
  booktitle = {Proceedings of the 37th International Conference on Neural Information Processing Systems},
  pages     = {4566--4584},
  year      = {2023},
}