@article{ren2024grounding,
  title         = {Grounding {DINO} 1.5: Advance the ``Edge'' of Open-Set Object Detection},
  author        = {Ren*, Tianhe and Jiang*, Qing and Liu*, Shilong and Zeng*, Zhaoyang and Liu, Wenlong and Gao, Han and Huang, Hongjie and Ma, Zhengyu and Jiang, Xiaoke and Chen, Yihao and Xiong, Yuda and Zhang, Hao and Li, Feng and Tang, Peijun and Yu, Kent and Zhang, Lei},
  journal       = {arXiv:2405.10300},
  year          = {2024},
  eprint        = {2405.10300},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
}
LLaVA-Plus: Learning to Use Tools for Creating Multimodal Agents
Shilong Liu, Hao Cheng, Haotian Liu, and 10 more authors
To appear in ECCV, 2024
Equip multimodal large language models with tools to create multimodal agents.
@article{liu2023grounding,
  title         = {{LLaVA-Plus}: Learning to Use Tools for Creating Multimodal Agents},
  author        = {Liu, Shilong and Cheng, Hao and Liu, Haotian and Zhang, Hao and Li, Feng and Ren, Tianhe and Zou, Xueyan and Yang, Jianwei and Su, Hang and Zhu, Jun and Zhang, Lei and Gao, Jianfeng and Li, Chunyuan},
  journal       = {To appear in ECCV},
  year          = {2024},
  codebadge     = {https://img.shields.io/github/stars/LLaVA-VL/LLaVA-Plus-Codebase},
  internal-note = {key says 2023 but year is 2024; key kept unchanged so existing citations do not break},
}
2023
Grounding DINO: Marrying DINO with grounded pre-training for open-set object detection
Shilong Liu, Zhaoyang Zeng, Tianhe Ren, and 8 more authors
To appear in ECCV, 2023
SOTA open-set object detector. 52.5 AP on COCO without COCO training data!
@article{liu2023groundinh,
  title         = {Grounding {DINO}: Marrying {DINO} with Grounded Pre-Training for Open-Set Object Detection},
  author        = {Liu, Shilong and Zeng, Zhaoyang and Ren, Tianhe and Li, Feng and Zhang, Hao and Yang, Jie and Li, Chunyuan and Yang, Jianwei and Su, Hang and Zhu, Jun and others},
  journal       = {To appear in ECCV},
  year          = {2023},
  codebadge     = {https://img.shields.io/github/stars/IDEA-Research/GroundingDINO,https://img.shields.io/github/stars/IDEA-Research/Grounded-Segment-Anything},
  internal-note = {misspelled key (groundinh) kept: liu2023grounding is already used by the LLaVA-Plus entry, and renaming would break existing citations},
}
Mask DINO: Towards A Unified Transformer-based Framework for Object Detection and Segmentation
Feng Li, Hao Zhang, Huaizhe Xu, and 4 more authors
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2023
@inproceedings{FengLi2023MaskDT,
  title     = {Mask {DINO}: Towards A Unified Transformer-based Framework for Object Detection and Segmentation},
  author    = {Li, Feng and Zhang, Hao and Xu, Huaizhe and Liu, Shilong and Zhang, Lei and Ni, Lionel M. and Shum, Heung-Yeung},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  year      = {2023},
}
DQ-DETR: Dual Query Detection Transformer for Phrase Extraction and Grounding
Liu Shilong, Liang Yaoyuan, Huang Shijia, and 5 more authors
In Proceedings of the AAAI Conference on Artificial Intelligence, 2023
A comparison of object detection, REC, and phrase grounding tasks.
@inproceedings{dqdetr,
  title     = {{DQ-DETR}: Dual Query Detection Transformer for Phrase Extraction and Grounding},
  author    = {Liu, Shilong and Liang, Yaoyuan and Huang, Shijia and Li, Feng and Zhang, Hao and Su, Hang and Zhu, Jun and Zhang, Lei},
  booktitle = {Proceedings of the AAAI Conference on Artificial Intelligence},
  year      = {2023},
}
DINO: DETR with Improved DeNoising Anchor Boxes for End-to-End Object Detection
Hao Zhang*, Feng Li*, Shilong Liu*, and 5 more authors
In International Conference on Learning Representations, 2023
The first DETR-based object detector that achieved 1st on the COCO detection leaderboard.
@inproceedings{zhang2022dino,
  title     = {{DINO}: {DETR} with Improved {DeNoising} Anchor Boxes for End-to-End Object Detection},
  author    = {Zhang*, Hao and Li*, Feng and Liu*, Shilong and Zhang, Lei and Su, Hang and Zhu, Jun and Ni, Lionel M. and Shum, Heung-Yeung},
  booktitle = {International Conference on Learning Representations},
  year      = {2023},
  codebadge = {https://img.shields.io/github/stars/IDEA-Research/DINO},
}
2022
DN-DETR: Accelerate DETR training by introducing query denoising
Feng Li*, Hao Zhang*, Shilong Liu, and 3 more authors
In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition, 2022
A novel denoising training strategy for DETR, achieving faster convergence and better performance.
@inproceedings{li2022dn,
  title     = {{DN-DETR}: Accelerate {DETR} Training by Introducing Query Denoising},
  author    = {Li*, Feng and Zhang*, Hao and Liu, Shilong and Guo, Jian and Ni, Lionel M. and Zhang, Lei},
  booktitle = {Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition},
  pages     = {13619--13627},
  year      = {2022},
}
DAB-DETR: Dynamic Anchor Boxes are Better Queries for DETR
Shilong Liu, Feng Li, Hao Zhang, and 5 more authors
In International Conference on Learning Representations, 2022
A deep understanding of DETR’s queries, formulating them as anchor boxes.
@inproceedings{liu2022dabdetr,
  title     = {{DAB}-{DETR}: Dynamic Anchor Boxes are Better Queries for {DETR}},
  author    = {Liu, Shilong and Li, Feng and Zhang, Hao and Yang, Xiao and Qi, Xianbiao and Su, Hang and Zhu, Jun and Zhang, Lei},
  booktitle = {International Conference on Learning Representations},
  year      = {2022},
  url       = {https://openreview.net/forum?id=oMI9PjOb9Jl},
  codebadge = {https://img.shields.io/github/stars/IDEA-Research/DAB-DETR},
}
2021
Query2Label: A Simple Transformer Way to Multi-Label Classification
Shilong Liu, Lei Zhang, Xiao Yang, and 2 more authors
arXiv:2107.10834, 2021
A novel transformer-based multi-label classification model, achieving SOTA on four benchmarks.
@article{liu2021query2label,
  title         = {{Query2Label}: A Simple Transformer Way to Multi-Label Classification},
  author        = {Liu, Shilong and Zhang, Lei and Yang, Xiao and Su, Hang and Zhu, Jun},
  journal       = {arXiv:2107.10834},
  year          = {2021},
  eprint        = {2107.10834},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CV},
  codebadge     = {https://img.shields.io/github/stars/SlongLiu/query2labels},
}