% arXiv preprint 2307.04964, imported from dblp via BibSonomy.
% The added-at, biburl, ee, interhash, intrahash, keywords and timestamp fields
% are export bookkeeping; standard styles ignore them.
@article{journals/corr/abs-2307-04964,
  author     = {Zheng, Rui and Dou, Shihan and Gao, Songyang and Hua, Yuan and Shen, Wei and Wang, Binghai and Liu, Yan and Jin, Senjie and Liu, Qin and Zhou, Yuhao and Xiong, Limao and Chen, Lu and Xi, Zhiheng and Xu, Nuo and Lai, Wenbin and Zhu, Minghao and Chang, Cheng and Yin, Zhangyue and Weng, Rongxiang and Cheng, Wensen and Huang, Haoran and Sun, Tianxiang and Yan, Hang and Gui, Tao and Zhang, Qi and Qiu, Xipeng and Huang, Xuanjing},
  title      = {Secrets of {RLHF} in Large Language Models Part {I}: {PPO}},
  journal    = {CoRR},
  volume     = {abs/2307.04964},
  year       = {2023},
  doi        = {10.48550/arXiv.2307.04964},
  eprint     = {2307.04964},
  eprinttype = {arXiv},
  url        = {http://dblp.uni-trier.de/db/journals/corr/corr2307.html#abs-2307-04964},
  ee         = {https://doi.org/10.48550/arXiv.2307.04964},
  keywords   = {dblp},
  added-at   = {2024-05-07T00:00:00.000+0200},
  timestamp  = {2024-05-13T07:36:16.000+0200},
  biburl     = {https://www.bibsonomy.org/bibtex/250073a83934f9b2ad5e695c4aacba75c/dblp},
  interhash  = {528691cd341230b3090e52f60b5c3027},
  intrahash  = {50073a83934f9b2ad5e695c4aacba75c},
}