{"total":10,"query":{"q":null,"venue":null,"year":null},"results":[{"id":"attention-2017","title":"Attention Is All You Need","authors":["Vaswani","Shazeer","Parmar","Uszkoreit","Jones","Gomez","Kaiser","Polosukhin"],"venue":"NeurIPS 2017","year":2017,"citations":120000,"tags":["transformer","attention"],"abstract":"We propose a new simple network architecture, the Transformer, based solely on attention mechanisms."},{"id":"scaling-laws-2020","title":"Scaling Laws for Neural Language Models","authors":["Kaplan","McCandlish","Henighan","Brown","Chess","Child"],"venue":"arXiv 2020","year":2020,"citations":4800,"tags":["scaling","LLM","compute"],"abstract":"We study empirical scaling laws for language model performance on the cross-entropy loss."},{"id":"rlhf-2022","title":"Training language models to follow instructions with human feedback","authors":["Ouyang","Wu","Jiang","Almeida","Wainwright","Mishkin"],"venue":"NeurIPS 2022","year":2022,"citations":9100,"tags":["RLHF","instruction-tuning","OpenAI"],"abstract":"We show an avenue for aligning language models with user intent by fine-tuning with human feedback."},{"id":"constitutional-ai","title":"Constitutional AI: Harmlessness from AI Feedback","authors":["Bai","Jones","Ndousse","Askell","Chen"],"venue":"arXiv 2022","year":2022,"citations":2100,"tags":["safety","alignment","Anthropic"],"abstract":"We propose a method for training AI systems to be helpful, harmless, and honest using AI-generated feedback."},{"id":"lora-2021","title":"LoRA: Low-Rank Adaptation of Large Language Models","authors":["Hu","Shen","Wallis","Allen-Zhu","Li","Wang"],"venue":"ICLR 2022","year":2021,"citations":9100,"tags":["fine-tuning","LoRA","efficiency"],"abstract":"We propose Low-Rank Adaptation for adapting large pre-trained language models to downstream tasks."},{"id":"llama-3-2024","title":"The Llama 3 Herd of Models","authors":["Meta AI Research"],"venue":"arXiv 2024","year":2024,"citations":3400,"tags":["LLM","Meta","open-source"],"abstract":"We present Llama 3, a family of large language models trained with up to 405B parameters on 15T tokens."},{"id":"applebot-analysis","title":"Who Is Crawling Your Website? Large-Scale Analysis of AI Bot Traffic 2022-2024","authors":["Web Observatory Lab"],"venue":"FAccT 2024","year":2024,"citations":670,"tags":["web-crawling","Applebot","GPTBot","AI-training-data"],"abstract":"A longitudinal analysis of AI crawler traffic across 8,400 domains. 
Applebot grew 840% between Q3 2023 and Q4 2024."},{"id":"data-provenance","title":"The Data Provenance Initiative: A Large Scale Audit of Dataset Licensing","authors":["Longpre","Mahari","Chen","Obeng-Marnu"],"venue":"ICML 2024","year":2024,"citations":1100,"tags":["datasets","licensing","copyright"],"abstract":"We audit the provenance of 1,800 fine-tuning datasets covering 3,700 text sources."},{"id":"deepseek-r1-paper","title":"DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning","authors":["DeepSeek AI"],"venue":"arXiv 2025","year":2025,"citations":890,"tags":["reasoning","RL","chain-of-thought"],"abstract":"We introduce DeepSeek-R1, our first-generation reasoning model trained via large-scale reinforcement learning."},{"id":"fineweb-2024","title":"FineWeb: Decanting the Web for the Finest Text Data at Scale","authors":["Penedo","Kydlicek","allal","Lozhkov","Mitchell"],"venue":"NeurIPS 2024","year":2024,"citations":560,"tags":["web-data","pretraining","Common Crawl"],"abstract":"We introduce FineWeb, a 15-trillion token dataset derived from 96 Common Crawl snapshots."}]}