% Ke2020 -- conference paper "RecNMP" (ISCA 2020).
% The venue name is stored in booktitle, the field that inproceedings
% entries use for the proceedings/conference title.
% The doi/eprint/url fields all identify the arXiv record 1912.12953.
% NOTE(review): proceedings publisher and page range are not recorded
% here -- add them once confirmed against the published version.
@inproceedings{Ke2020,
  author     = {Ke, Liu and Gupta, Udit and Wu, Carole-Jean and Cho, Benjamin and
                Hempstead, Mark and Reagen, Brandon and Zhang, Xuan and Brooks, David and
                Chandra, Vikas and Diril, Utku and Firoozshahian, Amin and Hazelwood, Kim and
                Jia, Bill and Lee, Hsien-Hsin and Li, Meng and Maher, Bert and
                Mudigere, Dheevatsa and Naumov, Maxim and Schatz, Martin and
                Smelyanskiy, Mikhail and Wang, Xiaodong},
  title      = {{RecNMP}: Accelerating Personalized Recommendation with Near-Memory Processing},
  booktitle  = {The 47th {IEEE/ACM} International Symposium on Computer Architecture ({ISCA} 2020)},
  year       = {2020},
  doi        = {10.48550/arXiv.1912.12953},
  eprint     = {1912.12953},
  eprinttype = {arXiv},
  url        = {https://doi.org/10.48550/arXiv.1912.12953},
  keywords   = {accelerators, deep learning, neural network accelerators, Recommendation},
  abstract   = {Personalized recommendation systems leverage deep learning models and account for the majority of data center AI cycles. Their performance is dominated by memory-bound sparse embedding operations with unique irregular memory access patterns that pose a fundamental challenge to accelerate. This paper proposes a lightweight, commodity DRAM compliant, near-memory processing solution to accelerate personalized recommendation inference. The in-depth characterization of production-grade recommendation models shows that embedding operations with high model-, operator- and data-level parallelism lead to memory bandwidth saturation, limiting recommendation inference performance. We propose RecNMP which provides a scalable solution to improve system throughput, supporting a broad range of sparse embedding models. RecNMP is specifically tailored to production environments with heavy co-location of operators on a single server. Several hardware/software co-optimization techniques such as memory-side caching, table-aware packet scheduling, and hot entry profiling are studied, resulting in up to 9.8x memory latency speedup over a highly-optimized baseline. Overall, RecNMP offers 4.2x throughput improvement and 45.8\% memory energy savings.},
}