%%% Goto and van de Geijn, ACM TOMS 35(1), July 2008, pages 4:1--4:14.
%%% The DOI is stored bare in the DOI field so DOI-aware styles can link
%%% it; the original resolver URL is kept unchanged for existing users.
@Article{Goto:2008:HPI,
  author =       "Kazushige Goto and Robert van de Geijn",
  title =        "High Performance Implementation of the Level-3
                 {BLAS}",
  journal =      "{ACM} Transactions on Mathematical Software",
  volume =       "35",
  number =       "1",
  pages =        "4:1--4:14",
  month =        jul,
  year =         "2008",
  DOI =          "10.1145/1377603.1377607",
  URL =          "http://doi.acm.org/10.1145/1377603.1377607",
  abstract =     "A simple but highly effective approach for
                 transforming high-performance implementations on
                 cache-based architectures of matrix-matrix
                 multiplication into implementations of other commonly
                 used matrix-matrix computations (the level-3 BLAS) is
                 presented. Exceptional performance is demonstrated on
                 various architectures.",
}