broadwell

The plots show the relative difference in runtime, (LoopVectorization.jl - libxsmm) / libxsmm, for every (m, n, k) triplet. Negative (red) values mean LoopVectorization.jl was faster; positive (blue) values mean libxsmm was faster.

1

Q₁ = -0.176. Q₂ = -0.072. Q₃ = 0.690

2

Q₁ = -0.152. Q₂ = 0.154. Q₃ = 0.835

3

Q₁ = -0.368. Q₂ = -0.288. Q₃ = 0.034

4

Q₁ = -0.171. Q₂ = -0.106. Q₃ = -0.061

5

Q₁ = 0.001. Q₂ = 0.214. Q₃ = 0.460

6

Q₁ = -0.235. Q₂ = -0.045. Q₃ = 0.301

7

Q₁ = -0.497. Q₂ = -0.459. Q₃ = -0.349

8

Q₁ = -0.135. Q₂ = -0.093. Q₃ = -0.056

9

Q₁ = -0.121. Q₂ = -0.045. Q₃ = 0.234

10

Q₁ = -0.149. Q₂ = -0.110. Q₃ = -0.016

11

Q₁ = -0.351. Q₂ = -0.307. Q₃ = -0.219

12

Q₁ = -0.028. Q₂ = 0.025. Q₃ = 0.045

13

Q₁ = -0.097. Q₂ = -0.057. Q₃ = 0.023

14

Q₁ = -0.096. Q₂ = -0.060. Q₃ = 0.014

15

Q₁ = -0.295. Q₂ = -0.246. Q₃ = -0.168

16

Q₁ = 0.072. Q₂ = 0.126. Q₃ = 0.159

17

Q₁ = -0.002. Q₂ = 0.050. Q₃ = 0.109

18

Q₁ = -0.004. Q₂ = 0.033. Q₃ = 0.085

19

Q₁ = -0.199. Q₂ = -0.156. Q₃ = -0.097

20

Q₁ = -0.004. Q₂ = 0.028. Q₃ = 0.067

21

Q₁ = -0.132. Q₂ = -0.089. Q₃ = -0.015

22

Q₁ = -0.130. Q₂ = -0.088. Q₃ = -0.015

23

Q₁ = -0.282. Q₂ = -0.232. Q₃ = -0.153

24

Q₁ = -0.028. Q₂ = 0.006. Q₃ = 0.031

25

Q₁ = -0.061. Q₂ = -0.022. Q₃ = 0.027

26

Q₁ = -0.061. Q₂ = -0.021. Q₃ = 0.027

27

Q₁ = -0.201. Q₂ = -0.155. Q₃ = -0.089

28

Q₁ = 0.080. Q₂ = 0.132. Q₃ = 0.158

29

Q₁ = -0.046. Q₂ = -0.012. Q₃ = 0.037

30

Q₁ = -0.038. Q₂ = -0.004. Q₃ = 0.042

31

Q₁ = -0.177. Q₂ = -0.135. Q₃ = -0.077

32

Q₁ = 0.054. Q₂ = 0.094. Q₃ = 0.122