ghc-8.0.1-ww-args-limit.patch 5.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128
  1. commit 5efbf0d243984444cf352ad6f0d147e226c64498
  2. Author: Sergei Trofimovich <slyfox@gentoo.org>
  3. Date: Thu Sep 1 17:34:58 2016 +0100
  4. restore -fmax-worker-args handling (Trac #11565)
  5. maxWorkerArgs handling was accidentally lost 3 years ago
  6. in a major update of demand analysis
  7. commit 0831a12ea2fc73c33652eeec1adc79fa19700578
  8. Old regression is noticeable as:
  9. - code bloat (requires stack reshuffling)
  10. - compilation slowdown (more code to optimise/generate)
  11. - and increased heap usage (DynFlags unboxing/reboxing?)
  12. On a simple compile benchmark this change causes heap
  13. allocation drop from 70G down to 67G (ghc perf build).
  14. Signed-off-by: Sergei Trofimovich <siarheit@google.com>
  15. Reviewers: simonpj, ezyang, goldfire, austin, bgamari
  16. Reviewed By: simonpj, ezyang
  17. Subscribers: thomie
  18. Differential Revision: https://phabricator.haskell.org/D2503
  19. GHC Trac Issues: #11565
  20. diff --git a/compiler/specialise/SpecConstr.hs b/compiler/specialise/SpecConstr.hs
  21. index 10d5614..7166f57 100644
  22. --- a/compiler/specialise/SpecConstr.hs
  23. +++ b/compiler/specialise/SpecConstr.hs
  24. @@ -29,7 +29,7 @@ import CoreFVs ( exprsFreeVarsList )
  25. import CoreMonad
  26. import Literal ( litIsLifted )
  27. import HscTypes ( ModGuts(..) )
  28. -import WwLib ( mkWorkerArgs )
  29. +import WwLib ( isWorkerSmallEnough, mkWorkerArgs )
  30. import DataCon
  31. import Coercion hiding( substCo )
  32. import Rules
  33. @@ -1533,10 +1533,14 @@ specialise env bind_calls (RI { ri_fn = fn, ri_lam_bndrs = arg_bndrs
  34. | Just all_calls <- lookupVarEnv bind_calls fn
  35. = -- pprTrace "specialise entry {" (ppr fn <+> ppr (length all_calls)) $
  36. - do { (boring_call, pats) <- callsToPats env specs arg_occs all_calls
  37. -
  38. + do { (boring_call, all_pats) <- callsToPats env specs arg_occs all_calls
  39. -- Bale out if too many specialisations
  40. - ; let n_pats = length pats
  41. + ; let pats = filter (is_small_enough . fst) all_pats
  42. + is_small_enough vars = isWorkerSmallEnough (sc_dflags env) vars
  43. + -- We are about to construct w/w pair in 'spec_one'.
  44. + -- Omit specialisation leading to high arity workers.
  45. + -- See Note [Limit w/w arity]
  46. + n_pats = length pats
  47. spec_count' = n_pats + spec_count
  48. ; case sc_count env of
  49. Just max | not (sc_force env) && spec_count' > max
  50. diff --git a/compiler/stranal/WwLib.hs b/compiler/stranal/WwLib.hs
  51. index 09bc204..d9460d9 100644
  52. --- a/compiler/stranal/WwLib.hs
  53. +++ b/compiler/stranal/WwLib.hs
  54. @@ -8,6 +8,7 @@
  55. module WwLib ( mkWwBodies, mkWWstr, mkWorkerArgs
  56. , deepSplitProductType_maybe, findTypeShape
  57. + , isWorkerSmallEnough
  58. ) where
  59. #include "HsVersions.h"
  60. @@ -144,7 +145,8 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
  61. wrapper_body = wrap_fn_args . wrap_fn_cpr . wrap_fn_str . applyToVars work_call_args . Var
  62. worker_body = mkLams work_lam_args. work_fn_str . work_fn_cpr . work_fn_args
  63. - ; if useful1 && not (only_one_void_argument) || useful2
  64. + ; if isWorkerSmallEnough dflags work_args
  65. + && (useful1 && not only_one_void_argument || useful2)
  66. then return (Just (worker_args_dmds, wrapper_body, worker_body))
  67. else return Nothing
  68. }
  69. @@ -165,6 +167,12 @@ mkWwBodies dflags fam_envs fun_ty demands res_info one_shots
  70. | otherwise
  71. = False
  72. +-- See Note [Limit w/w arity]
  73. +isWorkerSmallEnough :: DynFlags -> [Var] -> Bool
  74. +isWorkerSmallEnough dflags vars = count isId vars <= maxWorkerArgs dflags
  75. + -- We count only term-level variables (isId) to skip Type, Kind
  76. + -- variables which have no runtime representation.
  77. +
  78. {-
  79. Note [Always do CPR w/w]
  80. ~~~~~~~~~~~~~~~~~~~~~~~~
  81. @@ -178,6 +186,30 @@ a disaster, because then the enclosing function might say it has the CPR
  82. property, but now doesn't and there a cascade of disaster. A good example
  83. is Trac #5920.
  84. +Note [Limit w/w arity]
  85. +~~~~~~~~~~~~~~~~~~~~~~~~
  86. +Guard against high worker arity as it generates a lot of stack traffic.
  87. +A simplified example is Trac #11565#comment:6
  88. +
  89. +Current strategy is very simple: don't perform w/w transformation at all
  90. +if the result produces a worker with arity higher than -fmax-worker-args=.
  91. +
  92. +It is a bit all or nothing, consider
  93. +
  94. + f (x,y) (a,b,c,d,e ... , z) = rhs
  95. +
  96. +Currently we will remove all w/w ness entirely. But actually we could
  97. +w/w on the (x,y) pair... it's the huge product that is the problem.
  98. +
  99. +Could we instead refrain from w/w on an arg-by-arg basis? Yes, that'd
  100. +solve f. But we can get a lot of args from deeply-nested products:
  101. +
  102. + g (a, (b, (c, (d, ...)))) = rhs
  103. +
  104. +This is harder to spot on an arg-by-arg basis. Previously mkWWstr was
  105. +given some "fuel" saying how many arguments it could add; when we ran
  106. +out of fuel it would stop w/wing.
  107. +Still not very clever because it had a left-right bias.
  108. ************************************************************************
  109. * *