--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/components/golang/patches/0037-release-branch.go1.5-runtime-adjust-huge-page-flags-.patch Thu Jan 21 09:20:59 2016 -0800
@@ -0,0 +1,159 @@
+From 244294f821c9256394d29218ee1f17ab4313551f Mon Sep 17 00:00:00 2001
+From: Austin Clements <[email protected]>
+Date: Wed, 30 Sep 2015 11:52:54 -0400
+Subject: [PATCH 37/63] [release-branch.go1.5] runtime: adjust huge page flags
+ only on huge page granularity
+
+This fixes an issue where the runtime panics with "out of memory" or
+"cannot allocate memory" even though there's ample memory by reducing
+the number of memory mappings created by the memory allocator.
+
+Commit 7e1b61c worked around issue #8832 where Linux's transparent
+huge page support could dramatically increase the RSS of a Go process
+by setting the MADV_NOHUGEPAGE flag on any regions of pages released
+to the OS with MADV_DONTNEED. This had the side effect of also
+increasing the number of VMAs (memory mappings) in a Go address space
+because a separate VMA is needed for every region of the virtual
+address space with different flags. Unfortunately, by default, Linux
+limits the number of VMAs in an address space to 65530, and a large
+heap can quickly reach this limit when the runtime starts scavenging
+memory.
+
+This commit dramatically reduces the number of VMAs. It does this
+primarily by only adjusting the huge page flag at huge page
+granularity. With this change, on amd64, even a pessimal heap that
+alternates between MADV_NOHUGEPAGE and MADV_HUGEPAGE must reach 128GB
+to reach the VMA limit. Because of this rounding to huge page
+granularity, this change is also careful to leave large used and
+unused regions huge page-enabled.
+
+This change reduces the maximum number of VMAs during the runtime
+benchmarks with GODEBUG=scavenge=1 from 692 to 49.
+
+Fixes #12233.
+
+Change-Id: Ic397776d042f20d53783a1cacf122e2e2db00584
+Reviewed-on: https://go-review.googlesource.com/15191
+Reviewed-by: Keith Randall <[email protected]>
+Reviewed-on: https://go-review.googlesource.com/16980
+Run-TryBot: Austin Clements <[email protected]>
+Reviewed-by: Ian Lance Taylor <[email protected]>
+Reviewed-by: Russ Cox <[email protected]>
+---
+ src/runtime/mem_linux.go | 94 +++++++++++++++++++++++++++++++++++++++---------
+ 1 file changed, 77 insertions(+), 17 deletions(-)
+
+diff --git a/src/runtime/mem_linux.go b/src/runtime/mem_linux.go
+index f988e75..e8c8999 100644
+--- a/src/runtime/mem_linux.go
++++ b/src/runtime/mem_linux.go
+@@ -69,29 +69,89 @@ func sysAlloc(n uintptr, sysStat *uint64) unsafe.Pointer {
+ }
+
+ func sysUnused(v unsafe.Pointer, n uintptr) {
+- var s uintptr = hugePageSize // division by constant 0 is a compile-time error :(
+- if s != 0 && (uintptr(v)%s != 0 || n%s != 0) {
+- // See issue 8832
+- // Linux kernel bug: https://bugzilla.kernel.org/show_bug.cgi?id=93111
+- // Mark the region as NOHUGEPAGE so the kernel's khugepaged
+- // doesn't undo our DONTNEED request. khugepaged likes to migrate
+- // regions which are only partially mapped to huge pages, including
+- // regions with some DONTNEED marks. That needlessly allocates physical
+- // memory for our DONTNEED regions.
+- madvise(v, n, _MADV_NOHUGEPAGE)
++ // By default, Linux's "transparent huge page" support will
++ // merge pages into a huge page if there's even a single
++ // present regular page, undoing the effects of the DONTNEED
++ // below. On amd64, that means khugepaged can turn a single
++ // 4KB page to 2MB, bloating the process's RSS by as much as
++ // 512X. (See issue #8832 and Linux kernel bug
++ // https://bugzilla.kernel.org/show_bug.cgi?id=93111)
++ //
++ // To work around this, we explicitly disable transparent huge
++ // pages when we release pages of the heap. However, we have
++ // to do this carefully because changing this flag tends to
++ // split the VMA (memory mapping) containing v in to three
++ // VMAs in order to track the different values of the
++ // MADV_NOHUGEPAGE flag in the different regions. There's a
++ // default limit of 65530 VMAs per address space (sysctl
++ // vm.max_map_count), so we must be careful not to create too
++ // many VMAs (see issue #12233).
++ //
++ // Since huge pages are huge, there's little use in adjusting
++ // the MADV_NOHUGEPAGE flag on a fine granularity, so we avoid
++ // exploding the number of VMAs by only adjusting the
++ // MADV_NOHUGEPAGE flag on a large granularity. This still
++ // gets most of the benefit of huge pages while keeping the
++ // number of VMAs under control. With hugePageSize = 2MB, even
++ // a pessimal heap can reach 128GB before running out of VMAs.
++ if hugePageSize != 0 {
++ var s uintptr = hugePageSize // division by constant 0 is a compile-time error :(
++
++ // If it's a large allocation, we want to leave huge
++ // pages enabled. Hence, we only adjust the huge page
++ // flag on the huge pages containing v and v+n-1, and
++ // only if those aren't aligned.
++ var head, tail uintptr
++ if uintptr(v)%s != 0 {
++ // Compute huge page containing v.
++ head = uintptr(v) &^ (s - 1)
++ }
++ if (uintptr(v)+n)%s != 0 {
++ // Compute huge page containing v+n-1.
++ tail = (uintptr(v) + n - 1) &^ (s - 1)
++ }
++
++ // Note that madvise will return EINVAL if the flag is
++ // already set, which is quite likely. We ignore
++ // errors.
++ if head != 0 && head+hugePageSize == tail {
++ // head and tail are different but adjacent,
++ // so do this in one call.
++ madvise(unsafe.Pointer(head), 2*hugePageSize, _MADV_NOHUGEPAGE)
++ } else {
++ // Advise the huge pages containing v and v+n-1.
++ if head != 0 {
++ madvise(unsafe.Pointer(head), hugePageSize, _MADV_NOHUGEPAGE)
++ }
++ if tail != 0 && tail != head {
++ madvise(unsafe.Pointer(tail), hugePageSize, _MADV_NOHUGEPAGE)
++ }
++ }
+ }
++
+ madvise(v, n, _MADV_DONTNEED)
+ }
+
+ func sysUsed(v unsafe.Pointer, n uintptr) {
+ if hugePageSize != 0 {
+- // Undo the NOHUGEPAGE marks from sysUnused. There is no alignment check
+- // around this call as spans may have been merged in the interim.
+- // Note that this might enable huge pages for regions which were
+- // previously disabled. Unfortunately there is no easy way to detect
+- // what the previous state was, and in any case we probably want huge
+- // pages to back our heap if the kernel can arrange that.
+- madvise(v, n, _MADV_HUGEPAGE)
++ // Partially undo the NOHUGEPAGE marks from sysUnused
++ // for whole huge pages between v and v+n. This may
++ // leave huge pages off at the end points v and v+n
++ // even though allocations may cover these entire huge
++ // pages. We could detect this and undo NOHUGEPAGE on
++ // the end points as well, but it's probably not worth
++ // the cost because when neighboring allocations are
++ // freed sysUnused will just set NOHUGEPAGE again.
++ var s uintptr = hugePageSize
++
++ // Round v up to a huge page boundary.
++ beg := (uintptr(v) + (s - 1)) &^ (s - 1)
++ // Round v+n down to a huge page boundary.
++ end := (uintptr(v) + n) &^ (s - 1)
++
++ if beg < end {
++ madvise(unsafe.Pointer(beg), end-beg, _MADV_HUGEPAGE)
++ }
+ }
+ }
+
+--
+2.6.1
+