1 /*
2 * Copyright (c) 2015, 2020, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 */
23
24 #include "precompiled.hpp"
25 #include "gc/z/zArray.inline.hpp"
26 #include "gc/z/zErrno.hpp"
27 #include "gc/z/zGlobals.hpp"
28 #include "gc/z/zLargePages.inline.hpp"
29 #include "gc/z/zMountPoint_linux.hpp"
30 #include "gc/z/zNUMA.inline.hpp"
31 #include "gc/z/zPhysicalMemoryBacking_linux.hpp"
32 #include "gc/z/zSyscall_linux.hpp"
33 #include "logging/log.hpp"
34 #include "runtime/init.hpp"
35 #include "runtime/os.hpp"
36 #include "utilities/align.hpp"
37 #include "utilities/debug.hpp"
38 #include "utilities/growableArray.hpp"
39
40 #include <fcntl.h>
41 #include <stdio.h>
42 #include <sys/mman.h>
43 #include <sys/stat.h>
44 #include <sys/statfs.h>
45 #include <sys/types.h>
46 #include <unistd.h>
47
48 //
49 // Support for building on older Linux systems
50 //
51
52 // memfd_create(2) flags
53 #ifndef MFD_CLOEXEC
54 #define MFD_CLOEXEC 0x0001U
55 #endif
56 #ifndef MFD_HUGETLB
57 #define MFD_HUGETLB 0x0004U
58 #endif
59
60 // open(2) flags
61 #ifndef O_CLOEXEC
62 #define O_CLOEXEC 02000000
63 #endif
64 #ifndef O_TMPFILE
65 #define O_TMPFILE (020000000 | O_DIRECTORY)
66 #endif
67
68 // fallocate(2) flags
69 #ifndef FALLOC_FL_KEEP_SIZE
70 #define FALLOC_FL_KEEP_SIZE 0x01
71 #endif
72 #ifndef FALLOC_FL_PUNCH_HOLE
73 #define FALLOC_FL_PUNCH_HOLE 0x02
74 #endif
75
76 // Filesystem types, see statfs(2)
77 #ifndef TMPFS_MAGIC
78 #define TMPFS_MAGIC 0x01021994
79 #endif
80 #ifndef HUGETLBFS_MAGIC
81 #define HUGETLBFS_MAGIC 0x958458f6
82 #endif
83
84 // Filesystem names
85 #define ZFILESYSTEM_TMPFS "tmpfs"
86 #define ZFILESYSTEM_HUGETLBFS "hugetlbfs"
87
// Proc file entry for max map count
89 #define ZFILENAME_PROC_MAX_MAP_COUNT "/proc/sys/vm/max_map_count"
90
// Sysfs file for transparent huge pages on tmpfs
92 #define ZFILENAME_SHMEM_ENABLED "/sys/kernel/mm/transparent_hugepage/shmem_enabled"
93
94 // Java heap filename
95 #define ZFILENAME_HEAP "java_heap"
96
// Candidate tmpfs mount points, listed from highest to lowest priority.
// The list is terminated by a NULL entry.
static const char* z_preferred_tmpfs_mountpoints[] = { "/dev/shm", "/run/shm", NULL };
103
// Candidate hugetlbfs mount points, listed from highest to lowest priority.
// The list is terminated by a NULL entry.
static const char* z_preferred_hugetlbfs_mountpoints[] = { "/dev/hugepages", "/hugepages", NULL };
110
// Starts out true and is cleared the first time the fallocate(2) syscall
// reports ENOSYS or EOPNOTSUPP; from then on the compatibility emulation
// is used for filling holes.
static bool z_fallocate_supported = true;

// Number of remaining retries when committing memory on hugetlbfs fails
// with ENOSPC before VM initialization has completed.
static int z_fallocate_hugetlbfs_attempts = 3;
113
114 ZPhysicalMemoryBacking::ZPhysicalMemoryBacking() :
115 _fd(-1),
116 _size(0),
117 _filesystem(0),
118 _block_size(0),
119 _available(0),
120 _initialized(false) {
121
122 // Create backing file
123 _fd = create_fd(ZFILENAME_HEAP);
124 if (_fd == -1) {
125 return;
126 }
127
128 // Get filesystem statistics
129 struct statfs buf;
130 if (fstatfs(_fd, &buf) == -1) {
131 ZErrno err;
132 log_error(gc)("Failed to determine filesystem type for backing file (%s)", err.to_string());
133 return;
134 }
135
136 _filesystem = buf.f_type;
137 _block_size = buf.f_bsize;
138 _available = buf.f_bavail * _block_size;
139
140 // Make sure we're on a supported filesystem
141 if (!is_tmpfs() && !is_hugetlbfs()) {
142 log_error(gc)("Backing file must be located on a %s or a %s filesystem",
143 ZFILESYSTEM_TMPFS, ZFILESYSTEM_HUGETLBFS);
144 return;
145 }
146
147 // Make sure the filesystem type matches requested large page type
148 if (ZLargePages::is_transparent() && !is_tmpfs()) {
149 log_error(gc)("-XX:+UseTransparentHugePages can only be enable when using a %s filesystem",
150 ZFILESYSTEM_TMPFS);
151 return;
152 }
153
154 if (ZLargePages::is_transparent() && !tmpfs_supports_transparent_huge_pages()) {
155 log_error(gc)("-XX:+UseTransparentHugePages on a %s filesystem not supported by kernel",
156 ZFILESYSTEM_TMPFS);
157 return;
158 }
159
160 if (ZLargePages::is_explicit() && !is_hugetlbfs()) {
161 log_error(gc)("-XX:+UseLargePages (without -XX:+UseTransparentHugePages) can only be enabled "
162 "when using a %s filesystem", ZFILESYSTEM_HUGETLBFS);
163 return;
164 }
165
166 if (!ZLargePages::is_explicit() && is_hugetlbfs()) {
167 log_error(gc)("-XX:+UseLargePages must be enabled when using a %s filesystem",
168 ZFILESYSTEM_HUGETLBFS);
169 return;
170 }
171
172 const size_t expected_block_size = is_tmpfs() ? os::vm_page_size() : os::large_page_size();
173 if (expected_block_size != _block_size) {
174 log_error(gc)("%s filesystem has unexpected block size " SIZE_FORMAT " (expected " SIZE_FORMAT ")",
175 is_tmpfs() ? ZFILESYSTEM_TMPFS : ZFILESYSTEM_HUGETLBFS, _block_size, expected_block_size);
176 return;
177 }
178
179 // Successfully initialized
180 _initialized = true;
181 }
182
183 int ZPhysicalMemoryBacking::create_mem_fd(const char* name) const {
184 // Create file name
185 char filename[PATH_MAX];
186 snprintf(filename, sizeof(filename), "%s%s", name, ZLargePages::is_explicit() ? ".hugetlb" : "");
187
188 // Create file
189 const int extra_flags = ZLargePages::is_explicit() ? MFD_HUGETLB : 0;
190 const int fd = ZSyscall::memfd_create(filename, MFD_CLOEXEC | extra_flags);
191 if (fd == -1) {
192 ZErrno err;
193 log_debug(gc, init)("Failed to create memfd file (%s)",
194 ((ZLargePages::is_explicit() && err == EINVAL) ? "Hugepages not supported" : err.to_string()));
195 return -1;
196 }
197
198 log_info(gc, init)("Heap backed by file: /memfd:%s", filename);
199
200 return fd;
201 }
202
203 int ZPhysicalMemoryBacking::create_file_fd(const char* name) const {
204 const char* const filesystem = ZLargePages::is_explicit()
205 ? ZFILESYSTEM_HUGETLBFS
206 : ZFILESYSTEM_TMPFS;
207 const char** const preferred_mountpoints = ZLargePages::is_explicit()
208 ? z_preferred_hugetlbfs_mountpoints
209 : z_preferred_tmpfs_mountpoints;
210
211 // Find mountpoint
212 ZMountPoint mountpoint(filesystem, preferred_mountpoints);
213 if (mountpoint.get() == NULL) {
214 log_error(gc)("Use -XX:AllocateHeapAt to specify the path to a %s filesystem", filesystem);
215 return -1;
216 }
217
218 // Try to create an anonymous file using the O_TMPFILE flag. Note that this
219 // flag requires kernel >= 3.11. If this fails we fall back to open/unlink.
220 const int fd_anon = os::open(mountpoint.get(), O_TMPFILE|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
221 if (fd_anon == -1) {
222 ZErrno err;
223 log_debug(gc, init)("Failed to create anonymous file in %s (%s)", mountpoint.get(),
224 (err == EINVAL ? "Not supported" : err.to_string()));
225 } else {
226 // Get inode number for anonymous file
227 struct stat stat_buf;
228 if (fstat(fd_anon, &stat_buf) == -1) {
229 ZErrno err;
230 log_error(gc)("Failed to determine inode number for anonymous file (%s)", err.to_string());
231 return -1;
232 }
233
234 log_info(gc, init)("Heap backed by file: %s/#" UINT64_FORMAT, mountpoint.get(), (uint64_t)stat_buf.st_ino);
235
236 return fd_anon;
237 }
238
239 log_debug(gc, init)("Falling back to open/unlink");
240
241 // Create file name
242 char filename[PATH_MAX];
243 snprintf(filename, sizeof(filename), "%s/%s.%d", mountpoint.get(), name, os::current_process_id());
244
245 // Create file
246 const int fd = os::open(filename, O_CREAT|O_EXCL|O_RDWR|O_CLOEXEC, S_IRUSR|S_IWUSR);
247 if (fd == -1) {
248 ZErrno err;
249 log_error(gc)("Failed to create file %s (%s)", filename, err.to_string());
250 return -1;
251 }
252
253 // Unlink file
254 if (unlink(filename) == -1) {
255 ZErrno err;
256 log_error(gc)("Failed to unlink file %s (%s)", filename, err.to_string());
257 return -1;
258 }
259
260 log_info(gc, init)("Heap backed by file: %s", filename);
261
262 return fd;
263 }
264
265 int ZPhysicalMemoryBacking::create_fd(const char* name) const {
266 if (AllocateHeapAt == NULL) {
267 // If the path is not explicitly specified, then we first try to create a memfd file
268 // instead of looking for a tmpfd/hugetlbfs mount point. Note that memfd_create() might
269 // not be supported at all (requires kernel >= 3.17), or it might not support large
270 // pages (requires kernel >= 4.14). If memfd_create() fails, then we try to create a
271 // file on an accessible tmpfs or hugetlbfs mount point.
272 const int fd = create_mem_fd(name);
273 if (fd != -1) {
274 return fd;
275 }
276
277 log_debug(gc, init)("Falling back to searching for an accessible mount point");
278 }
279
280 return create_file_fd(name);
281 }
282
283 bool ZPhysicalMemoryBacking::is_initialized() const {
284 return _initialized;
285 }
286
287 void ZPhysicalMemoryBacking::warn_available_space(size_t max) const {
288 // Note that the available space on a tmpfs or a hugetlbfs filesystem
289 // will be zero if no size limit was specified when it was mounted.
290 if (_available == 0) {
291 // No size limit set, skip check
292 log_info(gc, init)("Available space on backing filesystem: N/A");
293 return;
294 }
295
296 log_info(gc, init)("Available space on backing filesystem: " SIZE_FORMAT "M", _available / M);
297
298 // Warn if the filesystem doesn't currently have enough space available to hold
299 // the max heap size. The max heap size will be capped if we later hit this limit
300 // when trying to expand the heap.
301 if (_available < max) {
302 log_warning(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****");
303 log_warning(gc)("Not enough space available on the backing filesystem to hold the current max Java heap");
304 log_warning(gc)("size (" SIZE_FORMAT "M). Please adjust the size of the backing filesystem accordingly "
305 "(available", max / M);
306 log_warning(gc)("space is currently " SIZE_FORMAT "M). Continuing execution with the current filesystem "
307 "size could", _available / M);
308 log_warning(gc)("lead to a premature OutOfMemoryError being thrown, due to failure to map memory.");
309 }
310 }
311
312 void ZPhysicalMemoryBacking::warn_max_map_count(size_t max) const {
313 const char* const filename = ZFILENAME_PROC_MAX_MAP_COUNT;
314 FILE* const file = fopen(filename, "r");
315 if (file == NULL) {
316 // Failed to open file, skip check
317 log_debug(gc, init)("Failed to open %s", filename);
318 return;
319 }
320
321 size_t actual_max_map_count = 0;
322 const int result = fscanf(file, SIZE_FORMAT, &actual_max_map_count);
323 fclose(file);
324 if (result != 1) {
325 // Failed to read file, skip check
326 log_debug(gc, init)("Failed to read %s", filename);
327 return;
328 }
329
330 // The required max map count is impossible to calculate exactly since subsystems
331 // other than ZGC are also creating memory mappings, and we have no control over that.
332 // However, ZGC tends to create the most mappings and dominate the total count.
333 // In the worst cases, ZGC will map each granule three times, i.e. once per heap view.
334 // We speculate that we need another 20% to allow for non-ZGC subsystems to map memory.
335 const size_t required_max_map_count = (max / ZGranuleSize) * 3 * 1.2;
336 if (actual_max_map_count < required_max_map_count) {
337 log_warning(gc)("***** WARNING! INCORRECT SYSTEM CONFIGURATION DETECTED! *****");
338 log_warning(gc)("The system limit on number of memory mappings per process might be too low for the given");
339 log_warning(gc)("max Java heap size (" SIZE_FORMAT "M). Please adjust %s to allow for at",
340 max / M, filename);
341 log_warning(gc)("least " SIZE_FORMAT " mappings (current limit is " SIZE_FORMAT "). Continuing execution "
342 "with the current", required_max_map_count, actual_max_map_count);
343 log_warning(gc)("limit could lead to a fatal error, due to failure to map memory.");
344 }
345 }
346
347 void ZPhysicalMemoryBacking::warn_commit_limits(size_t max) const {
348 // Warn if available space is too low
349 warn_available_space(max);
350
351 // Warn if max map count is too low
352 warn_max_map_count(max);
353 }
354
355 size_t ZPhysicalMemoryBacking::size() const {
356 return _size;
357 }
358
359 bool ZPhysicalMemoryBacking::is_tmpfs() const {
360 return _filesystem == TMPFS_MAGIC;
361 }
362
363 bool ZPhysicalMemoryBacking::is_hugetlbfs() const {
364 return _filesystem == HUGETLBFS_MAGIC;
365 }
366
367 bool ZPhysicalMemoryBacking::tmpfs_supports_transparent_huge_pages() const {
368 // If the shmem_enabled file exists and is readable then we
369 // know the kernel supports transparent huge pages for tmpfs.
370 return access(ZFILENAME_SHMEM_ENABLED, R_OK) == 0;
371 }
372
373 ZErrno ZPhysicalMemoryBacking::fallocate_compat_ftruncate(size_t size) const {
374 while (ftruncate(_fd, size) == -1) {
375 if (errno != EINTR) {
376 // Failed
377 return errno;
378 }
379 }
380
381 // Success
382 return 0;
383 }
384
385 ZErrno ZPhysicalMemoryBacking::fallocate_compat_mmap(size_t offset, size_t length, bool touch) const {
386 // On hugetlbfs, mapping a file segment will fail immediately, without
387 // the need to touch the mapped pages first, if there aren't enough huge
388 // pages available to back the mapping.
389 void* const addr = mmap(0, length, PROT_READ|PROT_WRITE, MAP_SHARED, _fd, offset);
390 if (addr == MAP_FAILED) {
391 // Failed
392 return errno;
393 }
394
395 // Once mapped, the huge pages are only reserved. We need to touch them
396 // to associate them with the file segment. Note that we can not punch
397 // hole in file segments which only have reserved pages.
398 if (touch) {
399 char* const start = (char*)addr;
400 char* const end = start + length;
401 os::pretouch_memory(start, end, _block_size);
402 }
403
404 // Unmap again. From now on, the huge pages that were mapped are allocated
405 // to this file. There's no risk in getting SIGBUS when touching them.
406 if (munmap(addr, length) == -1) {
407 // Failed
408 return errno;
409 }
410
411 // Success
412 return 0;
413 }
414
415 ZErrno ZPhysicalMemoryBacking::fallocate_compat_pwrite(size_t offset, size_t length) const {
416 uint8_t data = 0;
417
418 // Allocate backing memory by writing to each block
419 for (size_t pos = offset; pos < offset + length; pos += _block_size) {
420 if (pwrite(_fd, &data, sizeof(data), pos) == -1) {
421 // Failed
422 return errno;
423 }
424 }
425
426 // Success
427 return 0;
428 }
429
430 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_compat(size_t offset, size_t length) {
431 // fallocate(2) is only supported by tmpfs since Linux 3.5, and by hugetlbfs
432 // since Linux 4.3. When fallocate(2) is not supported we emulate it using
433 // ftruncate/pwrite (for tmpfs) or ftruncate/mmap/munmap (for hugetlbfs).
434
435 const size_t end = offset + length;
436 if (end > _size) {
437 // Increase file size
438 const ZErrno err = fallocate_compat_ftruncate(end);
439 if (err) {
440 // Failed
441 return err;
442 }
443 }
444
445 // Allocate backing memory
446 const ZErrno err = is_hugetlbfs() ? fallocate_compat_mmap(offset, length, false /* touch */)
447 : fallocate_compat_pwrite(offset, length);
448 if (err) {
449 if (end > _size) {
450 // Restore file size
451 fallocate_compat_ftruncate(_size);
452 }
453
454 // Failed
455 return err;
456 }
457
458 if (end > _size) {
459 // Record new file size
460 _size = end;
461 }
462
463 // Success
464 return 0;
465 }
466
467 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole_syscall(size_t offset, size_t length) {
468 const int mode = 0; // Allocate
469 const int res = ZSyscall::fallocate(_fd, mode, offset, length);
470 if (res == -1) {
471 // Failed
472 return errno;
473 }
474
475 const size_t end = offset + length;
476 if (end > _size) {
477 // Record new file size
478 _size = end;
479 }
480
481 // Success
482 return 0;
483 }
484
485 ZErrno ZPhysicalMemoryBacking::fallocate_fill_hole(size_t offset, size_t length) {
486 // Using compat mode is more efficient when allocating space on hugetlbfs.
487 // Note that allocating huge pages this way will only reserve them, and not
488 // associate them with segments of the file. We must guarantee that we at
489 // some point touch these segments, otherwise we can not punch hole in them.
490 if (z_fallocate_supported && !is_hugetlbfs()) {
491 const ZErrno err = fallocate_fill_hole_syscall(offset, length);
492 if (!err) {
493 // Success
494 return 0;
495 }
496
497 if (err != ENOSYS && err != EOPNOTSUPP) {
498 // Failed
499 return err;
500 }
501
502 // Not supported
503 log_debug(gc)("Falling back to fallocate() compatibility mode");
504 z_fallocate_supported = false;
505 }
506
507 return fallocate_fill_hole_compat(offset, length);
508 }
509
510 ZErrno ZPhysicalMemoryBacking::fallocate_punch_hole(size_t offset, size_t length) {
511 if (is_hugetlbfs()) {
512 // We can only punch hole in pages that have been touched. Non-touched
513 // pages are only reserved, and not associated with any specific file
514 // segment. We don't know which pages have been previously touched, so
515 // we always touch them here to guarantee that we can punch hole.
516 const ZErrno err = fallocate_compat_mmap(offset, length, true /* touch */);
517 if (err) {
518 // Failed
519 return err;
520 }
521 }
522
523 const int mode = FALLOC_FL_PUNCH_HOLE|FALLOC_FL_KEEP_SIZE;
524 if (ZSyscall::fallocate(_fd, mode, offset, length) == -1) {
525 // Failed
526 return errno;
527 }
528
529 // Success
530 return 0;
531 }
532
533 ZErrno ZPhysicalMemoryBacking::split_and_fallocate(bool punch_hole, size_t offset, size_t length) {
534 // Try first half
535 const size_t offset0 = offset;
536 const size_t length0 = align_up(length / 2, _block_size);
537 const ZErrno err0 = fallocate(punch_hole, offset0, length0);
538 if (err0) {
539 return err0;
540 }
541
542 // Try second half
543 const size_t offset1 = offset0 + length0;
544 const size_t length1 = length - length0;
545 const ZErrno err1 = fallocate(punch_hole, offset1, length1);
546 if (err1) {
547 return err1;
548 }
549
550 // Success
551 return 0;
552 }
553
554 ZErrno ZPhysicalMemoryBacking::fallocate(bool punch_hole, size_t offset, size_t length) {
555 assert(is_aligned(offset, _block_size), "Invalid offset");
556 assert(is_aligned(length, _block_size), "Invalid length");
557
558 const ZErrno err = punch_hole ? fallocate_punch_hole(offset, length) : fallocate_fill_hole(offset, length);
559 if (err == EINTR && length > _block_size) {
560 // Calling fallocate(2) with a large length can take a long time to
561 // complete. When running profilers, such as VTune, this syscall will
562 // be constantly interrupted by signals. Expanding the file in smaller
563 // steps avoids this problem.
564 return split_and_fallocate(punch_hole, offset, length);
565 }
566
567 return err;
568 }
569
570 bool ZPhysicalMemoryBacking::commit_inner(size_t offset, size_t length) {
571 log_trace(gc, heap)("Committing memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
572 offset / M, (offset + length) / M, length / M);
573
574 retry:
575 const ZErrno err = fallocate(false /* punch_hole */, offset, length);
576 if (err) {
577 if (err == ENOSPC && !is_init_completed() && is_hugetlbfs() && z_fallocate_hugetlbfs_attempts-- > 0) {
578 // If we fail to allocate during initialization, due to lack of space on
579 // the hugetlbfs filesystem, then we wait and retry a few times before
580 // giving up. Otherwise there is a risk that running JVMs back-to-back
581 // will fail, since there is a delay between process termination and the
582 // huge pages owned by that process being returned to the huge page pool
583 // and made available for new allocations.
584 log_debug(gc, init)("Failed to commit memory (%s), retrying", err.to_string());
585
586 // Wait and retry in one second, in the hope that huge pages will be
587 // available by then.
588 sleep(1);
589 goto retry;
590 }
591
592 // Failed
593 log_error(gc)("Failed to commit memory (%s)", err.to_string());
594 return false;
595 }
596
597 // Success
598 return true;
599 }
600
601 static int offset_to_node(size_t offset) {
602 const GrowableArray<int>* mapping = os::Linux::numa_nindex_to_node();
603 const size_t nindex = (offset >> ZGranuleSizeShift) % mapping->length();
604 return mapping->at((int)nindex);
605 }
606
607 size_t ZPhysicalMemoryBacking::commit_numa_interleaved(size_t offset, size_t length) {
608 size_t committed = 0;
609
610 // Commit one granule at a time, so that each granule
611 // can be allocated from a different preferred node.
612 while (committed < length) {
613 const size_t granule_offset = offset + committed;
614
615 // Setup NUMA policy to allocate memory from a preferred node
616 os::Linux::numa_set_preferred(offset_to_node(granule_offset));
617
618 if (!commit_inner(granule_offset, ZGranuleSize)) {
619 // Failed
620 break;
621 }
622
623 committed += ZGranuleSize;
624 }
625
626 // Restore NUMA policy
627 os::Linux::numa_set_preferred(-1);
628
629 return committed;
630 }
631
632 size_t ZPhysicalMemoryBacking::commit_default(size_t offset, size_t length) {
633 // Try to commit the whole region
634 if (commit_inner(offset, length)) {
635 // Success
636 return length;
637 }
638
639 // Failed, try to commit as much as possible
640 size_t start = offset;
641 size_t end = offset + length;
642
643 for (;;) {
644 length = align_down((end - start) / 2, ZGranuleSize);
645 if (length < ZGranuleSize) {
646 // Done, don't commit more
647 return start - offset;
648 }
649
650 if (commit_inner(start, length)) {
651 // Success, try commit more
652 start += length;
653 } else {
654 // Failed, try commit less
655 end -= length;
656 }
657 }
658 }
659
660 size_t ZPhysicalMemoryBacking::commit(size_t offset, size_t length) {
661 if (ZNUMA::is_enabled() && !ZLargePages::is_explicit()) {
662 // To get granule-level NUMA interleaving when using non-large pages,
663 // we must explicitly interleave the memory at commit/fallocate time.
664 return commit_numa_interleaved(offset, length);
665 }
666
667 return commit_default(offset, length);
668 }
669
670 size_t ZPhysicalMemoryBacking::uncommit(size_t offset, size_t length) {
671 log_trace(gc, heap)("Uncommitting memory: " SIZE_FORMAT "M-" SIZE_FORMAT "M (" SIZE_FORMAT "M)",
672 offset / M, (offset + length) / M, length / M);
673
674 const ZErrno err = fallocate(true /* punch_hole */, offset, length);
675 if (err) {
676 log_error(gc)("Failed to uncommit memory (%s)", err.to_string());
677 return 0;
678 }
679
680 return length;
681 }
682
683 void ZPhysicalMemoryBacking::map(uintptr_t addr, size_t size, uintptr_t offset) const {
684 const void* const res = mmap((void*)addr, size, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_SHARED, _fd, offset);
685 if (res == MAP_FAILED) {
686 ZErrno err;
687 fatal("Failed to map memory (%s)", err.to_string());
688 }
689 }
690
691 void ZPhysicalMemoryBacking::unmap(uintptr_t addr, size_t size) const {
692 // Note that we must keep the address space reservation intact and just detach
693 // the backing memory. For this reason we map a new anonymous, non-accessible
694 // and non-reserved page over the mapping instead of actually unmapping.
695 const void* const res = mmap((void*)addr, size, PROT_NONE, MAP_FIXED | MAP_ANONYMOUS | MAP_PRIVATE | MAP_NORESERVE, -1, 0);
696 if (res == MAP_FAILED) {
697 ZErrno err;
698 fatal("Failed to map memory (%s)", err.to_string());
699 }
700 }