Volker Simonis [Фолькер Симонис], SAP / volker.simonis@gmail.com
// Low-level assembler layer: one member function per machine instruction.
// Assembler::imulq (defined right after this class) shows what such a member
// does: it emits the raw instruction bytes one by one.
class Assembler : public AbstractAssembler {
// Creation
// Passes the CodeBuffer on to the AbstractAssembler base class
// (constructor body elided on the slide).
Assembler(CodeBuffer* code) : AbstractAssembler(code) {...}
// Vanilla instructions in lexical order
void call(Label& L, relocInfo::relocType rtype);
void call(Register reg); // push pc; pc <- reg
...
// Register-register form of imulq with operands dst and src.
void imulq(Register dst, Register src);
};
// Emits the machine code for the register-register instruction 'imulq dst, src'.
// Three bytes are emitted explicitly here: the two opcode bytes 0x0F 0xAF and
// one operand byte. prefixq_and_encode() is not shown; presumably it emits the
// 64-bit operand-size prefix first - TODO confirm against its definition.
void Assembler::imulq(Register dst, Register src) {
// Combined encoding of both registers; OR-ed into the operand byte below.
int encode = prefixq_and_encode(dst->encoding(), src->encoding());
emit_int8(0x0F); // first opcode byte
emit_int8((unsigned char)0xAF); // second opcode byte
// 0xC0 sets the two top bits of the operand (ModRM) byte, which on x86 selects
// the register-direct operand form; the low bits come from 'encode'.
emit_int8((unsigned char)(0xC0 | encode));
}
// Next layer on top of Assembler (class body elided on the slide).
class MacroAssembler: public Assembler {...};
// Layer on top of MacroAssembler (class body elided on the slide).
// This is the type of the assembler held in TemplateTable::_masm.
class InterpreterMacroAssembler: public MacroAssembler {...};
// The TemplateTable generates assembler templates for each bytecode.
// A template is produced by a generator function such as TemplateTable::lmul,
// which emits its instructions through the shared _masm assembler.
class TemplateTable {
// The assembler used when generating the bytecode templates.
// Static: one assembler pointer shared by all generator functions.
static InterpreterMacroAssembler* _masm;
// (remaining members elided on the slide)
...
};
// Shorthand: '__ foo(...)' expands to '_masm->foo(...)', so that the generator
// functions below read almost like an assembly listing.
#define __ _masm->
// Generates the template for the 'lmul' bytecode (multiplication of two longs).
void TemplateTable::lmul() {
// Pop a long operand into rdx. The other operand is presumably already held
// in rax when this template starts - TODO confirm.
__ pop_l(rdx);
// Emit 'imulq rax, rdx' via Assembler::imulq (dst = rax, src = rdx).
__ imulq(rax, rdx);
}
SIGSEGV, SIGILL, SIGBUS, ... ?
$ java -version
openjdk version "9-internal"
OpenJDK Runtime Environment (slowdebug build 9-internal+0-adhoc.simonis.jdk9-dev)
OpenJDK 64-Bit Server VM (slowdebug build 9-internal+0-adhoc.simonis.jdk9-dev, mixed mode)
$ gdb java
GNU gdb (Ubuntu 7.7-0ubuntu3.1) 7.7
...
(gdb) run -version
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffff7fdb700 (LWP 6549)]
0x00007fffd8514513 in ?? ()
(gdb) where
#0 0x00007fffd8514513 in ?? () // gdb can't reliably find out where we are!
#1 0x0000000000000246 in ?? ()
#2 0x00007fffd85142a0 in ?? ()
#3 0x00007ffff0019000 in ?? ()
#4 0x00007ffff7fda400 in ?? ()
#5 0x00007ffff67068c3 in VM_Version::get_processor_features ()
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/cpu/x86/vm/vm_version_x86.cpp:477
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
(gdb) call help() // internal HotSpot helper functions
"Executing help"
basic
pp(void* p) - try to make sense of p
pv(intptr_t p)- ((PrintableResourceObj*) p)->print()
ps() - print current thread stack
pss() - print all thread stacks
pm(int pc) - print Method* given compiled PC
findm(intptr_t pc) - finds Method*
find(intptr_t x) - finds & prints nmethod/stub/bytecode/oop based on pointer into it
pns(void* sp, void* fp, void* pc) - print native (i.e. mixed) stack trace. E.g.
pns($sp, $rbp, $pc) on Linux/amd64 and Solaris/amd64 or
pns($sp, $ebp, $pc) on Linux/x86 or
pns($sp, 0, $pc) on Linux/ppc64 or
pns($sp + 0x7ff, 0, $pc) on Solaris/SPARC
- in gdb do 'set overload-resolution off' before calling pns()
- in dbx do 'frame 1' before calling pns()
misc.
flush() - flushes the log file
events() - dump events from ring buffers
compiler debugging
debug() - to set things up for compiler debugging
ndebug() - undo debug
(gdb) call nm($pc) // print $pc as 'native (i.e. generated) method'
"Executing nm"
[CodeBlob (0x00007fffd849d210)]
Framesize: 0
BufferBlob (0x00007fffd849d210) used for get_cpu_info_stub
(gdb) x /4i $pc
=> 0x7fffd8514513: mov (%rsi),%eax
0x7fffd8514515: lea 0x30(%rbp),%rsi
0x7fffd8514519: mov $0x10000,%eax
0x7fffd851451e: and 0x4(%rsi),%eax
(gdb) print $rsi
$3 = 0 // reading from zero obviously results in a SIGSEGV
(gdb) stepi // entering the HotSpot signal handler
signalHandler (sig=0, info=0x800, uc=0x6f732e6567616d69) at os_linux.cpp:4219
4219 void signalHandler(int sig, siginfo_t* info, void* uc) {
(gdb) step // single step until we get here:
JVM_handle_linux_signal (sig=11, info=..., ucVoid=..., abort_if_unrecognized=1)
at os_linux_x86.cpp:274
274 int abort_if_unrecognized) {
(gdb) next // single step until we get here:
412 if ((sig == SIGSEGV) && VM_Version::is_cpuinfo_segv_addr(pc)) {
(gdb) next
414 stub = VM_Version::cpuinfo_cont_addr();
(gdb) print /x stub
$5 = 0x7fffd8514515
(gdb) tbreak *stub
Temporary breakpoint 2 at 0x7fffd8514515
(gdb) cont
Continuing.
Temporary breakpoint 2, 0x00007fffd8514515 in ?? ()
(gdb) where
#0 0x00007fffd8514515 in ?? () // we're back just one instruction after the
#1 0x0000000000000246 in ?? () // previous SIGSEGV
#2 0x00007fffd85142a0 in ?? ()
#3 0x00007ffff0019000 in ?? ()
#4 0x00007ffff7fda400 in ?? ()
#5 0x00007ffff67068c3 in VM_Version::get_processor_features ()
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/cpu/x86/vm/vm_version_x86.cpp:477
Backtrace stopped: previous frame inner to this frame (corrupt stack?)
(gdb) up // go up the stack until we reach VM_Version::get_processor_features ()
...
#5 0x00007ffff67068c3 in VM_Version::get_processor_features ()
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/cpu/x86/vm/vm_version_x86.cpp:477
477 get_cpu_info_stub(&_cpuid_info);
(gdb) ptype get_cpu_info_stub
type = void (*)(void *)
(gdb) list get_cpu_info_stub // generated by generate_get_cpu_info()
Function "get_cpu_info_stub" not defined // in vm_version_x86.cpp
(gdb) list vm_version_x86.cpp:340,404
340 // Some OSs have a bug when upper 128/256bits of YMM/ZMM
341 // registers are not restored after a signal processing.
342 // Generate SEGV here (reference through NULL)
344 // and check upper YMM/ZMM bits after it.
398 __ xorl(rsi, rsi); // rsi = 0
399 VM_Version::set_cpuinfo_segv_addr(__ pc());// SEGV addr. for signal handler
400 // Generate SEGV
401 __ movl(rax, Address(rsi, 0)); // load from 0
403 VM_Version::set_cpuinfo_cont_addr(__ pc());// continuation for signal hand.
404 // Returns here after signal. Save xmm0 to check it later.
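VM_Version_StubGenerator::generate_get_cpu_info() generates a stub which deliberately provokes a SIGSEGV (load from address 0).
The signal handler recognizes this SIGSEGV and continues at the next instruction.
os_supports_avx_vectors(): it checks if the register values were preserved.
SIGILL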
$ java -Xlog:os+cpu -version
[0.021s][info][os,cpu] Logical CPUs per core: 1
[0.021s][info][os,cpu] L1 data cache line size: 64
[0.021s][info][os,cpu] UseSSE=4 UseAVX=1 UseAES=1 MaxVectorSize=64
[0.021s][info][os,cpu] Allocation prefetching: PREFETCHNTA at distance 256, 3 lines of 64 bytes
[0.021s][info][os,cpu] PrefetchCopyIntervalInBytes 576
[0.021s][info][os,cpu] PrefetchScanIntervalInBytes 576
[0.021s][info][os,cpu] PrefetchFieldsAhead 1
[0.021s][info][os,cpu] ContendedPaddingWidth 128
[0.021s][info][os,cpu] CPU:total 4 (initial active 4) (4 cores per cpu, 1 threads per core) family 6 model 69 stepping 1, cmov, cx8, fxsr, mmx, sse, sse2, sse3, ssse3, sse4.1, sse4.2, popcnt, avx, aes, clmul, lzcnt, tsc, tscinvbit
[0.021s][info][os,cpu] CPU Model and flags from /proc/cpuinfo:
[0.021s][info][os,cpu] model name : Intel(R) Core(TM) i5-4300U CPU @ 1.90GHz
[0.021s][info][os,cpu] flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge mca cmov pat pse36 clflush mmx fxsr sse sse2 ht syscall nx rdtscp lm constant_tsc rep_good nopl xtopology nonstop_tsc pni pclmulqdq ssse3 cx16 sse4_1 sse4_2 movbe popcnt aes xsave avx rdrand hypervisor lahf_lm abm
openjdk version "9-internal"
OpenJDK Runtime Environment (slowdebug build 9-internal+0-adhoc.simonis.jdk9-dev)
OpenJDK 64-Bit Server VM (slowdebug build 9-internal+0-adhoc.simonis.jdk9-dev, mixed mode)
(gdb) run -version
Program received signal SIGSEGV, Segmentation fault.
[Switching to Thread 0x7ffff7fdb700 (LWP 7885)]
0x00007fffd8514513 in ?? () // this SIGSEGV is from the CPU detection!
(gdb) cont
Continuing.
[New Thread 0x7fffd833d700 (LWP 7886)]
[New Thread 0x7fffd823c700 (LWP 7887)]
[New Thread 0x7fffd813b700 (LWP 7888)]
[New Thread 0x7fffcbfff700 (LWP 7889)]
[New Thread 0x7fffcbafa700 (LWP 7890)]
[New Thread 0x7fffcb9f9700 (LWP 7891)]
[New Thread 0x7fffcb8f8700 (LWP 7892)]
[New Thread 0x7fffcb7f7700 (LWP 7893)]
[New Thread 0x7fffcb6f6700 (LWP 7894)]
... // HotSpot creates at least 10 threads (GC, JIT, VM, Timer, etc..)
Program received signal SIGSEGV, Segmentation fault.
0x00007fffd8691a5c in ?? ()
(gdb) where
#0 0x00007fffd8691a5c in ?? () // <- generated code
#1 0x00007ffff6432798 in SafeFetch32 (adr=0xabc0000000000abc, errValue=2748)
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/runtime/stubRoutines.hpp:464
#2 0x00007ffff65cf278 in test_safefetch32 () at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/runtime/stubRoutines.cpp:250
#3 0x00007ffff65cfe2d in StubRoutines::initialize2 ()
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/runtime/stubRoutines.cpp:371
#4 0x00007ffff65cfe96 in stubRoutines_init2 () at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/runtime/stubRoutines.cpp:380
#5 0x00007ffff603f9b1 in init_globals () at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/runtime/init.cpp:149
#6 0x00007ffff664b7ff in Threads::create_vm (args=0x7ffff7fdae20, canTryAgain=0x7ffff7fdad0b)
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/runtime/thread.cpp:3625
#7 0x00007ffff60cea48 in JNI_CreateJavaVM_inner (vm=0x7ffff7fdae88, penv=0x7ffff7fdae90, args=0x7ffff7fdae20)
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/prims/jni.cpp:3974
#8 0x00007ffff60cedf0 in JNI_CreateJavaVM (vm=0x7ffff7fdae88, penv=0x7ffff7fdae90, args=0x7ffff7fdae20)
at /share/OpenJDK/jdk9-hs-comp/hotspot/src/share/vm/prims/jni.cpp:4069
#9 0x00007ffff7bcd358 in InitializeJVM (pvm=0x7ffff7fdae88, penv=0x7ffff7fdae90, ifn=0x7ffff7fdaee0)
at /share/OpenJDK/jdk9-hs-comp/jdk/src/java.base/share/native/libjli/java.c:1489
#10 0x00007ffff7bca171 in JavaMain (_args=0x7fffffffaa70)
at /share/OpenJDK/jdk9-hs-comp/jdk/src/java.base/share/native/libjli/java.c:403
#11 0x00007ffff71cd182 in start_thread (arg=0x7ffff7fdb700) at pthread_create.c:312
(gdb) call pns($sp, $rbp, $pc)
"Executing pns"
Native frames: (J=compiled Java code, A=aot compiled Java code, j=interpreted,
Vv=VM code, C=native code)
v ~StubRoutines::SafeFetch32
V [libjvm.so+0x11c60e7] test_safefetch32()+0x4d
V [libjvm.so+0x11c6c9b] StubRoutines::initialize2()+0x9a3
V [libjvm.so+0x11c6d04] stubRoutines_init2()+0x1c
V [libjvm.so+0xc29d35] init_globals()+0xe2
V [libjvm.so+0x1241fed] Threads::create_vm(JavaVMInitArgs*, bool*)+0x3e3
V [libjvm.so+0xcb92a8] JNI_CreateJavaVM_inner(JavaVM_**, void**, void*)+0xf6
V [libjvm.so+0xcb9650] JNI_CreateJavaVM+0x42
C [libjli.so+0x72d3] InitializeJVM+0x13b
C [libjli.so+0x400b] JavaMain+0xd3
C [libpthread.so.0+0x8182] start_thread+0xc2
(gdb) call find($pc)
0x00007fffd8691a5c is at begin+0 in a stub // this is generated code again :)
StubRoutines::SafeFetch32 [0x00007fffd8691a5c, 0x00007fffd8691a62[ (6 bytes)
(gdb) x /3i $pc // %rdi = arg1, %rsi = arg2, %rax = return value
=> 0x7fffd8691a5c: mov (%rdi),%rsi // *arg1 -> arg2
0x7fffd8691a5e: mov %rsi,%rax // arg2 -> return value
0x7fffd8691a61: retq
(gdb) p /x $rdi
$1 = 0xabc0000000000abc
(gdb) x /1g 0xabc0000000000abc
0xabc0000000000abc: Cannot access memory at address 0xabc0000000000abc
(gdb) stepi // and next/step until we reach in JVM_handle_linux_signal() at:
332 if (StubRoutines::is_safefetch_fault(pc)) {
333 ucontext_set_pc(uc, StubRoutines::continuation_for_safefetch_fault(pc));
(gdb) step
StubRoutines::continuation_for_safefetch_fault (pc=0x7fffd8691a5c)
(gdb) finish
Run till exit from #0 StubRoutines::continuation_for_safefetch_fault (...)
Value returned is $9 = (u_char *) 0x7fffd8691a5e // Continuation address
(gdb) finish // three times until we're back in our stub
Run till exit from #0 JVM_handle_linux_signal (...)
Run till exit from #0 signalHandler (...)
0x00007fffd8691a5e in ?? ()
HotSpot uses SafeFetch stubs for this purpose:
they load the value from addr if possible;
if the load raises a SIGSEGV, errValue will be returned instead of crashing with the SIGSEGV.
// Minimal type with a single 'long' field, used to demonstrate what happens
// when a field is accessed through a null pointer.
struct NullCheck {
long l0001; // the only field; swapFields() accesses it at offset 0 (see the disassembly)
};
// Exchanges the l0001 fields of *n1 and *n2.
// There is no null check: both pointers are dereferenced unconditionally, so a
// null argument leads to an invalid memory access (the demo run ends with
// "Segmentation fault").
void swapFields(NullCheck* n1, NullCheck* n2) {
long tmp = n1->l0001; // remember the old value of n1's field
n1->l0001 = n2->l0001;
n2->l0001 = tmp;
}
// Demo driver: deliberately calls swapFields() with null pointers to show that
// C/C++ performs no null-pointer check. argc/argv are unused.
int main(int argc, char** argv) {
NullCheck* n = (NullCheck*)0; // null pointer
swapFields(n, n); // dereferences the null pointer -> crash
}
Unmanaged languages (e.g. C/C++) don't have Null-Pointer checks:
$ g++ -O3 -o NullCheck NullCheck.cpp
$ ./NullCheck
Segmentation fault (core dumped)
$ objdump --disassemble --demangle NullCheck
...
0000000000400500 <swapFields(NullCheck* n1, NullCheck* n2)>:
400500: 48 8b 07 mov (%rdi),%rax // %rax = n1->l0001
400503: 48 8b 16 mov (%rsi),%rdx // %rdx = n2->l0001
400506: 48 89 17 mov %rdx,(%rdi) // n1->l0001 = %rdx (n2->l0001)
400509: 48 89 06 mov %rax,(%rsi) // n2->l0001 = %rax (n1->l0001)
40050c: c3 retq
Managed languages like Java guarantee Null-Pointer checks!
$ java -XX:-TieredCompilation -Xbatch \
-XX:CompileCommand="option *::swapField_2 PrintAssembly" \
org.simonis.NullCheck swapField_2
...
Compiled method (c2) org.simonis.NullCheck::swapField_2 (19 bytes)
# parm0: rsi:rsi = 'org/simonis/NullCheck'
# parm1: rdx:rdx = 'org/simonis/NullCheck'
0x07fc7348e0e4c: test %rsi,%rsi // null-check first arg 'n1'
0x07fc7348e0e4f: je 0x07fc7348e0e7e
0x07fc7348e0e51: test %rdx,%rdx // null-check second arg 'n2'
0x07fc7348e0e54: je 0x07fc7348e0e8d
0x07fc7348e0e56: mov 0x1010(%rdx),%r10 //*getfield l0512
// - NullCheck::swapField_2@7 (line 13)
0x07fc7348e0e5d: mov 0x1010(%rsi),%r11 //*getfield l0512
// - NullCheck::swapField_2@1 (line 12)
0x07fc7348e0e64: mov %r10,0x1010(%rsi) //*putfield l0512
// - NullCheck::swapField_2@10 (line 13)
0x07fc7348e0e6b: mov %r11,0x1010(%rdx) //*putfield l0512
// - NullCheck::swapField_2@15 (line 14)
$ java -XX:-TieredCompilation -Xbatch \
-XX:CompileCommand="option *::swapField_1 PrintAssembly" \
org.simonis.NullCheck swapField_1
...
Compiled method (c2) org.simonis.NullCheck::swapField_1 (19 bytes)
# parm0: rsi:rsi = 'org/simonis/NullCheck'
# parm1: rdx:rdx = 'org/simonis/NullCheck'
0x07f9f0c8e104c: mov 0x18(%rsi),%r10 //*getfield l0001
// - NullCheck::swapField_1@1 (line 6)
0x07f9f0c8e1050: mov 0x18(%rdx),%r11 //*getfield l0001
// - NullCheck::swapField_1@3 (line 7)
0x07f9f0c8e1054: mov %r11,0x18(%rsi) //*putfield l0001
// - NullCheck::swapField_1@10 (line 7)
0x07f9f0c8e1058: mov %r10,0x18(%rdx) //*putfield l0001
// - NullCheck::swapField_1@15 (line 8)
DEMO
public static void main(String[] args) {
if ("swapField_1".equals(args[0])) {
prompt("Calling swapField_1() to JIT-compile it:");
for (int i = 0; i < 20_000; i++) {
swapField_1(n, n);
}
for (int i = 1; i <= 5; i++) {
prompt(i + ": calling swapField_1() with a NULL argument:");
try {
swapField_1(null, n);
} catch (NullPointerException npe) { npe.printStackTrace(System.out); }
}
prompt("Calling swapField_1() to JIT-compile it again:");
for (int i = 0; i < 20_000; i++) {
swapField_1(n, n);
}
}
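Implicit null checks: accessing a field of a null reference touches the page at address 0x0000 and raises a SIGSEGV.
This only works for field offsets smaller than the page size (`getconf PAGESIZE`); for larger offsets an explicit null check is emitted.
After too many such traps (-XX:PerBytecodeTrapLimit) the method is compiled again.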
But where's the benchmark?
Safepoint: “A point during program execution at which all GC roots are known
and all heap object contents are consistent.”
LongLoopWithGC
// Demo class, first view: main() starts a background thread which calls
// System.gc() once per second. The other parts of the class and of main() are
// elided on this slide ("...").
public class LongLoopWithGC {
...
public static void main(String[] args) {
...
// Anonymous thread that loops forever: sleep for one second, then call System.gc().
new Thread() {
public void run() {
while(true) {
// An interrupt merely ends the sleep early; it is deliberately ignored.
try { Thread.sleep(1_000); } catch (InterruptedException e) {}
System.gc();
}
}
}.start();
...
}
}
LongLoopWithGC
// Demo class, second view: the timed workload. The GC thread started in main()
// is elided here ("...").
public class LongLoopWithGC {
// Two nested counting loops which increment 'tmp'. Both loop variables are of
// type 'long'; in the C2 listing shown for this method both loops contain a
// safepoint poll. 'tmp' is a field of this class declared in a part that is not shown.
static void loop(int count) {
for (long i = 1; i < count; i++)
for (long j = 1; j < 1_000_000; j++)
tmp++;
}
public static void main(String[] args) {
...
// Time one call of loop(); the outer iteration count is the first command line argument.
long start = System.currentTimeMillis();
loop(Integer.parseInt(args[0]));
long end = System.currentTimeMillis();
// Prints the elapsed wall-clock time, e.g. "1234ms".
System.out.println(end - start + "ms");
}
}
LongLoopWithGC::loop()
B4:
addq R9, #1
movq [R8 + #112 (8-bit)], R9 // Field: org/simonis/LongLoopWithGC.tmp
testl rax, [rip + #offset_to_poll_page] // Safepoint: poll for GC
cmpq RCX, #1000000
jl,s B3
B5:
addq R11, #1
testl rax, [rip + #offset_to_poll_page] // Safepoint: poll for GC
cmpq R11, R10
jge,s B7
B6:
movl RCX, #2
jmp,s B4
B7:
addq rsp, 16 # Destroy frame
popq rbp
testl rax, [rip + #offset_to_poll_page] // Safepoint: poll for GC
ret
LongLoopWithGC
DEMO
IntLoopWithGC
// Demo class: same workload as LongLoopWithGC, but with 'int' loop variables.
// Parts of main() are elided ("...").
public class IntLoopWithGC {
// Two nested counting loops which increment 'tmp'. Both loop variables are of
// type 'int'; in the C2 listing shown for this method the loops contain no
// safepoint poll, only the method return does. 'tmp' is a field of this class
// declared in a part that is not shown.
static void loop(int count) {
for (int i = 1; i < count; i++)
for (int j = 1; j < 1_000_000; j++)
tmp++;
}
public static void main(String[] args) {
...
// Time one call of loop(); the outer iteration count is the first command line argument.
long start = System.currentTimeMillis();
loop(Integer.parseInt(args[0]));
long end = System.currentTimeMillis();
// Prints the elapsed wall-clock time, e.g. "1234ms".
System.out.println(end - start + "ms");
}
}
IntLoopWithGC::loop()
B3:
movl R8, #1
B4:
addq R11, #1
movq [R10 + #112 (8-bit)], R11 // Field: org/simonis/IntLoopWithGC.tmp
incl R8
cmpl R8, #1000000
jl,s B4
B5:
incl R9
cmpl R9, RSI
jl,s B3
B6:
addq rsp, 16 # Destroy frame
popq rbp
testl rax, [rip + #offset_to_poll_page] // Safepoint: poll for GC
ret
IntLoopWithGC
DEMO
LoopWithGC
// Demo class: an 'int' counted loop whose body consists of calls to
// Math.pow/Math.sin/Math.sqrt. Parts of main() are elided ("...").
public class LoopWithGC {
// Applies d += pow(sin(d), sqrt(d)) 'count' times and returns the final d.
static double compute(int count, double d) {
for (int i = 0; i < count; i++) {
d += Math.pow(Math.sin(d), Math.sqrt(d));
}
return d;
}
public static void main(String[] args) throws Exception {
...
// 'count' is set up in the elided part of main(). The result of compute() is
// discarded; only the elapsed time is of interest.
long start = System.currentTimeMillis();
compute(count, 0.0d);
long end = System.currentTimeMillis();
// Prints the elapsed wall-clock time, e.g. "1234ms".
System.out.println(end - start + "ms");
}
}
LoopWithGC
DEMO
LoopWithGC
LoopWithGC -XX:-InlineMathNatives -XX:DisableIntrinsic=_dsqrt
LoopWithGC
LoopWithGC -XX:-InlineMathNatives