scale up with lock-free algorithms @ javaone

Scale Up with Lock-Free Algorithms

Non-blocking concurrency on JVM
Presented at JavaOne 2017 / Roman Elizarov @ JetBrains

Speaker: Roman Elizarov

• 16+ years experience
• Previously developed high-perf trading software @ Devexperts
• Teach concurrent & distributed programming @ St. Petersburg ITMO University
• Chief judge @ Northern Eurasia Contest / ACM ICPC
• Now work on Kotlin @ JetBrains

Shared

Shared Mutable

Shared Mutable State

Big Big Data

Data1 Data2 DataN

map map map

Data1 Data2 DataN

map map map

reduce

answer

Embarrassingly parallel

Data1 Data2 DataN

map map map

reduce

answer

Big Big Data

Big Big Data
Real-time

Concurrent requests / processing

Big Big Data
Real-time

Concurrent requests / processing

Performance Scalability

A toy problem

A toy problem – stack

A toy problem – stack

class Node<T>(val next: Node<T>?, val value: T)

public final class Node<T> {
    private final Node<T> next;
    private final T value;

    public Node(Node<T> next, T value) {
        this.next = next;
        this.value = value;
    }

    public Node<T> getNext() {
        return next;
    }

    public T getValue() {
        return value;
    }
}

A toy problem – stack

A toy problem – empty stack

class Node<T>(val next: Node<T>?, val value: T)

A toy problem – stack push

class Node<T>(val next: Node<T>?, val value: T)

next = null
value = 1

top A

next = A
value = 2

top A

next = A
value = 2

class LinkedStack<T> {
    private var top: Node<T>? = null

    fun push(value: T) {
        top = Node(top, value)
    }
}

next = Avalue = 2

A toy problem – stack pop

class Node<T>(val next: Node<T>?, val value: T)

next = A
value = 2

cur
result = 2

fun pop(): T? {
    val cur = top ?: return null
    top = cur.next
    return cur.value
}

Does it work?

A toy problem – concurrent push

class Node<T>(val next: Node<T>?, val value: T)

top А

next = Avalue = 2

top А

next = Avalue = 2

next = Avalue = 3

top А

next = Avalue = 2

next = Avalue = 3

A toy problem – synchronized stack

class Node<T>(val next: Node<T>?, val value: T)

@Synchronized
fun push(value: T) {
    top = Node(top, value)
}

@Synchronized
fun pop(): T? {
    val cur = top ?: return null
    top = cur.next
    return cur.value
}

Does it scale?

Benchmark

@State(Scope.Benchmark)
open class LinkedStackBenchmark {
    private val stack = LinkedStack<Int>()

    @Benchmark
    fun benchmark() {
        stack.push(1)
        check(stack.pop() == 1)
    }
}

Benchmark

@State(Scope.Benchmark)
open class LinkedStackBenchmark {

Benchmark results

1 2 4 8 16 32 64 128

Millions

Number of threads

Throughput (ops/s)

LinkedStack

Intel(R) Xeon(R) CPU E5-2680 v2 @ 2.80GHz; 32 HW threads; Java HotSpot(TM) 64-Bit Server VM (build 9+181, mixed mode)

Contention

pop2wait

Contention

Q work

pop2wait

Deadlocks

Lock-free?

Lock-free push

class Node<T>(val next: Node<T>?, val value: T)

top А

next = Avalue = 2

expect

top А

next = Avalue = 2

Bupdate

top А

next = Avalue = 2

Bupdate

expect

AtomicReference

package java.util.concurrent.atomic;

/** @since 1.5 */
public class AtomicReference<V> {
    private volatile V value;

    public V get() {
        return value;
    }

    public boolean compareAndSet(V expect, V update) {
        // …
    }
}

Using AtomicReference

class LockFree<T> {
    private val top = AtomicReference<Node<T>?>(null)

    fun push(value: T) {
        while (true) {
            val cur = top.get()
            val upd = Node(cur, value)
            if (top.compareAndSet(cur, upd)) return
        }
    }
}

Using AtomicReference - push

Powerful we have become!

Using AtomicReference - pop

fun push(value: T) { … }

fun pop(): T? {
    while (true) {
        val cur = top.get() ?: return null
        if (top.compareAndSet(cur, cur.next)) return cur.value
    }
}

fun pop(): T? { … }}

It’s a trap

Using volatile variable

class LinkedStackLF<T> {
    @Volatile
    private var top: Node<T>? = null

    fun push(value: T) {
        // ...
    }
}

Using AtomicReferenceFieldUpdater

package java.util.concurrent.atomic;

/** @since 1.5 */
public abstract class AtomicReferenceFieldUpdater<T,V> {
    public static <U,W> AtomicReferenceFieldUpdater<U,W> newUpdater(
        Class<U> tclass, Class<W> vclass, String fieldName);

    public abstract boolean compareAndSet(T obj, V expect, V update);
}

Using AtomicReferenceFieldUpdater

private volatile Node<T> top;

private static final AtomicReferenceFieldUpdater<LockFree, Node> TOP =
    AtomicReferenceFieldUpdater.newUpdater(LockFree.class, Node.class, "top");

Using AtomicReferenceFieldUpdater

private volatile Node<T> top;

private static final AtomicReferenceFieldUpdater<LockFree, Node> TOP =
    AtomicReferenceFieldUpdater.newUpdater(LockFree.class, Node.class, "top");

if (TOP.compareAndSet(this, cur, upd)) return;

Using VarHandle

package java.lang.invoke;

/** @since 9 */
public abstract class VarHandle {
    @MethodHandle.PolymorphicSignature
    public native boolean compareAndSet(Object... args);
}

Using VarHandle

private volatile Node<T> top;

private static final VarHandle TOP;

static {
    try {
        TOP = MethodHandles.lookup()
            .findVarHandle(LockFree.class, "top", Node.class);
    } catch (NoSuchFieldException | IllegalAccessException e) {
        throw new InternalError(e);
    }
}

Using VarHandle

private volatile Node<T> top;

private static final VarHandle TOP;

static {
    try {
        TOP = MethodHandles.lookup()
            .findVarHandle(LockFree.class, "top", Node.class);
    } catch (NoSuchFieldException | IllegalAccessException e) {
        throw new InternalError(e);
    }
}

if (TOP.compareAndSet(this, cur, upd)) return;

Using AtomicFU ☺

private val top = atomic<Node<T>?>(null)

if (top.compareAndSet(cur, upd)) return

Code like AtomicReference

Bytecode | Code like AtomicReference

compile

Bytecode | Code like AtomicReference | AtomicReferenceFU | compile | atomicFU

Bytecode | Code like AtomicReference | VarHandle | compile | atomicFU

Was it worth it?

Benchmark results

1 2 4 8 16 32 64 128

Millions

Number of threads

Throughput (ops/s)

LockFree

LinkedStack

1 2 4 8 16 32 64 128

Millions

Number of threads

Throughput (ops/s)

LockFree

LinkedStack

Benchmark results

Nay…

Contention

Q retry

pop2tryupdate

Too toy of a problem?

class LinkedStack<T> {
    private var top: Node<T>? = null

    @Synchronized
    fun push(value: T) { … }

        val cur = top ?: return null
        top = cur.next
        return cur.value

Too toy of a problem – make it more real?

class LinkedStack<T> {
    private var top: Node<T>? = null

    @Synchronized
    fun push(value: T) { … }

        val cur = top ?: return null
        top = cur.next
        Blackhole.consumeCPU(100L)
        return cur.value

Too toy of a problem – make it more real?

class LockFree<T> {
    private val top = atomic<Node<T>?>(null)

    fun pop(): T? {
        while (true) {
            val cur = top.value ?: return null
            Blackhole.consumeCPU(100L)
            if (top.compareAndSet(cur, cur.next)) return cur.value
        }
    }
}

Benchmark results

1 2 4 8 16 32 64 128

Millions

Number of threads

Throughput (ops/s)

LockFree

LinkedStack

Workload

@State(Scope.Benchmark)
open class LinkedStackBenchmark {

Read-dominated workload

@State(Scope.Benchmark)
open class LinkedStackBenchmark {
    @Benchmark
    fun benchmarkReadDominated() {
        stack.push(1)
        repeat(10) { check(stack.peek() == 1) }
        check(stack.pop() == 1)
    }
}

Read-dominated workload

@State(Scope.Benchmark)
open class LinkedStackBenchmark {

class LinkedStack<T> {
    @Synchronized
    fun peek() = top?.value

Read-dominated workload

@State(Scope.Benchmark)
open class LockFreeBenchmark {
    private val stack = LockFree<Int>()

class LockFree<T> {
    fun peek() = top.value?.value

Benchmark results – x10 reads

1 2 4 8 16 32 64 128

Millions

Number of threads

Throughput (ops/s)

LockFree

LinkedStack

Benchmark results – x100 reads

1 2 4 8 16 32 64 128

Millions

Number of threads

Throughput (ops/s)

LockFree

LinkedStack

But… scalability?

Real-world workload

@Benchmark
fun benchmarkRealWorld() {
    stack.push(1)
    repeat(10) {
        check(stack.peek() == 1)
        Blackhole.consumeCPU(100L)
    }
    check(stack.pop() == 1)
    Blackhole.consumeCPU(100L)
}

Benchmark results – real world

0 0.2 0.4 0.6 0.8 1

1.2 1.4 1.6 1.8

1 2 4 8 16 32 64 128

Millions

Number of threads

Throughput (ops/s)

LockFree

LinkedStack

Learn to ask the right questions

You shall, young Padawan.

• JMH http://openjdk.java.net/projects/code-tools/jmh/
• Kotlin https://kotlinlang.org
• AtomicFU https://github.com/Kotlin/kotlinx.atomicfu

Thank you

Any questions?

Slides are available at www.slideshare.net/elizarov
email me to elizarov at gmail

relizarov

Appendix

A toy problem – concurrent pop

class Node<T>(val next: Node<T>?, val value: T)

next = Avalue = 2

cur1 cur2

next = Avalue = 2

cur1 cur2

next = Avalue = 2

result1 = 2cur1 cur2

result2 = 2

scale up with lock-free algorithms @ javaone

Technology

datafx 8 (javaone 2014)

distributed algorithms (22903) lecturer: danny hendler...

javaone 2010: osgi migrat

javaone 2013: memory efficient java

javaone 2007 bof session

linkedin javaone

javaone 2014: java debugging

javaone ts4693 v2

duchess javaone 2011

javaone 2013 report

jcp.next panel at javaone

jwebpane presentation at javaone 2009

2007 javaone conference

vijayr javaone final

javaone handout infinispan - jboss

datafx - javaone 2013

retour javaone 2009

live demo from javaone

wdjhit javaone-2011-aa

vraptor 4 - javaone