Skip to content

Commit

Permalink
FEAT: Add Operation throughput and latency metrics by mbean.
Browse files Browse the repository at this point in the history
  • Loading branch information
brido4125 committed Nov 21, 2024
1 parent 132254c commit 31929f7
Show file tree
Hide file tree
Showing 8 changed files with 317 additions and 0 deletions.
5 changes: 5 additions & 0 deletions src/main/java/net/spy/memcached/MemcachedConnection.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@
import net.spy.memcached.compat.SpyObject;
import net.spy.memcached.compat.log.LoggerFactory;
import net.spy.memcached.internal.ReconnDelay;
import net.spy.memcached.metrics.OpLatencyMonitor;
import net.spy.memcached.metrics.OpThroughputMonitor;
import net.spy.memcached.ops.KeyedOperation;
import net.spy.memcached.ops.MultiOperationCallback;
import net.spy.memcached.ops.Operation;
Expand Down Expand Up @@ -990,6 +992,7 @@ private void handleReads(MemcachedNode qa)
throw new IllegalStateException("No read operation.");
}
currentOp.readFromBuffer(rbuf);
OpLatencyMonitor.getInstance().recordLatency(currentOp.getStartTime());
if (currentOp.getState() == OperationState.COMPLETE) {
getLogger().debug("Completed read op: %s and giving the next %d bytes",
currentOp, rbuf.remaining());
Expand Down Expand Up @@ -1519,6 +1522,7 @@ public String toString() {
* @param op
*/
public static void opTimedOut(Operation op) {
OpThroughputMonitor.getInstance().addTimeoutOps();
MemcachedConnection.setTimeout(op, true);
}

Expand All @@ -1528,6 +1532,7 @@ public static void opTimedOut(Operation op) {
* @param ops
*/
public static void opsTimedOut(Collection<Operation> ops) {
OpThroughputMonitor.getInstance().addTimeoutOps(ops.size());
Collection<String> timedOutNodes = new HashSet<>();
for (Operation op : ops) {
try {
Expand Down
175 changes: 175 additions & 0 deletions src/main/java/net/spy/memcached/metrics/OpLatencyMonitor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,175 @@
package net.spy.memcached.metrics;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.atomic.AtomicReferenceArray;

import net.spy.memcached.ArcusMBeanServer;

public final class OpLatencyMonitor implements OpLatencyMonitorMBean {
private AtomicReferenceArray<Long> latencies;
private AtomicInteger currentIndex;
private AtomicInteger count;
private boolean enabled = true;

// 캐시된 메트릭을 원자적으로 관리
private final AtomicReference<MetricsSnapshot> cachedMetrics =
new AtomicReference<>(new MetricsSnapshot(0, 0, 0, 0, 0, 0));

private static final OpLatencyMonitor INSTANCE = new OpLatencyMonitor();
private static final long CACHE_DURATION = 2000; // 2초 캐시
private static final int WINDOW_SIZE = 10_000;
private static final MetricsSnapshot EMPTY_METRICS = new MetricsSnapshot(0, 0, 0, 0, 0, 0);

private OpLatencyMonitor() {
if (System.getProperty("arcus.mbean", "false").toLowerCase().equals("false")) {
enabled = false;
return;
}
this.latencies = new AtomicReferenceArray<>(WINDOW_SIZE);
this.currentIndex = new AtomicInteger(0);
this.count = new AtomicInteger(0);

for (int i = 0; i < WINDOW_SIZE; i++) {
latencies.set(i, 0L);
}

try {
ArcusMBeanServer mbs = ArcusMBeanServer.getInstance();
mbs.registMBean(this, this.getClass().getPackage().getName()
+ ":type=" + this.getClass().getSimpleName());
} catch (Exception e) {
throw new RuntimeException("Failed to register MBean", e);
}
}

public static OpLatencyMonitor getInstance() {
return INSTANCE;
}

public void recordLatency(long startNanos) {
if (!enabled) {
return;
}
long latencyMicros = TimeUnit.NANOSECONDS.toMicros(System.nanoTime() - startNanos);
int index = currentIndex.getAndUpdate(i -> (i + 1) % WINDOW_SIZE);
latencies.lazySet(index, latencyMicros);

if (count.get() < WINDOW_SIZE) {
count.incrementAndGet();
}
}

// 모든 메트릭을 한 번에 계산하고 캐시하는 메서드
private MetricsSnapshot computeMetrics() {
int currentCount = count.get();
if (currentCount == 0) {
return EMPTY_METRICS;
}

// 현재 데이터를 배열로 복사
List<Long> sortedLatencies = new ArrayList<>(currentCount);
int startIndex = currentIndex.get();

for (int i = 0; i < currentCount; i++) {
int idx = (startIndex - i + WINDOW_SIZE) % WINDOW_SIZE;
long value = latencies.get(idx);
if (value != 0) {
sortedLatencies.add(value);
}
}

if (sortedLatencies.isEmpty()) {
return EMPTY_METRICS;
}

sortedLatencies.sort(Long::compareTo);

// 모든 메트릭을 한 번에 계산
long avg = sortedLatencies.stream().mapToLong(Long::longValue).sum() / currentCount;
long min = sortedLatencies.get(0);
long max = sortedLatencies.get(currentCount - 1);
long p25 = sortedLatencies.get((int) Math.ceil((currentCount * 25.0) / 100.0) - 1);
long p50 = sortedLatencies.get((int) Math.ceil((currentCount * 50.0) / 100.0) - 1);
long p75 = sortedLatencies.get((int) Math.ceil((currentCount * 75.0) / 100.0) - 1);

return new MetricsSnapshot(avg, min, max, p25, p50, p75);
}

// 캐시된 메트릭을 가져오거나 필요시 새로 계산
private MetricsSnapshot getMetricsSnapshot() {
MetricsSnapshot current = cachedMetrics.get();
long now = System.currentTimeMillis();

// 캐시가 유효한지 확인
if (now - current.timestamp < CACHE_DURATION) {
return current;
}

// 새로운 메트릭 계산 및 캐시 업데이트
MetricsSnapshot newMetrics = computeMetrics();
cachedMetrics.set(newMetrics);
return newMetrics;
}

@Override
public long getAverageLatencyMicros() {
return getMetricsSnapshot().avgLatency;
}

@Override
public long getMinLatencyMicros() {
return getMetricsSnapshot().minLatency;
}

@Override
public long getMaxLatencyMicros() {
return getMetricsSnapshot().maxLatency;
}

@Override
public long get25thPercentileLatencyMicros() {
return getMetricsSnapshot().p25Latency;
}

@Override
public long get50thPercentileLatencyMicros() {
return getMetricsSnapshot().p50Latency;
}

@Override
public long get75thPercentileLatencyMicros() {
return getMetricsSnapshot().p75Latency;
}

@Override
public void resetStatistics() {
count.set(0);
currentIndex.set(0);
cachedMetrics.set(EMPTY_METRICS);
}

// 캐시된 메트릭을 저장할 불변 클래스
private final static class MetricsSnapshot {
private final long avgLatency;
private final long minLatency;
private final long maxLatency;
private final long p25Latency;
private final long p50Latency;
private final long p75Latency;
private final long timestamp; // 캐시 생성 시간

private MetricsSnapshot(long avg, long min, long max, long p25, long p50, long p75) {
this.avgLatency = avg;
this.minLatency = min;
this.maxLatency = max;
this.p25Latency = p25;
this.p50Latency = p50;
this.p75Latency = p75;
this.timestamp = System.currentTimeMillis();
}
}
}
17 changes: 17 additions & 0 deletions src/main/java/net/spy/memcached/metrics/OpLatencyMonitorMBean.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
package net.spy.memcached.metrics;

public interface OpLatencyMonitorMBean {
long getAverageLatencyMicros();

long getMaxLatencyMicros();

long getMinLatencyMicros();

long get25thPercentileLatencyMicros();

long get50thPercentileLatencyMicros();

long get75thPercentileLatencyMicros();

void resetStatistics();
}
94 changes: 94 additions & 0 deletions src/main/java/net/spy/memcached/metrics/OpThroughputMonitor.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
package net.spy.memcached.metrics;

import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;

import net.spy.memcached.ArcusMBeanServer;

public final class OpThroughputMonitor implements OpThroughputMonitorMBean {
private LongAdder completeOps;
private LongAdder cancelOps;
private LongAdder timeOutOps;
private AtomicLong lastResetTime;
private boolean enabled = true;

private static final OpThroughputMonitor INSTANCE = new OpThroughputMonitor();

private OpThroughputMonitor() {
if (System.getProperty("arcus.mbean", "false").toLowerCase().equals("false")) {
enabled = false;
return;
}

this.completeOps = new LongAdder();
this.cancelOps = new LongAdder();
this.timeOutOps = new LongAdder();
this.lastResetTime = new AtomicLong(System.currentTimeMillis());

try {
ArcusMBeanServer mbs = ArcusMBeanServer.getInstance();
mbs.registMBean(this, this.getClass().getPackage().getName()
+ ":type=" + this.getClass().getSimpleName());
} catch (Exception e) {
throw new RuntimeException("Failed to register Throughput MBean", e);
}
}

public static OpThroughputMonitor getInstance() {
return INSTANCE;
}

public void addCompleteOps() {
if (!enabled) {
return;
}
completeOps.increment();
}

public void addCancelOps() {
if (!enabled) {
return;
}
cancelOps.increment();
}

public void addTimeoutOps() {
if (!enabled) {
return;
}
timeOutOps.increment();
}

public void addTimeoutOps(int count) {
if (!enabled) {
return;
}
timeOutOps.add(count);
}

@Override
public long getCompletedOps() {
return getThroughput(completeOps);
}

@Override
public long getCanceledOps() {
return getThroughput(cancelOps);
}

@Override
public long getTimeoutOps() {
return getThroughput(timeOutOps);
}

private long getThroughput(LongAdder ops) {
long currentTime = System.currentTimeMillis();
long lastTime = lastResetTime.get();
long countValue = ops.sum();

// 경과 시간 계산 (초 단위)
long elapsedSeconds = (long) ((currentTime - lastTime) / 1000.0);

return countValue / elapsedSeconds;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package net.spy.memcached.metrics;

public interface OpThroughputMonitorMBean {
long getCompletedOps();

long getCanceledOps();

long getTimeoutOps();
}
4 changes: 4 additions & 0 deletions src/main/java/net/spy/memcached/ops/Operation.java
Original file line number Diff line number Diff line change
Expand Up @@ -134,4 +134,8 @@ public interface Operation {
/* ENABLE_MIGRATION end */

APIType getAPIType();

void setStartTime(long startTime);

long getStartTime();
}
12 changes: 12 additions & 0 deletions src/main/java/net/spy/memcached/protocol/BaseOperationImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import net.spy.memcached.MemcachedReplicaGroup;
import net.spy.memcached.RedirectHandler;
import net.spy.memcached.compat.SpyObject;
import net.spy.memcached.metrics.OpThroughputMonitor;
import net.spy.memcached.ops.APIType;
import net.spy.memcached.ops.CancelledOperationStatus;
import net.spy.memcached.ops.OperationCallback;
Expand Down Expand Up @@ -58,6 +59,15 @@ public abstract class BaseOperationImpl extends SpyObject {
private OperationType opType = OperationType.UNDEFINED;
private APIType apiType = APIType.UNDEFINED;

private long startTime;

public void setStartTime(long startTime) {
this.startTime = startTime;
}
public long getStartTime() {
return startTime;
}

/* ENABLE_MIGRATION if */
private RedirectHandler redirectHandler = null;
/* ENABLE_MIGRATION end */
Expand Down Expand Up @@ -95,6 +105,7 @@ public final OperationException getException() {
public final boolean cancel(String cause) {
if (callbacked.compareAndSet(false, true)) {
cancelled = true;
OpThroughputMonitor.getInstance().addCancelOps();
if (handlingNode != null) {
cause += " @ " + handlingNode.getNodeName();
}
Expand Down Expand Up @@ -222,6 +233,7 @@ protected final void transitionState(OperationState newState) {
}
if (state == OperationState.COMPLETE &&
callbacked.compareAndSet(false, true)) {
OpThroughputMonitor.getInstance().addCompleteOps();
callback.complete();
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ private Operation getNextWritableOp() {
Operation cancelledOp = removeCurrentWriteOp();
assert o == cancelledOp;
} else {
o.setStartTime(System.nanoTime());
o.writing();
readQ.add(o);
return o;
Expand Down

0 comments on commit 31929f7

Please sign in to comment.