// SPDX-License-Identifier: GPL-2.0-only
/*
 * builtin-timechart.c - make an svg timechart of system activity
 *
 * (C) Copyright 2009 Intel Corporation
 *
 * Authors:
 *     Arjan van de Ven <arjan@linux.intel.com>
 */

#include <errno.h>
#include <inttypes.h>

#include "builtin.h"
#include "util/color.h"
#include <linux/list.h>
#include "util/evlist.h" // for struct evsel_str_handler
#include "util/evsel.h"
#include <linux/kernel.h>
#include <linux/rbtree.h>
#include <linux/time64.h>
#include <linux/zalloc.h>
#include "util/symbol.h"
#include "util/thread.h"
#include "util/callchain.h"

#include "perf.h"
#include "util/header.h"
#include <subcmd/pager.h>
#include <subcmd/parse-options.h>
#include "util/parse-events.h"
#include "util/event.h"
#include "util/session.h"
#include "util/svghelper.h"
#include "util/tool.h"
#include "util/data.h"
#include "util/debug.h"
#include <linux/err.h>

#ifdef LACKS_OPEN_MEMSTREAM_PROTOTYPE
FILE *open_memstream(char **ptr, size_t *sizeloc);
#endif

#define SUPPORT_OLD_POWER_EVENTS 1
#define PWR_EVENT_EXIT -1

struct per_pid;
struct power_event;
struct wake_event;

/*
 * State of one timechart run: the embedded perf tool callbacks plus
 * everything collected from the samples and the options that control
 * how the SVG is drawn.
 */
struct timechart {
	struct perf_tool	tool;		/* embedded; recovered via container_of() */
	struct per_pid		*all_data;	/* list of every task seen */
	struct power_event	*power_events;	/* C/P state intervals, newest first */
	struct wake_event	*wake_events;	/* wakeups, newest first */
	int			proc_num;	/* tasks we would like to show; 0 = no process rows */
	unsigned int		numcpus;	/* taken from the HEADER_NRCPUS feature */
	u64			min_freq;	/* Lowest CPU frequency seen */
	u64			max_freq;	/* Highest CPU frequency seen */
	u64			turbo_frequency; /* max_freq, once a frequency 1000 below it was seen */
	u64			first_time;	/* smallest sample timestamp */
	u64			last_time;	/* largest sample timestamp */
	bool			power_only;	/* forces proc_num to 0 when writing the SVG */
	bool			tasks_only;	/* skip drawing C/P states */
	bool			with_backtrace;
	bool			topology;	/* build the SVG topology map from the header */
	bool			force;
	/* IO related settings */
	bool			io_only;
	bool			skip_eagain;	/* do not draw IO samples that failed with EAGAIN */
	u64			io_events;	/* completed IO samples; non-zero selects the IO chart */
	u64			min_time;	/* minimum drawn duration of an IO sample */
	u64			merge_dist;	/* max gap between IO samples that still get merged */
};

struct per_pidcomm;
struct cpu_sample;
struct io_sample;

/*
 * Datastructure layout:
 * We keep a list of "pid"s, matching the kernel's notion of a task struct.
 * Each "pid" entry has a list of "comm"s.
 *	This is because we want to track different programs differently, while
 *	exec will reuse the original pid (by design).
 * Each comm has a list of samples that will be used to draw
 * the final graph.
 */

/*
 * One entry per task (pid) seen in the trace, kept in a singly linked list
 * headed by timechart.all_data.
 */
struct per_pid {
	struct per_pid *next;

	int		pid;
	int		ppid;		/* parent pid, recorded on fork */

	u64		start_time;	/* fork time or earliest sample start */
	u64		end_time;	/* exit time; 0 until an exit is seen */
	u64		total_time;	/* summed duration of TYPE_RUNNING samples */
	u64		total_bytes;	/* summed byte counts of finished IO samples */
	int		display;	/* set by the determine_display_*() passes */

	struct per_pidcomm *all;	/* every comm this pid has had */
	struct per_pidcomm *current;	/* comm that new samples are attached to */
};


/*
 * One entry per (pid, comm) pair; owns the CPU and IO samples recorded
 * while the task ran under that name.
 */
struct per_pidcomm {
	struct per_pidcomm *next;

	u64		start_time;
	u64		end_time;	/* 0 until an exit is seen */
	u64		total_time;	/* summed duration of TYPE_RUNNING samples */
	u64		max_bytes;	/* largest byte count of a single (possibly merged) IO sample */
	u64		total_bytes;	/* summed byte counts of finished IO samples */

	int		Y;		/* row in the chart; 0 when not displayed */
	int		display;	/* set by the determine_display_*() passes */

	long		state;		/* current scheduler state, one of TYPE_* */
	u64		state_since;	/* timestamp at which 'state' was entered */

	char		*comm;		/* strdup()ed name; NULL until a comm is known */

	struct cpu_sample *samples;	/* newest first */
	struct io_sample  *io_samples;	/* newest first */
};

/*
 * Linked-list node carrying a timestamp and a variable-length byte payload.
 * NOTE(review): not referenced in the part of the file visible here.
 */
struct sample_wrapper {
	struct sample_wrapper *next;

	u64		timestamp;
	unsigned char	data[];		/* flexible array member */
};

/* Scheduler states tracked per comm and stored in cpu_sample.type. */
#define TYPE_NONE	0
#define TYPE_RUNNING	1
#define TYPE_WAITING	2
#define TYPE_BLOCKED	3

/* One interval a task spent in a given scheduler state on a given CPU. */
struct cpu_sample {
	struct cpu_sample *next;

	u64 start_time;
	u64 end_time;
	int type;		/* one of TYPE_* */
	int cpu;
	const char *backtrace;	/* callchain text from cat_backtrace(), may be NULL */
};

/* Kinds of IO activity stored in io_sample.type. */
enum {
	IOTYPE_READ,
	IOTYPE_WRITE,
	IOTYPE_SYNC,
	IOTYPE_TX,
	IOTYPE_RX,
	IOTYPE_POLL,
};

/* One IO syscall, from its enter event to its exit event. */
struct io_sample {
	struct io_sample *next;

	u64 start_time;
	u64 end_time;		/* 0 while the syscall has not returned yet */
	u64 bytes;		/* non-negative return value for read/write/tx/rx */
	int type;		/* one of IOTYPE_* */
	int fd;
	int err;		/* negative syscall return value, else 0 */
	int merges;		/* number of earlier samples folded into this one */
};

/* Values of power_event.type. */
#define CSTATE 1
#define PSTATE 2

/* One interval a CPU spent in a C state or at a given frequency. */
struct power_event {
	struct power_event *next;
	int type;		/* CSTATE or PSTATE */
	int state;		/* C state number, or frequency for PSTATE */
	u64 start_time;
	u64 end_time;
	int cpu;
};

/* One sched_wakeup event. */
struct wake_event {
	struct wake_event *next;
	int waker;		/* pid of the waker, or -1 for hard/soft irq context */
	int wakee;		/* pid being woken */
	u64 time;
	const char *backtrace;	/* callchain text from cat_backtrace(), may be NULL */
};

struct process_filter {
	char			*name;
	int			pid;
	struct process_filter	*next;
};

static struct process_filter *process_filter;


/*
 * Look up the per_pid entry for @pid, creating a zeroed one at the head of
 * tchart->all_data if none exists yet. Allocation failure trips an assert.
 */
static struct per_pid *find_create_pid(struct timechart *tchart, int pid)
{
	struct per_pid *entry;

	for (entry = tchart->all_data; entry; entry = entry->next) {
		if (entry->pid == pid)
			return entry;
	}

	entry = zalloc(sizeof(*entry));
	assert(entry != NULL);

	entry->pid = pid;
	entry->next = tchart->all_data;
	tchart->all_data = entry;

	return entry;
}

/*
 * Make @comm the current name of @pid.
 *
 * Walks the pid's comm list in order: an entry that has no name yet adopts
 * @comm, an entry already carrying the same name is reused. If the walk
 * ends without a hit, a new entry is pushed at the head of the list.
 */
static void pid_set_comm(struct timechart *tchart, int pid, char *comm)
{
	struct per_pid *task = find_create_pid(tchart, pid);
	struct per_pidcomm *entry;

	for (entry = task->all; entry; entry = entry->next) {
		if (!entry->comm) {
			/* unnamed placeholder: give it this name */
			entry->comm = strdup(comm);
			task->current = entry;
			return;
		}
		if (strcmp(entry->comm, comm) == 0) {
			task->current = entry;
			return;
		}
	}

	entry = zalloc(sizeof(*entry));
	assert(entry != NULL);

	entry->comm = strdup(comm);
	entry->next = task->all;
	task->all = entry;
	task->current = entry;
}

/*
 * Record that @ppid forked @pid at @timestamp. A child without a comm of
 * its own inherits the parent's current comm, and the start time of the
 * child (and of its current comm, if still unset) becomes @timestamp.
 */
static void pid_fork(struct timechart *tchart, int pid, int ppid, u64 timestamp)
{
	struct per_pid *child = find_create_pid(tchart, pid);
	struct per_pid *parent = find_create_pid(tchart, ppid);
	struct per_pidcomm *cur;

	child->ppid = ppid;

	if (!child->current && parent->current && parent->current->comm)
		pid_set_comm(tchart, pid, parent->current->comm);

	child->start_time = timestamp;

	cur = child->current;
	if (!cur || cur->start_time)
		return;

	cur->start_time = timestamp;
	cur->state_since = timestamp;
}

/* Record @timestamp as the end time of @pid and of its current comm. */
static void pid_exit(struct timechart *tchart, int pid, u64 timestamp)
{
	struct per_pid *task = find_create_pid(tchart, pid);

	task->end_time = timestamp;

	if (!task->current)
		return;

	task->current->end_time = timestamp;
}

/*
 * Attach a scheduler-state interval [@start, @end] of kind @type to the
 * current comm of @pid, creating an unnamed comm entry if the pid has none.
 *
 * Running intervals with a positive length and a non-zero start are added
 * to the accumulated run time; the start times of the comm and the pid are
 * pulled back to @start when it is earlier (or when still unset).
 */
static void pid_put_sample(struct timechart *tchart, int pid, int type,
			   unsigned int cpu, u64 start, u64 end,
			   const char *backtrace)
{
	struct per_pid *task = find_create_pid(tchart, pid);
	struct per_pidcomm *comm = task->current;
	struct cpu_sample *node;

	if (comm == NULL) {
		comm = zalloc(sizeof(*comm));
		assert(comm != NULL);
		comm->next = task->all;
		task->all = comm;
		task->current = comm;
	}

	node = zalloc(sizeof(*node));
	assert(node != NULL);

	node->type = type;
	node->cpu = cpu;
	node->start_time = start;
	node->end_time = end;
	node->backtrace = backtrace;

	/* push at the head of the comm's sample list */
	node->next = comm->samples;
	comm->samples = node;

	if (type == TYPE_RUNNING && start > 0 && end > start) {
		comm->total_time += end - start;
		task->total_time += end - start;
	}

	if (!comm->start_time || start < comm->start_time)
		comm->start_time = start;
	if (!task->start_time || start < task->start_time)
		task->start_time = start;
}

/* Number of slots in the per-CPU tables below, which are indexed by CPU id. */
#define MAX_CPUS 4096

/* Per CPU: when the current C state was entered, and which state it is. */
static u64 cpus_cstate_start_times[MAX_CPUS];
static int cpus_cstate_state[MAX_CPUS];
/* Per CPU: when the frequency last changed, and the frequency set then. */
static u64 cpus_pstate_start_times[MAX_CPUS];
static u64 cpus_pstate_state[MAX_CPUS];

/* PERF_RECORD_COMM callback: record the new name of the thread. Returns 0. */
static int process_comm_event(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	struct timechart *chart;

	chart = container_of(tool, struct timechart, tool);
	pid_set_comm(chart, event->comm.tid, event->comm.comm);

	return 0;
}

/* Fork callback: register the child/parent relation and fork time. Returns 0. */
static int process_fork_event(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	struct timechart *chart;

	chart = container_of(tool, struct timechart, tool);
	pid_fork(chart, event->fork.pid, event->fork.ppid, event->fork.time);

	return 0;
}

/*
 * Exit callback: record the exit time of the task. The pid and time are
 * read through the fork member of the event union. Returns 0.
 */
static int process_exit_event(struct perf_tool *tool,
			      union perf_event *event,
			      struct perf_sample *sample __maybe_unused,
			      struct machine *machine __maybe_unused)
{
	struct timechart *chart;

	chart = container_of(tool, struct timechart, tool);
	pid_exit(chart, event->fork.pid, event->fork.time);

	return 0;
}

/*
 * Flag related to the legacy power tracepoints.
 * NOTE(review): not referenced in the part of the file visible here;
 * presumably selects the old events when recording -- confirm.
 */
#ifdef SUPPORT_OLD_POWER_EVENTS
static int use_old_power_events;
#endif

/*
 * Remember that @cpu entered C state @state at @timestamp.
 *
 * @cpu comes straight from the trace data, so it is range-checked before
 * being used as an index into the per-CPU tables; out-of-range ids are
 * ignored.
 */
static void c_state_start(int cpu, u64 timestamp, int state)
{
	if (cpu < 0 || cpu >= MAX_CPUS) {
		pr_debug("Out-of-bounds cpu_id %d\n", cpu);
		return;
	}

	cpus_cstate_start_times[cpu] = timestamp;
	cpus_cstate_state[cpu] = state;
}

/*
 * Close the C state that @cpu is currently in: emit a CSTATE power event
 * running from the recorded start time to @timestamp and push it onto
 * tchart->power_events.
 *
 * @cpu comes straight from the trace data, so it is range-checked before
 * being used as an index into the per-CPU tables; out-of-range ids are
 * ignored. Allocation failure silently drops the event.
 */
static void c_state_end(struct timechart *tchart, int cpu, u64 timestamp)
{
	struct power_event *pwr;

	if (cpu < 0 || cpu >= MAX_CPUS) {
		pr_debug("Out-of-bounds cpu_id %d\n", cpu);
		return;
	}

	pwr = zalloc(sizeof(*pwr));
	if (!pwr)
		return;

	pwr->state = cpus_cstate_state[cpu];
	pwr->start_time = cpus_cstate_start_times[cpu];
	pwr->end_time = timestamp;
	pwr->cpu = cpu;
	pwr->type = CSTATE;
	pwr->next = tchart->power_events;

	tchart->power_events = pwr;
}

/*
 * Handle a frequency change of @cpu to @new_freq at @timestamp.
 *
 * Emits a PSTATE power event covering the time spent at the previous
 * frequency (starting at the first sample time if no earlier change was
 * seen), then records the new frequency and updates the min/max/turbo
 * bookkeeping.
 *
 * Frequencies above 8000000 are treated as invalid data and ignored.
 * @cpu comes straight from the trace data, so it is range-checked before
 * being used as an index into the per-CPU tables; out-of-range ids are
 * ignored. Allocation failure silently drops the event.
 */
static void p_state_change(struct timechart *tchart, int cpu, u64 timestamp, u64 new_freq)
{
	struct power_event *pwr;

	if (new_freq > 8000000) /* detect invalid data */
		return;

	if (cpu < 0 || cpu >= MAX_CPUS) {
		pr_debug("Out-of-bounds cpu_id %d\n", cpu);
		return;
	}

	pwr = zalloc(sizeof(*pwr));
	if (!pwr)
		return;

	pwr->state = cpus_pstate_state[cpu];
	pwr->start_time = cpus_pstate_start_times[cpu];
	pwr->end_time = timestamp;
	pwr->cpu = cpu;
	pwr->type = PSTATE;
	pwr->next = tchart->power_events;

	/* first change seen on this CPU: the interval starts with the trace */
	if (!pwr->start_time)
		pwr->start_time = tchart->first_time;

	tchart->power_events = pwr;

	cpus_pstate_state[cpu] = new_freq;
	cpus_pstate_start_times[cpu] = timestamp;

	if ((u64)new_freq > tchart->max_freq)
		tchart->max_freq = new_freq;

	if (new_freq < tchart->min_freq || tchart->min_freq == 0)
		tchart->min_freq = new_freq;

	/* a frequency exactly 1000 below the maximum marks the maximum as turbo */
	if (new_freq == tchart->max_freq - 1000)
		tchart->turbo_frequency = tchart->max_freq;
}

/*
 * Record a wakeup of @wakee by @waker at @timestamp.
 *
 * The waker is stored as -1 when the event was raised from hard or soft
 * interrupt context. If the wakee has a current comm, it moves to the
 * WAITING state; a BLOCKED period that ends here is first emitted as a
 * sample. Allocation failure silently drops the event.
 */
static void sched_wakeup(struct timechart *tchart, int cpu, u64 timestamp,
			 int waker, int wakee, u8 flags, const char *backtrace)
{
	struct per_pid *target;
	struct per_pidcomm *comm;
	struct wake_event *wake = zalloc(sizeof(*wake));

	if (wake == NULL)
		return;

	wake->time = timestamp;
	wake->backtrace = backtrace;
	wake->wakee = wakee;
	wake->waker = (flags & (TRACE_FLAG_HARDIRQ | TRACE_FLAG_SOFTIRQ)) ? -1 : waker;

	wake->next = tchart->wake_events;
	tchart->wake_events = wake;

	target = find_create_pid(tchart, wake->wakee);
	comm = target ? target->current : NULL;
	if (comm == NULL)
		return;

	if (comm->state == TYPE_NONE) {
		comm->state_since = timestamp;
		comm->state = TYPE_WAITING;
	} else if (comm->state == TYPE_BLOCKED) {
		/* close the blocked interval before switching to waiting */
		pid_put_sample(tchart, target->pid, comm->state, cpu,
			       comm->state_since, timestamp, NULL);
		comm->state_since = timestamp;
		comm->state = TYPE_WAITING;
	}
}

/*
 * Handle a context switch on @cpu from @prev_pid to @next_pid.
 *
 * The outgoing task's interval is emitted as RUNNING (if it had a tracked
 * state), the incoming task's pending interval is emitted with whatever
 * state it was in and the task becomes RUNNING. Finally the outgoing task
 * is marked WAITING when @prev_state is 0, BLOCKED when bit 1 of
 * @prev_state is set, and NONE otherwise.
 */
static void sched_switch(struct timechart *tchart, int cpu, u64 timestamp,
			 int prev_pid, int next_pid, u64 prev_state,
			 const char *backtrace)
{
	struct per_pid *outgoing = find_create_pid(tchart, prev_pid);
	struct per_pid *incoming = find_create_pid(tchart, next_pid);
	struct per_pidcomm *out_comm = outgoing->current;
	struct per_pidcomm *in_comm = incoming ? incoming->current : NULL;

	if (out_comm && out_comm->state != TYPE_NONE)
		pid_put_sample(tchart, prev_pid, TYPE_RUNNING, cpu,
			       out_comm->state_since, timestamp, backtrace);

	if (in_comm) {
		if (in_comm->state != TYPE_NONE)
			pid_put_sample(tchart, next_pid, in_comm->state, cpu,
				       in_comm->state_since, timestamp,
				       backtrace);

		in_comm->state = TYPE_RUNNING;
		in_comm->state_since = timestamp;
	}

	if (!out_comm)
		return;

	out_comm->state_since = timestamp;
	if (prev_state == 0)
		out_comm->state = TYPE_WAITING;
	else if (prev_state & 2)
		out_comm->state = TYPE_BLOCKED;
	else
		out_comm->state = TYPE_NONE;
}

/*
 * Render the callchain of @sample as text, one "..... <ip> [symbol]" line
 * per frame, into a heap buffer built with open_memstream().
 *
 * Returns the buffer (owned by the caller), an empty buffer when the sample
 * has no callchain or cannot be resolved, or NULL when the memory stream
 * cannot be opened or the callchain turns out to be corrupted.
 */
static const char *cat_backtrace(union perf_event *event,
				 struct perf_sample *sample,
				 struct machine *machine)
{
	struct addr_location al;
	unsigned int i;
	char *p = NULL;
	size_t p_len;
	u8 cpumode = PERF_RECORD_MISC_USER;
	struct addr_location tal;
	struct ip_callchain *chain = sample->callchain;
	bool corrupted = false;
	FILE *f = open_memstream(&p, &p_len);

	if (!f) {
		perror("open_memstream error");
		return NULL;
	}

	if (!chain)
		goto exit;

	if (machine__resolve(machine, &al, sample) < 0) {
		fprintf(stderr, "problem processing %d event, skipping it.\n",
			event->header.type);
		goto exit;
	}

	for (i = 0; i < chain->nr; i++) {
		u64 ip;

		if (callchain_param.order == ORDER_CALLEE)
			ip = chain->ips[i];
		else
			ip = chain->ips[chain->nr - i - 1];

		/* context markers switch the cpumode for the frames that follow */
		if (ip >= PERF_CONTEXT_MAX) {
			switch (ip) {
			case PERF_CONTEXT_HV:
				cpumode = PERF_RECORD_MISC_HYPERVISOR;
				break;
			case PERF_CONTEXT_KERNEL:
				cpumode = PERF_RECORD_MISC_KERNEL;
				break;
			case PERF_CONTEXT_USER:
				cpumode = PERF_RECORD_MISC_USER;
				break;
			default:
				pr_debug("invalid callchain context: "
					 "%"PRId64"\n", (s64) ip);

				/*
				 * It seems the callchain is corrupted.
				 * Discard all.
				 */
				corrupted = true;
				goto exit_put;
			}
			continue;
		}

		tal.filtered = 0;
		if (thread__find_symbol(al.thread, cpumode, ip, &tal))
			fprintf(f, "..... %016" PRIx64 " %s\n", ip, tal.sym->name);
		else
			fprintf(f, "..... %016" PRIx64 "\n", ip);
	}
exit_put:
	addr_location__put(&al);
exit:
	fclose(f);

	/*
	 * The stream only hands its buffer over through p when it is flushed
	 * or closed, so the partial output can only be dropped after fclose().
	 */
	if (corrupted)
		zfree(&p);

	return p;
}

/*
 * Signature of the per-tracepoint handlers that process_sample_event()
 * dispatches to through evsel->handler; @backtrace is the text produced by
 * cat_backtrace() for the sample.
 */
typedef int (*tracepoint_handler)(struct timechart *tchart,
				  struct evsel *evsel,
				  struct perf_sample *sample,
				  const char *backtrace);

static int process_sample_event(struct perf_tool *tool,
				union perf_event *event,
				struct perf_sample *sample,
				struct evsel *evsel,
				struct machine *machine)
{
	struct timechart *tchart = container_of(tool, struct timechart, tool);

	if (evsel->core.attr.sample_type & PERF_SAMPLE_TIME) {
		if (!tchart->first_time || tchart->first_time > sample->time)
			tchart->first_time = sample->time;
		if (tchart->last_time < sample->time)
			tchart->last_time = sample->time;
	}

	if (evsel->handler != NULL) {
		tracepoint_handler f = evsel->handler;
		return f(tchart, evsel, sample,
			 cat_backtrace(event, sample, machine));
	}

	return 0;
}

/*
 * power:cpu_idle handler: a "state" of PWR_EVENT_EXIT ends the current
 * C state of the CPU, any other value starts that C state. Returns 0.
 */
static int
process_sample_cpu_idle(struct timechart *tchart __maybe_unused,
			struct evsel *evsel,
			struct perf_sample *sample,
			const char *backtrace __maybe_unused)
{
	u32 idle_state = evsel__intval(evsel, sample, "state");
	u32 cpu = evsel__intval(evsel, sample, "cpu_id");

	if (idle_state != (u32)PWR_EVENT_EXIT) {
		c_state_start(cpu, sample->time, idle_state);
		return 0;
	}

	c_state_end(tchart, cpu, sample->time);
	return 0;
}

/*
 * power:cpu_frequency handler: the "state" field carries the new frequency
 * of the CPU named by "cpu_id". Returns 0.
 */
static int
process_sample_cpu_frequency(struct timechart *tchart,
			     struct evsel *evsel,
			     struct perf_sample *sample,
			     const char *backtrace __maybe_unused)
{
	u32 freq = evsel__intval(evsel, sample, "state");
	u32 cpu = evsel__intval(evsel, sample, "cpu_id");

	p_state_change(tchart, cpu, sample->time, freq);

	return 0;
}

/*
 * sched:sched_wakeup handler: the waker is the task that emitted the event
 * ("common_pid"), the wakee is the "pid" field. Returns 0.
 */
static int
process_sample_sched_wakeup(struct timechart *tchart,
			    struct evsel *evsel,
			    struct perf_sample *sample,
			    const char *backtrace)
{
	u8 trace_flags = evsel__intval(evsel, sample, "common_flags");
	int from_pid = evsel__intval(evsel, sample, "common_pid");
	int to_pid = evsel__intval(evsel, sample, "pid");

	sched_wakeup(tchart, sample->cpu, sample->time,
		     from_pid, to_pid, trace_flags, backtrace);

	return 0;
}

/*
 * sched:sched_switch handler: forwards the outgoing pid, incoming pid and
 * the outgoing task's state to sched_switch(). Returns 0.
 */
static int
process_sample_sched_switch(struct timechart *tchart,
			    struct evsel *evsel,
			    struct perf_sample *sample,
			    const char *backtrace)
{
	int out_pid = evsel__intval(evsel, sample, "prev_pid");
	int in_pid = evsel__intval(evsel, sample, "next_pid");
	u64 out_state = evsel__intval(evsel, sample, "prev_state");

	sched_switch(tchart, sample->cpu, sample->time,
		     out_pid, in_pid, out_state, backtrace);

	return 0;
}

#ifdef SUPPORT_OLD_POWER_EVENTS
/*
 * Legacy power:power_start handler: "value" is the C state entered by the
 * CPU named by "cpu_id". Returns 0.
 */
static int
process_sample_power_start(struct timechart *tchart __maybe_unused,
			   struct evsel *evsel,
			   struct perf_sample *sample,
			   const char *backtrace __maybe_unused)
{
	u64 cpu = evsel__intval(evsel, sample, "cpu_id");
	u64 cstate = evsel__intval(evsel, sample, "value");

	c_state_start(cpu, sample->time, cstate);

	return 0;
}

/*
 * Legacy power:power_end handler: ends the C state of the CPU the sample
 * was taken on. Returns 0.
 */
static int
process_sample_power_end(struct timechart *tchart,
			 struct evsel *evsel __maybe_unused,
			 struct perf_sample *sample,
			 const char *backtrace __maybe_unused)
{
	c_state_end(tchart, sample->cpu, sample->time);

	return 0;
}

/*
 * Legacy power:power_frequency handler: "value" is the new frequency of
 * the CPU named by "cpu_id". Returns 0.
 */
static int
process_sample_power_frequency(struct timechart *tchart,
			       struct evsel *evsel,
			       struct perf_sample *sample,
			       const char *backtrace __maybe_unused)
{
	u64 cpu = evsel__intval(evsel, sample, "cpu_id");
	u64 freq = evsel__intval(evsel, sample, "value");

	p_state_change(tchart, cpu, sample->time, freq);

	return 0;
}
#endif /* SUPPORT_OLD_POWER_EVENTS */

/*
 * After the last sample we need to wrap up the current C/P state
 * and close out each CPU for these.
 */
static void end_sample_processing(struct timechart *tchart)
{
	u64 cpu;
	struct power_event *pwr;

	for (cpu = 0; cpu <= tchart->numcpus; cpu++) {
		/* C state */
#if 0
		pwr = zalloc(sizeof(*pwr));
		if (!pwr)
			return;

		pwr->state = cpus_cstate_state[cpu];
		pwr->start_time = cpus_cstate_start_times[cpu];
		pwr->end_time = tchart->last_time;
		pwr->cpu = cpu;
		pwr->type = CSTATE;
		pwr->next = tchart->power_events;

		tchart->power_events = pwr;
#endif
		/* P state */

		pwr = zalloc(sizeof(*pwr));
		if (!pwr)
			return;

		pwr->state = cpus_pstate_state[cpu];
		pwr->start_time = cpus_pstate_start_times[cpu];
		pwr->end_time = tchart->last_time;
		pwr->cpu = cpu;
		pwr->type = PSTATE;
		pwr->next = tchart->power_events;

		if (!pwr->start_time)
			pwr->start_time = tchart->first_time;
		if (!pwr->state)
			pwr->state = tchart->min_freq;
		tchart->power_events = pwr;
	}
}

/*
 * Open an IO sample of kind @type on @fd for @pid at time @start.
 *
 * If the newest sample of the task's current comm was started but never
 * finished, that stale sample is dropped and the new start event is
 * skipped as well. Otherwise a fresh sample is pushed onto the list.
 *
 * Returns 0 on success (including the skip case) or -ENOMEM.
 */
static int pid_begin_io_sample(struct timechart *tchart, int pid, int type,
			       u64 start, int fd)
{
	struct per_pid *task = find_create_pid(tchart, pid);
	struct per_pidcomm *comm = task->current;
	struct io_sample *latest, *fresh;

	if (comm == NULL) {
		comm = zalloc(sizeof(*comm));
		if (comm == NULL)
			return -ENOMEM;
		comm->next = task->all;
		task->all = comm;
		task->current = comm;
	}

	latest = comm->io_samples;
	if (latest != NULL && latest->start_time != 0 && latest->end_time == 0) {
		pr_warning("Skip invalid start event: "
			   "previous event already started!\n");

		/* remove previous event that has been started,
		 * we are not sure we will ever get an end for it */
		comm->io_samples = latest->next;
		free(latest);
		return 0;
	}

	fresh = zalloc(sizeof(*fresh));
	if (fresh == NULL)
		return -ENOMEM;

	fresh->type = type;
	fresh->fd = fd;
	fresh->start_time = start;
	fresh->next = comm->io_samples;
	comm->io_samples = fresh;

	if (!comm->start_time || start < comm->start_time)
		comm->start_time = start;

	return 0;
}

/*
 * Finish the newest IO sample of @pid's current comm at time @end, with
 * @ret being the syscall return value.
 *
 * The sample is stretched to at least min_time, kept from overlapping its
 * predecessor, annotated with the error or byte count, and merged with the
 * predecessor when both have the same type, error and fd and lie within
 * merge_dist of each other.
 *
 * Returns -1 when the pid has no current comm, 0 otherwise (end events
 * that do not match an open sample are skipped).
 */
static int pid_end_io_sample(struct timechart *tchart, int pid, int type,
			     u64 end, long ret)
{
	struct per_pid *task = find_create_pid(tchart, pid);
	struct per_pidcomm *comm = task->current;
	struct io_sample *cur, *older;

	if (comm == NULL) {
		pr_warning("Invalid pidcomm!\n");
		return -1;
	}

	cur = comm->io_samples;
	if (cur == NULL) /* skip partially captured events */
		return 0;

	if (cur->end_time != 0) {
		pr_warning("Skip invalid end event: "
			   "previous event already ended!\n");
		return 0;
	}

	if (cur->type != type) {
		pr_warning("Skip invalid end event: invalid event type!\n");
		return 0;
	}

	older = cur->next;
	cur->end_time = end;

	/* we want to be able to see small and fast transfers, so make them
	 * at least min_time long, but don't overlap them */
	if (cur->end_time - cur->start_time < tchart->min_time)
		cur->end_time = cur->start_time + tchart->min_time;

	if (older != NULL && cur->start_time < older->end_time) {
		if (older->err) /* try to make errors more visible */
			cur->start_time = older->end_time;
		else
			older->end_time = cur->start_time;
	}

	if (ret < 0) {
		cur->err = ret;
	} else {
		switch (type) {
		case IOTYPE_READ:
		case IOTYPE_WRITE:
		case IOTYPE_TX:
		case IOTYPE_RX:
			/* only data-moving calls contribute byte counts */
			if ((u64)ret > comm->max_bytes)
				comm->max_bytes = ret;

			comm->total_bytes += ret;
			task->total_bytes += ret;
			cur->bytes = ret;
			break;
		default:
			break;
		}
	}

	/* merge two requests to make svg smaller and render-friendly */
	if (older != NULL &&
	    older->type == cur->type &&
	    older->err == cur->err &&
	    older->fd == cur->fd &&
	    older->end_time + tchart->merge_dist >= cur->start_time) {

		cur->bytes += older->bytes;
		cur->merges += older->merges + 1;

		cur->start_time = older->start_time;
		cur->next = older->next;
		free(older);

		if (!cur->err && cur->bytes > comm->max_bytes)
			comm->max_bytes = cur->bytes;
	}

	tchart->io_events++;

	return 0;
}

/* Syscall-enter handler for the read family: opens an IOTYPE_READ sample. */
static int
process_enter_read(struct timechart *tchart,
		   struct evsel *evsel,
		   struct perf_sample *sample)
{
	long descriptor = evsel__intval(evsel, sample, "fd");

	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_READ,
				   sample->time, descriptor);
}

/* Syscall-exit handler for the read family: closes the IOTYPE_READ sample. */
static int
process_exit_read(struct timechart *tchart,
		  struct evsel *evsel,
		  struct perf_sample *sample)
{
	long result = evsel__intval(evsel, sample, "ret");

	return pid_end_io_sample(tchart, sample->tid, IOTYPE_READ,
				 sample->time, result);
}

/* Syscall-enter handler for the write family: opens an IOTYPE_WRITE sample. */
static int
process_enter_write(struct timechart *tchart,
		    struct evsel *evsel,
		    struct perf_sample *sample)
{
	long descriptor = evsel__intval(evsel, sample, "fd");

	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_WRITE,
				   sample->time, descriptor);
}

/* Syscall-exit handler for the write family: closes the IOTYPE_WRITE sample. */
static int
process_exit_write(struct timechart *tchart,
		   struct evsel *evsel,
		   struct perf_sample *sample)
{
	long result = evsel__intval(evsel, sample, "ret");

	return pid_end_io_sample(tchart, sample->tid, IOTYPE_WRITE,
				 sample->time, result);
}

/* Syscall-enter handler for the sync family: opens an IOTYPE_SYNC sample. */
static int
process_enter_sync(struct timechart *tchart,
		   struct evsel *evsel,
		   struct perf_sample *sample)
{
	long descriptor = evsel__intval(evsel, sample, "fd");

	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_SYNC,
				   sample->time, descriptor);
}

/* Syscall-exit handler for the sync family: closes the IOTYPE_SYNC sample. */
static int
process_exit_sync(struct timechart *tchart,
		  struct evsel *evsel,
		  struct perf_sample *sample)
{
	long result = evsel__intval(evsel, sample, "ret");

	return pid_end_io_sample(tchart, sample->tid, IOTYPE_SYNC,
				 sample->time, result);
}

/* Syscall-enter handler for the send family: opens an IOTYPE_TX sample. */
static int
process_enter_tx(struct timechart *tchart,
		 struct evsel *evsel,
		 struct perf_sample *sample)
{
	long descriptor = evsel__intval(evsel, sample, "fd");

	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_TX,
				   sample->time, descriptor);
}

/* Syscall-exit handler for the send family: closes the IOTYPE_TX sample. */
static int
process_exit_tx(struct timechart *tchart,
		struct evsel *evsel,
		struct perf_sample *sample)
{
	long result = evsel__intval(evsel, sample, "ret");

	return pid_end_io_sample(tchart, sample->tid, IOTYPE_TX,
				 sample->time, result);
}

/* Syscall-enter handler for the receive family: opens an IOTYPE_RX sample. */
static int
process_enter_rx(struct timechart *tchart,
		 struct evsel *evsel,
		 struct perf_sample *sample)
{
	long descriptor = evsel__intval(evsel, sample, "fd");

	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_RX,
				   sample->time, descriptor);
}

/* Syscall-exit handler for the receive family: closes the IOTYPE_RX sample. */
static int
process_exit_rx(struct timechart *tchart,
		struct evsel *evsel,
		struct perf_sample *sample)
{
	long result = evsel__intval(evsel, sample, "ret");

	return pid_end_io_sample(tchart, sample->tid, IOTYPE_RX,
				 sample->time, result);
}

/* Syscall-enter handler for the poll family: opens an IOTYPE_POLL sample. */
static int
process_enter_poll(struct timechart *tchart,
		   struct evsel *evsel,
		   struct perf_sample *sample)
{
	long descriptor = evsel__intval(evsel, sample, "fd");

	return pid_begin_io_sample(tchart, sample->tid, IOTYPE_POLL,
				   sample->time, descriptor);
}

/* Syscall-exit handler for the poll family: closes the IOTYPE_POLL sample. */
static int
process_exit_poll(struct timechart *tchart,
		  struct evsel *evsel,
		  struct perf_sample *sample)
{
	long result = evsel__intval(evsel, sample, "ret");

	return pid_end_io_sample(tchart, sample->tid, IOTYPE_POLL,
				 sample->time, result);
}

/*
 * Sort the pid datastructure
 */
/*
 * Re-link tchart->all_data in ascending (ppid, pid) order.
 *
 * Plain insertion sort: entries are popped off the old list one by one and
 * placed in front of the first entry of the new list that sorts strictly
 * after them, so entries with equal keys keep their arrival order.
 */
static void sort_pids(struct timechart *tchart)
{
	struct per_pid *sorted = NULL;

	while (tchart->all_data) {
		struct per_pid *item = tchart->all_data;
		struct per_pid **slot = &sorted;

		tchart->all_data = item->next;

		/* skip every entry that does not sort after the new item */
		while (*slot) {
			struct per_pid *other = *slot;

			if (other->ppid > item->ppid ||
			    (other->ppid == item->ppid && other->pid > item->pid))
				break;

			slot = &other->next;
		}

		item->next = *slot;
		*slot = item;
	}

	tchart->all_data = sorted;
}


/*
 * Draw all recorded power events. C states are drawn in a first pass and
 * P states in a second one so that the P state bars end up on top of the
 * C state blocks. A P state event without a frequency is set to the lowest
 * frequency seen before it is drawn.
 */
static void draw_c_p_states(struct timechart *tchart)
{
	struct power_event *ev;

	for (ev = tchart->power_events; ev; ev = ev->next) {
		if (ev->type != CSTATE)
			continue;
		svg_cstate(ev->cpu, ev->start_time, ev->end_time, ev->state);
	}

	for (ev = tchart->power_events; ev; ev = ev->next) {
		if (ev->type != PSTATE)
			continue;
		if (ev->state == 0)
			ev->state = tchart->min_freq;
		svg_pstate(ev->cpu, ev->start_time, ev->end_time, ev->state);
	}
}

/* Build a heap-allocated "[pid]" label; returns NULL if allocation fails. */
static char *wakeup_pid_label(int pid)
{
	const size_t len = 40;
	char *label = malloc(len);

	if (label)
		snprintf(label, len, "[%i]", pid);

	return label;
}

/*
 * Draw every recorded wakeup.
 *
 * For both ends of a wakeup the chart row is looked up: preferably a
 * displayed comm that was alive at the time of the wakeup, otherwise any
 * comm of that pid. Depending on what was found, the wakeup is drawn as an
 * interrupt marker (waker == -1), a full line between two adjacent rows, or
 * a partial line labelled with the task names (or "[pid]" when no name is
 * known).
 *
 * Comm entries without a name are tolerated, labels replaced during the
 * fallback search are freed, and a partial line whose label cannot be
 * allocated is skipped instead of dereferencing NULL.
 */
static void draw_wakeups(struct timechart *tchart)
{
	struct wake_event *we;
	struct per_pid *p;
	struct per_pidcomm *c;

	for (we = tchart->wake_events; we; we = we->next) {
		int from = 0, to = 0;
		char *task_from = NULL, *task_to = NULL;

		/* locate the column of the waker and wakee */
		for (p = tchart->all_data; p; p = p->next) {
			if (p->pid != we->waker && p->pid != we->wakee)
				continue;

			/* first choice: a displayed comm alive at wakeup time */
			for (c = p->all; c; c = c->next) {
				if (!c->Y || c->start_time > we->time ||
				    c->end_time < we->time)
					continue;

				if (p->pid == we->waker && !from) {
					from = c->Y;
					free(task_from);
					task_from = c->comm ? strdup(c->comm) : NULL;
				}
				if (p->pid == we->wakee && !to) {
					to = c->Y;
					free(task_to);
					task_to = c->comm ? strdup(c->comm) : NULL;
				}
			}

			/*
			 * fallback: any comm of this pid. While the row stays 0
			 * later comms replace the label, so free the old one.
			 */
			for (c = p->all; c; c = c->next) {
				if (p->pid == we->waker && !from) {
					from = c->Y;
					free(task_from);
					task_from = c->comm ? strdup(c->comm) : NULL;
				}
				if (p->pid == we->wakee && !to) {
					to = c->Y;
					free(task_to);
					task_to = c->comm ? strdup(c->comm) : NULL;
				}
			}
		}

		if (!task_from)
			task_from = wakeup_pid_label(we->waker);
		if (!task_to)
			task_to = wakeup_pid_label(we->wakee);

		if (we->waker == -1)
			svg_interrupt(we->time, to, we->backtrace);
		else if (from && to && abs(from - to) == 1)
			svg_wakeline(we->time, from, to, we->backtrace);
		else if (task_from && task_to)
			svg_partial_wakeline(we->time, from, task_from, to,
					     task_to, we->backtrace);

		free(task_from);
		free(task_to);
	}
}

/*
 * Draw, on the per-CPU rows, every interval during which some task was
 * running, for all pids and all of their comms.
 */
static void draw_cpu_usage(struct timechart *tchart)
{
	struct per_pid *task;
	struct per_pidcomm *comm;
	struct cpu_sample *s;

	for (task = tchart->all_data; task; task = task->next) {
		for (comm = task->all; comm; comm = comm->next) {
			for (s = comm->samples; s; s = s->next) {
				if (s->type != TYPE_RUNNING)
					continue;

				svg_process(s->cpu, s->start_time, s->end_time,
					    task->pid, comm->comm, s->backtrace);
			}
		}
	}
}

static void draw_io_bars(struct timechart *tchart)
{
	const char *suf;
	double bytes;
	char comm[256];
	struct per_pid *p;
	struct per_pidcomm *c;
	struct io_sample *sample;
	int Y = 1;

	p = tchart->all_data;
	while (p) {
		c = p->all;
		while (c) {
			if (!c->display) {
				c->Y = 0;
				c = c->next;
				continue;
			}

			svg_box(Y, c->start_time, c->end_time, "process3");
			sample = c->io_samples;
			for (sample = c->io_samples; sample; sample = sample->next) {
				double h = (double)sample->bytes / c->max_bytes;

				if (tchart->skip_eagain &&
				    sample->err == -EAGAIN)
					continue;

				if (sample->err)
					h = 1;

				if (sample->type == IOTYPE_SYNC)
					svg_fbox(Y,
						sample->start_time,
						sample->end_time,
						1,
						sample->err ? "error" : "sync",
						sample->fd,
						sample->err,
						sample->merges);
				else if (sample->type == IOTYPE_POLL)
					svg_fbox(Y,
						sample->start_time,
						sample->end_time,
						1,
						sample->err ? "error" : "poll",
						sample->fd,
						sample->err,
						sample->merges);
				else if (sample->type == IOTYPE_READ)
					svg_ubox(Y,
						sample->start_time,
						sample->end_time,
						h,
						sample->err ? "error" : "disk",
						sample->fd,
						sample->err,
						sample->merges);
				else if (sample->type == IOTYPE_WRITE)
					svg_lbox(Y,
						sample->start_time,
						sample->end_time,
						h,
						sample->err ? "error" : "disk",
						sample->fd,
						sample->err,
						sample->merges);
				else if (sample->type == IOTYPE_RX)
					svg_ubox(Y,
						sample->start_time,
						sample->end_time,
						h,
						sample->err ? "error" : "net",
						sample->fd,
						sample->err,
						sample->merges);
				else if (sample->type == IOTYPE_TX)
					svg_lbox(Y,
						sample->start_time,
						sample->end_time,
						h,
						sample->err ? "error" : "net",
						sample->fd,
						sample->err,
						sample->merges);
			}

			suf = "";
			bytes = c->total_bytes;
			if (bytes > 1024) {
				bytes = bytes / 1024;
				suf = "K";
			}
			if (bytes > 1024) {
				bytes = bytes / 1024;
				suf = "M";
			}
			if (bytes > 1024) {
				bytes = bytes / 1024;
				suf = "G";
			}


			sprintf(comm, "%s:%i (%3.1f %sbytes)", c->comm ?: "", p->pid, bytes, suf);
			svg_text(Y, c->start_time, comm);

			c->Y = Y;
			Y++;
			c = c->next;
		}
		p = p->next;
	}
}

/*
 * Draw one row per displayed comm for the scheduling chart.
 *
 * Rows start below the CPU rows (at 2 * numcpus + 2). Each row gets a
 * background box, one box per running/blocked/waiting interval and, when
 * the comm has a name, a "<comm>:<pid> (<run time>)" label in seconds or
 * milliseconds. The row number is stored in the comm's Y; comms that are
 * not displayed get Y = 0.
 */
static void draw_process_bars(struct timechart *tchart)
{
	struct per_pid *task;
	struct per_pidcomm *comm;
	struct cpu_sample *s;
	int Y = 2 * tchart->numcpus + 2;

	for (task = tchart->all_data; task; task = task->next) {
		for (comm = task->all; comm; comm = comm->next) {
			if (!comm->display) {
				comm->Y = 0;
				continue;
			}

			svg_box(Y, comm->start_time, comm->end_time, "process");

			for (s = comm->samples; s; s = s->next) {
				switch (s->type) {
				case TYPE_RUNNING:
					svg_running(Y, s->cpu, s->start_time,
						    s->end_time, s->backtrace);
					break;
				case TYPE_BLOCKED:
					svg_blocked(Y, s->cpu, s->start_time,
						    s->end_time, s->backtrace);
					break;
				case TYPE_WAITING:
					svg_waiting(Y, s->cpu, s->start_time,
						    s->end_time, s->backtrace);
					break;
				default:
					break;
				}
			}

			if (comm->comm) {
				char label[256];

				if (comm->total_time > 5000000000) /* 5 seconds */
					sprintf(label, "%s:%i (%2.2fs)", comm->comm, task->pid,
						comm->total_time / (double)NSEC_PER_SEC);
				else
					sprintf(label, "%s:%i (%3.1fms)", comm->comm, task->pid,
						comm->total_time / (double)NSEC_PER_MSEC);

				svg_text(Y, comm->start_time, label);
			}

			comm->Y = Y;
			Y++;
		}
	}
}

/*
 * Add @string to the global process filter list.
 *
 * The string is kept as a comm name and, parsed as a decimal number, also
 * as a pid (a non-numeric string yields pid 0, which never matches by
 * pid). If memory cannot be allocated the filter is not added, so the list
 * never holds an entry with a NULL name.
 */
static void add_process_filter(const char *string)
{
	int pid = strtoull(string, NULL, 10);
	struct process_filter *filt = malloc(sizeof(*filt));

	if (!filt)
		return;

	filt->name = strdup(string);
	if (!filt->name) {
		free(filt);
		return;
	}

	filt->pid  = pid;
	filt->next = process_filter;

	process_filter = filt;
}

/*
 * Decide whether the comm @c of task @p should be displayed.
 *
 * Returns 1 when no filter is installed, when a filter's (non-zero) pid
 * equals the task's pid, or when a filter's name equals the comm name;
 * 0 otherwise. A comm entry that has no name yet can only match by pid.
 */
static int passes_filter(struct per_pid *p, struct per_pidcomm *c)
{
	struct process_filter *filt;

	if (!process_filter)
		return 1;

	for (filt = process_filter; filt; filt = filt->next) {
		if (filt->pid && p->pid == filt->pid)
			return 1;
		if (filt->name && c->comm && strcmp(filt->name, c->comm) == 0)
			return 1;
	}

	return 0;
}

/*
 * Mark for display every comm that passes the process filter, together
 * with its pid, and fix up start/end times along the way: a start time of
 * 1 becomes the first sample time and a missing end time becomes the last
 * sample time.
 *
 * Returns the number of comms marked for display.
 */
static int determine_display_tasks_filtered(struct timechart *tchart)
{
	struct per_pid *task;
	struct per_pidcomm *comm;
	int shown = 0;

	for (task = tchart->all_data; task; task = task->next) {
		task->display = 0;

		if (task->start_time == 1)
			task->start_time = tchart->first_time;

		/* no exit marker, task kept running to the end */
		if (!task->end_time)
			task->end_time = tchart->last_time;

		for (comm = task->all; comm; comm = comm->next) {
			comm->display = 0;

			if (comm->start_time == 1)
				comm->start_time = tchart->first_time;

			if (passes_filter(task, comm)) {
				comm->display = 1;
				task->display = 1;
				shown++;
			}

			if (!comm->end_time)
				comm->end_time = tchart->last_time;
		}
	}

	return shown;
}

/*
 * Mark for display every pid and comm whose accumulated run time is at
 * least @threshold, and fix up start/end times along the way: a start time
 * of 1 becomes the first sample time and a missing end time becomes the
 * last sample time.
 *
 * Returns the number of comms marked for display.
 */
static int determine_display_tasks(struct timechart *tchart, u64 threshold)
{
	struct per_pid *task;
	struct per_pidcomm *comm;
	int shown = 0;

	for (task = tchart->all_data; task; task = task->next) {
		if (task->start_time == 1)
			task->start_time = tchart->first_time;

		/* no exit marker, task kept running to the end */
		if (!task->end_time)
			task->end_time = tchart->last_time;

		task->display = (task->total_time >= threshold) ? 1 : 0;

		for (comm = task->all; comm; comm = comm->next) {
			if (comm->start_time == 1)
				comm->start_time = tchart->first_time;

			if (comm->total_time >= threshold) {
				comm->display = 1;
				shown++;
			} else {
				comm->display = 0;
			}

			if (!comm->end_time)
				comm->end_time = tchart->last_time;
		}
	}

	return shown;
}

/*
 * Mark for display every comm that transferred at least @threshold bytes,
 * and give pids and comms without an exit marker the last sample time as
 * their end time.
 *
 * Returns the number of comms marked for display.
 */
static int determine_display_io_tasks(struct timechart *timechart, u64 threshold)
{
	struct per_pid *task;
	struct per_pidcomm *comm;
	int shown = 0;

	for (task = timechart->all_data; task; task = task->next) {
		/* no exit marker, task kept running to the end */
		if (!task->end_time)
			task->end_time = timechart->last_time;

		for (comm = task->all; comm; comm = comm->next) {
			if (comm->total_bytes >= threshold) {
				comm->display = 1;
				shown++;
			} else {
				comm->display = 0;
			}

			if (!comm->end_time)
				comm->end_time = timechart->last_time;
		}
	}

	return shown;
}

/*
 * Initial display thresholds used when writing the SVG: total bytes for
 * the IO chart, accumulated run time in nanoseconds (10 ms) otherwise.
 */
#define BYTES_THRESH (1 * 1024 * 1024)
#define TIME_THRESH 10000000

/*
 * Select the tasks to display and write the chart to @filename.
 *
 * The display threshold starts at BYTES_THRESH (IO chart) or TIME_THRESH
 * and is divided by ten until at least proc_num tasks qualify or it
 * reaches zero; with a process filter installed the selection runs once.
 * An IO chart is produced when IO events were recorded, otherwise the
 * CPU/scheduling chart.
 */
static void write_svg_file(struct timechart *tchart, const char *filename)
{
	u64 cpu;
	int shown;
	int thresh = tchart->io_events ? BYTES_THRESH : TIME_THRESH;

	if (tchart->power_only)
		tchart->proc_num = 0;

	/* We'd like to show at least proc_num tasks;
	 * be less picky if we have fewer */
	for (;;) {
		if (process_filter)
			shown = determine_display_tasks_filtered(tchart);
		else if (tchart->io_events)
			shown = determine_display_io_tasks(tchart, thresh);
		else
			shown = determine_display_tasks(tchart, thresh);

		thresh /= 10;

		if (process_filter || !thresh || shown >= tchart->proc_num)
			break;
	}

	if (!tchart->proc_num)
		shown = 0;

	if (tchart->io_events) {
		open_svg(filename, 0, shown, tchart->first_time, tchart->last_time);

		svg_time_grid(0.5);
		svg_io_legenda();

		draw_io_bars(tchart);

		svg_close();
		return;
	}

	open_svg(filename, tchart->numcpus, shown, tchart->first_time, tchart->last_time);

	svg_time_grid(0);
	svg_legenda();

	for (cpu = 0; cpu < tchart->numcpus; cpu++)
		svg_cpu_box(cpu, tchart->max_freq, tchart->turbo_frequency);

	draw_cpu_usage(tchart);

	if (tchart->proc_num)
		draw_process_bars(tchart);

	if (!tchart->tasks_only)
		draw_c_p_states(tchart);

	if (tchart->proc_num)
		draw_wakeups(tchart);

	svg_close();
}

/*
 * Header feature callback; @data is the struct timechart.
 *
 * HEADER_NRCPUS sets the CPU count from the recorded environment, and
 * HEADER_CPU_TOPOLOGY builds the SVG topology map when the topology option
 * is set (a failure is reported on stderr only). Other features are
 * ignored. Always returns 0.
 */
static int process_header(struct perf_file_section *section __maybe_unused,
			  struct perf_header *ph,
			  int feat,
			  int fd __maybe_unused,
			  void *data)
{
	struct timechart *chart = data;

	if (feat == HEADER_NRCPUS) {
		chart->numcpus = ph->env.nr_cpus_avail;
	} else if (feat == HEADER_CPU_TOPOLOGY && chart->topology) {
		if (svg_build_topology_map(&ph->env))
			fprintf(stderr, "problem building topology\n");
	}

	return 0;
}

static int __cmd_timechart(struct timechart *tchart, const char *output_name)
{
	const struct evsel_str_handler power_tracepoints[] = {
		{ "power:cpu_idle",		process_sample_cpu_idle },
		{ "power:cpu_frequency",	process_sample_cpu_frequency },
		{ "sched:sched_wakeup",		process_sample_sched_wakeup },
		{ "sched:sched_switch",		process_sample_sched_switch },
#ifdef SUPPORT_OLD_POWER_EVENTS
		{ "power:power_start",		process_sample_power_start },
		{ "power:power_end",		process_sample_power_end },
		{ "power:power_frequency",	process_sample_power_frequency },
#endif

		{ "syscalls:sys_enter_read",		process_enter_read },
		{ "syscalls:sys_enter_pread64",		process_enter_read },
		{ "syscalls:sys_enter_readv",		process_enter_read },
		{ "syscalls:sys_enter_preadv",		process_enter_read },
		{ "syscalls:sys_enter_write",		process_enter_write },
		{ "syscalls:sys_enter_pwrite64",	process_enter_write },
		{ "syscalls:sys_enter_writev",		process_enter_write },
		{ "syscalls:sys_enter_pwritev",		process_enter_write },
		{ "syscalls:sys_enter_sync",		process_enter_sync },
		{ "syscalls:sys_enter_sync_file_range",	process_enter_sync },
		{ "syscalls:sys_enter_fsync",		process_enter_sync },
		{ "syscalls:sys_enter_msync",		process_enter_sync },
		{ "syscalls:sys_enter_recvfrom",	process_enter_rx },
		{ "syscalls:sys_enter_recvmmsg",	process_enter_rx },
		{ "syscalls:sys_enter_recvmsg",		process_enter_rx },
		{ "syscalls:sys_enter_sendto",		process_enter_tx 