Merge pull request #13 from curusarn/dev_2

sanitization release evaluation progress
6 years ago · 22a0bf3f5e
parent 752acb916f 188d8b4204
commit 22a0bf3f5e
19 changed files with 2826 additions and 11 deletions
--- a/.gitignore
+++ b/.gitignore
@ -1,2 +1,4 @@
 resh-collect
 resh-daemon
+resh-sanitize-history
+resh-evaluate
--- a/51
+++ b/51
@ -6,8 +6,43 @@ GOFLAGS=-ldflags "-X main.Version=${VERSION} -X main.Revision=${REVISION}"
 autoinstall: 
 	./install_helper.sh

+# Create sanitized copies of the resh history and tell the user how to inspect them.
+# The tab-indented `#` lines below are recipe lines on purpose: make echoes them,
+# which is how the explanatory text reaches the terminal.
+# Declared phony because the target names an action, not a file it produces.
+.PHONY: sanitize
+sanitize:
+	#
+	#
+	# I'm going to create a sanitized version of your resh history.
+	# Everything is done locally - your history won't leave this machine.
+	# The way this works is that any sensitive information in your history is going to be replaced with its SHA1 hash.
+	# There is also going to be a second version with hashes trimmed to 12 characters for readability
+	#
+	#
+	# > full hashes: ~/resh_history_sanitized.json
+	# > 12 char hashes: ~/resh_history_sanitized_trim12.json
+	#
+	#
+	# Encountered any issues? Got questions? -> Hit me up at https://github.com/curusarn/resh/issues
+	#
+	#
+	# Running history sanitization ...
+	resh-sanitize-history -trim-hashes 0 --output ~/resh_history_sanitized.json
+	resh-sanitize-history -trim-hashes 12 --output ~/resh_history_sanitized_trim12.json
+	#
+	#
+	# SUCCESS - ALL DONE!
+	#
+	#
+	# PLEASE HAVE A LOOK AT THE RESULT USING THESE COMMANDS:
+	#
+	# > pretty print JSON:
+	@echo 'cat ~/resh_history_sanitized_trim12.json | jq'
+	#
+	# > only show executed commands, don't show metadata:
+	@echo "cat ~/resh_history_sanitized_trim12.json | jq '.[\"cmdLine\"]'"
+	#
+	#
+	#
+

-build: submodules resh-collect resh-daemon
+build: submodules resh-collect resh-daemon resh-sanitize-history resh-evaluate

 rebuild:
 	make clean
@ -23,6 +58,8 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu
 	cp -f shellrc.sh ~/.resh/shellrc
 	cp -f uuid.sh ~/.resh/bin/resh-uuid
 	cp -f resh-* ~/.resh/bin/
+	cp -f evaluate/resh-evaluate-plot.py ~/.resh/bin/
+	cp -fr sanitizer_data ~/.resh/
 	# backward compatibility: We have a new location for resh history file 
 	[ ! -f ~/.resh/history.json ] || mv ~/.resh/history.json ~/.resh_history.json 
 	# Adding resh shellrc to .bashrc ...
@ -40,24 +77,31 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu
 	# Final touch
 	touch ~/.resh_history.json
 	#
+	#
+	#
 	##########################################################
 	#                                                        #
 	#    SUCCESS - thank you for trying out this project!    #
 	#                                                        #
 	##########################################################
 	#
+	#
 	# WHAT'S NEXT
 	# Please RESTART ALL OPEN TERMINAL WINDOWS (or reload your rc files)
 	# Your resh history is located in `~/.resh_history.json`
 	# You can look at it using e.g. `tail -f ~/.resh_history.json | jq`
 	#
+	#
 	# ISSUES
 	# If anything looks broken create an issue: https://github.com/curusarn/resh/issues
 	# You can uninstall this at any time by running `rm -rf ~/.resh/`
 	# You won't lose any collected history by removing `~/.resh` directory
 	#
+	#
 	# Please give me some contact info using this form: https://forms.gle/227SoyJ5c2iteKt98
 	#
+	#
+	#

 uninstall:
 	# Uninstalling ...
@ -69,6 +113,11 @@ resh-daemon: daemon/resh-daemon.go common/resh-common.go version
 resh-collect: collect/resh-collect.go common/resh-common.go version
 	go build ${GOFLAGS} -o $@ $<

+# Build the history sanitizer; only the first prerequisite is handed to `go build`.
+resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version
+	go build $(GOFLAGS) -o $@ $(firstword $^)
+
+# Build the evaluator from its main file plus every strategy file under evaluate/.
+resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version
+	go build $(GOFLAGS) -o $@ $(filter evaluate/%.go,$^)

 $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config:
 	# Creating dirs ...
--- a/README.md
+++ b/README.md
@ -4,7 +4,7 @@

 This project is the first phase of my Master project.

-It records shell history with rich set of metadata and saves it locally. (device, dir, git, ... see example below)
+It records shell history with a rich set of metadata (device, directory, git, time, terminal session pid, ... see example below) and saves it locally.

 It doesn't change the way your shell and your shell history behaves.

@ -17,8 +17,9 @@ If you are not happy with it you can uninstall it with a single command (`rm -rf
 The ultimate point of my thesis is to provide a context-based replacement/enhancement for bash and zsh shell history.

 The idea is to:
- Save each command with metadata (device, dir, gitdir, ...)
- Recommend history based on saved metadata (e.g. it will be easier to get to commands specific to your project)
+- Save each command with metadata (device, directory, git, time, terminal session pid, ... see example below)
+- Recommend history based on saved metadata
+  - e.g. it will be easier to get to commands specific to the project you are currently working on (based on directory, git repository url, ...)
 - Provide a simple way to search whole history by command itself and/or metadata (e.g. imagine searching by project, directory, device, ...)
 - Synchronize history across devices 
 - Provide an API (to make the project easily extensible)
--- a/collect/resh-collect.go
+++ b/collect/resh-collect.go
@ -20,7 +20,10 @@ import (
 	"strings"
 )

+// Version from git set during build
 var Version string
+
+// Revision from git set during build
 var Revision string

 func main() {
--- a/common/resh-common.go
+++ b/common/resh-common.go
@ -1,5 +1,13 @@
 package common

+import (
+	"log"
+	"strconv"
+
+	"github.com/mattn/go-shellwords"
+)
+
+// Record representing single executed command with its metadata
 type Record struct {
 	// core
 	CmdLine   string `json:"cmdLine"`
@ -60,8 +68,180 @@ type Record struct {
 	ReshUuid     string `json:"reshUuid"`
 	ReshVersion  string `json:"reshVersion"`
 	ReshRevision string `json:"reshRevision"`
+
+	// added by sanitizer
+	Sanitized bool `json:"sanitized"`
+	CmdLength int  `json:"cmdLength,omitempty"`
+
+	// enriching fields - added "later"
+	FirstWord    string `json:"firstWord,omitempty"`
+	Invalid      bool   `json:"invalid,omitempty"`
+	SeqSessionID uint64 `json:"seqSessionID,omitempty"`
+}
+
+// FallbackRecord is used when a record is too old to be parsed into the
+// regular Record. It describes the older layout in which cols and lines are
+// JSON integers; ConvertRecord turns it into a Record.
+type FallbackRecord struct {
+	// older version of the record where cols and lines are int
+
+	// core
+	CmdLine   string `json:"cmdLine"`
+	ExitCode  int    `json:"exitCode"`
+	Shell     string `json:"shell"`
+	Uname     string `json:"uname"`
+	SessionId string `json:"sessionId"`
+
+	// posix
+	Cols  int    `json:"cols"`  // notice the int type
+	Lines int    `json:"lines"` // notice the int type
+	Home  string `json:"home"`
+	Lang  string `json:"lang"`
+	LcAll string `json:"lcAll"`
+	Login string `json:"login"`
+	//Path     string `json:"path"`
+	Pwd      string `json:"pwd"`
+	PwdAfter string `json:"pwdAfter"`
+	ShellEnv string `json:"shellEnv"`
+	Term     string `json:"term"`
+
+	// non-posix
+	RealPwd      string `json:"realPwd"`
+	RealPwdAfter string `json:"realPwdAfter"`
+	Pid          int    `json:"pid"`
+	SessionPid   int    `json:"sessionPid"`
+	Host         string `json:"host"`
+	Hosttype     string `json:"hosttype"`
+	Ostype       string `json:"ostype"`
+	Machtype     string `json:"machtype"`
+	Shlvl        int    `json:"shlvl"`
+
+	// before after
+	TimezoneBefore string `json:"timezoneBefore"`
+	TimezoneAfter  string `json:"timezoneAfter"`
+
+	RealtimeBefore      float64 `json:"realtimeBefore"`
+	RealtimeAfter       float64 `json:"realtimeAfter"`
+	RealtimeBeforeLocal float64 `json:"realtimeBeforeLocal"`
+	RealtimeAfterLocal  float64 `json:"realtimeAfterLocal"`
+
+	RealtimeDuration          float64 `json:"realtimeDuration"`
+	RealtimeSinceSessionStart float64 `json:"realtimeSinceSessionStart"`
+	RealtimeSinceBoot         float64 `json:"realtimeSinceBoot"`
+	//Logs []string      `json: "logs"`
+
+	GitDir          string `json:"gitDir"`
+	GitRealDir      string `json:"gitRealDir"`
+	GitOriginRemote string `json:"gitOriginRemote"`
+	MachineId       string `json:"machineId"`
+
+	OsReleaseId         string `json:"osReleaseId"`
+	OsReleaseVersionId  string `json:"osReleaseVersionId"`
+	OsReleaseIdLike     string `json:"osReleaseIdLike"`
+	OsReleaseName       string `json:"osReleaseName"`
+	OsReleasePrettyName string `json:"osReleasePrettyName"`
+
+	ReshUuid     string `json:"reshUuid"`
+	ReshVersion  string `json:"reshVersion"`
+	ReshRevision string `json:"reshRevision"`
+}
+
+// ConvertRecord from FallbackRecord to Record
+func ConvertRecord(r *FallbackRecord) Record {
+	return Record{
+		// core
+		CmdLine:   r.CmdLine,
+		ExitCode:  r.ExitCode,
+		Shell:     r.Shell,
+		Uname:     r.Uname,
+		SessionId: r.SessionId,
+
+		// posix
+		// these two lines are the only reason we are doing this
+		Cols:  strconv.Itoa(r.Cols),
+		Lines: strconv.Itoa(r.Lines),
+
+		Home:  r.Home,
+		Lang:  r.Lang,
+		LcAll: r.LcAll,
+		Login: r.Login,
+		// Path:     r.path,
+		Pwd:      r.Pwd,
+		PwdAfter: r.PwdAfter,
+		ShellEnv: r.ShellEnv,
+		Term:     r.Term,
+
+		// non-posix
+		RealPwd:      r.RealPwd,
+		RealPwdAfter: r.RealPwdAfter,
+		Pid:          r.Pid,
+		SessionPid:   r.SessionPid,
+		Host:         r.Host,
+		Hosttype:     r.Hosttype,
+		Ostype:       r.Ostype,
+		Machtype:     r.Machtype,
+		Shlvl:        r.Shlvl,
+
+		// before after
+		TimezoneBefore: r.TimezoneBefore,
+		TimezoneAfter:  r.TimezoneAfter,
+
+		RealtimeBefore:      r.RealtimeBefore,
+		RealtimeAfter:       r.RealtimeAfter,
+		RealtimeBeforeLocal: r.RealtimeBeforeLocal,
+		RealtimeAfterLocal:  r.RealtimeAfterLocal,
+
+		RealtimeDuration:          r.RealtimeDuration,
+		RealtimeSinceSessionStart: r.RealtimeSinceSessionStart,
+		RealtimeSinceBoot:         r.RealtimeSinceBoot,
+
+		GitDir:          r.GitDir,
+		GitRealDir:      r.GitRealDir,
+		GitOriginRemote: r.GitOriginRemote,
+		MachineId:       r.MachineId,
+
+		OsReleaseId:         r.OsReleaseId,
+		OsReleaseVersionId:  r.OsReleaseVersionId,
+		OsReleaseIdLike:     r.OsReleaseIdLike,
+		OsReleaseName:       r.OsReleaseName,
+		OsReleasePrettyName: r.OsReleasePrettyName,
+
+		ReshUuid:     r.ReshUuid,
+		ReshVersion:  r.ReshVersion,
+		ReshRevision: r.ReshRevision,
+	}
+}
+
+// Enrich - adds additional fields to the record
+//
+// FirstWord is set to the command (first word) parsed from CmdLine.
+// Invalid is set to true exactly when Validate returns an error, in which
+// case the offending command line is also logged.
+func (r *Record) Enrich() {
+	// Get command/first word from commandline
+	r.FirstWord = GetCommandFromCommandLine(r.CmdLine)
+
+	// Derive the flag directly from the validation result. Previously the
+	// flag was reset to false unconditionally after the check, so a record
+	// could never end up marked as invalid.
+	err := r.Validate()
+	r.Invalid = err != nil
+	if r.Invalid {
+		log.Println("Invalid command:", r.CmdLine)
+	}
+	// TODO: Detect and mark simple commands r.Simple
+}
+
+// Validate - returns error if the record is invalid
+//
+// NOTE: placeholder - no checks are implemented yet, so this always returns nil.
+func (r *Record) Validate() error {
+	return nil
+}
+
+// GetCommandFromCommandLine returns the first shell word of cmdLine.
+//
+// The line is split with shellwords.Parse. When parsing fails the error is
+// logged and the literal "<error>" is returned; when the line contains no
+// words the result is the empty string.
+func GetCommandFromCommandLine(cmdLine string) string {
+	words, parseErr := shellwords.Parse(cmdLine)
+	switch {
+	case parseErr != nil:
+		log.Println("shellwords Error:", parseErr, " (cmdLine: <", cmdLine, "> )")
+		return "<error>"
+	case len(words) == 0:
+		return ""
+	default:
+		return words[0]
+	}
 }

+// Config struct
 type Config struct {
 	Port int
 }
--- a/daemon/resh-daemon.go
+++ b/daemon/resh-daemon.go
@ -3,8 +3,6 @@ package main
 import (
 	"encoding/json"
 	//"flag"
-	"github.com/BurntSushi/toml"
-	common "github.com/curusarn/resh/common"
 	"io/ioutil"
 	"log"
 	"net/http"
@ -14,9 +12,15 @@ import (
 	"path/filepath"
 	"strconv"
 	"strings"
+
+	"github.com/BurntSushi/toml"
+	common "github.com/curusarn/resh/common"
 )

+// Version from git set during build
 var Version string
+
+// Revision from git set during build
 var Revision string

 func main() {
--- a/evaluate/resh-evaluate-plot.py
+++ b/evaluate/resh-evaluate-plot.py
@ -0,0 +1,438 @@
+#!/usr/bin/env python3
+
+
+import traceback
+import sys
+import json
+from collections import defaultdict
+import matplotlib.pyplot as plt
+import matplotlib.path as mpath
+import numpy as np
+from graphviz import Digraph
+
+PLOT_WIDTH = 10 # inches
+PLOT_HEIGHT = 7 # inches
+
+PLOT_SIZE_zipf = 20
+
+data = json.load(sys.stdin)
+
+DATA_records = []
+DATA_records_by_session = defaultdict(list) 
+for user in data["UsersRecords"]:
+    for device in user["Devices"]:
+        for record in device["Records"]:
+            if record["invalid"]:
+                continue
+            
+            DATA_records.append(record)
+            DATA_records_by_session[record["sessionId"]].append(record)
+
+DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeAfterLocal"]))
+
+for pid, session in DATA_records_by_session.items():
+    session = list(sorted(session, key=lambda x: x["realtimeAfterLocal"]))
+
+# TODO: this should be a cmdline option
+async_draw = True
+
+# for strategy in data["Strategies"]:
+#     print(json.dumps(strategy))
+
+
+def zipf(length):
+    return list(map(lambda x: 1/2**x, range(0, length)))
+
+
+def trim(text, length, add_elipse=True):
+    if add_elipse and len(text) > length:
+        return text[:length-1] + "…"
+    return text[:length]
+
+
+# Figure 3.1. The normalized command frequency, compared with Zipf.
+def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False):
+    cmdLine_count = defaultdict(int)
+    for record in DATA_records:
+        cmdLine_count[record["cmdLine"]] += 1
+
+    tmp = sorted(cmdLine_count.items(), key=lambda x: x[1], reverse=True)[:plotSize]
+    cmdLineFrq = list(map(lambda x: x[1] / tmp[0][1], tmp))
+    labels = list(map(lambda x: trim(x[0], 7), tmp))
+
+    ranks = range(1, len(cmdLineFrq)+1)
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.plot(ranks, zipf(len(ranks)), '-')
+    plt.plot(ranks, cmdLineFrq, 'o-')
+    plt.title("Commandline frequency / rank")
+    plt.ylabel("Normalized commandline frequency")
+    plt.xlabel("Commandline rank")
+    plt.legend(("Zipf", "Commandline"), loc="best")
+    if show_labels:
+        plt.xticks(ranks, labels, rotation=-60)
+    # TODO: make xticks integral
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+
+# similar to ~ Figure 3.1. The normalized command frequency, compared with Zipf.
+def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False):
+    cmd_count = defaultdict(int)
+    for record in DATA_records:
+        cmd = record["firstWord"]
+        if cmd == "":
+            continue
+        cmd_count[cmd] += 1
+
+    tmp = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)[:plotSize]
+    cmdFrq = list(map(lambda x: x[1] / tmp[0][1], tmp))
+    labels = list(map(lambda x: trim(x[0], 7), tmp))
+
+    ranks = range(1, len(cmdFrq)+1)
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.plot(ranks, zipf(len(ranks)), 'o-')
+    plt.plot(ranks, cmdFrq, 'o-')
+    plt.title("Command frequency / rank")
+    plt.ylabel("Normalized command frequency")
+    plt.xlabel("Command rank")
+    plt.legend(("Zipf", "Command"), loc="best")
+    if show_labels:
+        plt.xticks(ranks, labels, rotation=-60)
+    # TODO: make xticks integral
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+# Figure 3.2. Command vocabulary size vs. the number of command lines entered for four individuals.
+def plot_cmdVocabularySize_cmdLinesEntered():
+    cmd_vocabulary = set()
+    y_cmd_count = [0]
+    for record in DATA_records:
+        cmd = record["firstWord"]
+        if cmd in cmd_vocabulary:
+            # repeat last value
+            y_cmd_count.append(y_cmd_count[-1])
+        else:
+            cmd_vocabulary.add(cmd)  
+            # append last value +1
+            y_cmd_count.append(y_cmd_count[-1] + 1)
+
+    # print(cmd_vocabulary)
+    x_cmds_entered = range(0, len(y_cmd_count))
+
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.plot(x_cmds_entered, y_cmd_count, '-')
+    plt.title("Command vocabulary size vs. the number of command lines entered")
+    plt.ylabel("Command vocabulary size")
+    plt.xlabel("# of command lines entered")
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+# Figure 5.6. Command line vocabulary size vs. the number of commands entered for four typical individuals.
+def plot_cmdLineVocabularySize_cmdLinesEntered():
+    cmdLine_vocabulary = set()
+    y_cmdLine_count = [0]
+    for record in DATA_records:
+        cmdLine = record["cmdLine"]
+        if cmdLine in cmdLine_vocabulary:
+            # repeat last value
+            y_cmdLine_count.append(y_cmdLine_count[-1])
+        else:
+            cmdLine_vocabulary.add(cmdLine)  
+            # append last value +1
+            y_cmdLine_count.append(y_cmdLine_count[-1] + 1)
+
+    # print(cmdLine_vocabulary)
+    x_cmdLines_entered = range(0, len(y_cmdLine_count))
+
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.plot(x_cmdLines_entered, y_cmdLine_count, '-')
+    plt.title("Command line vocabulary size vs. the number of command lines entered")
+    plt.ylabel("Command line vocabulary size")
+    plt.xlabel("# of command lines entered")
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+# Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984).
+#       Ball diameters are proportional to stationary probability. Lines indicate significant dependencies,
+#       solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001).
+def graph_cmdSequences(node_count=33, edge_minValue=0.05):
+    START_CMD = "_start_"
+    cmd_count = defaultdict(int)
+    cmdSeq_count = defaultdict(lambda: defaultdict(int))
+    cmd_id = dict()
+    x = 0
+    cmd_id[START_CMD] = str(x) 
+    for pid, session in DATA_records_by_session.items():
+        cmd_count[START_CMD] += 1
+        prev_cmd = START_CMD
+        for record in session:
+            cmd = record["firstWord"]
+            cmdSeq_count[prev_cmd][cmd] += 1
+            cmd_count[cmd] += 1
+            if cmd not in cmd_id:
+                x += 1
+                cmd_id[cmd] = str(x)
+            prev_cmd = cmd
+
+    # get `node_count` of largest nodes
+    sorted_cmd_count = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)
+    print(sorted_cmd_count)
+    cmds_to_graph = list(map(lambda x: x[0], sorted_cmd_count))[:node_count]
+
+    # use 3 biggest nodes as a reference point for scaling
+    biggest_node = cmd_count[cmds_to_graph[0]]
+    nd_biggest_node = cmd_count[cmds_to_graph[1]]
+    rd_biggest_node = cmd_count[cmds_to_graph[1]]
+    count2scale_coef = 3 / (biggest_node + nd_biggest_node + rd_biggest_node)
+
+    # scaling constant
+    #       affects node size and node label
+    base_scaling_factor = 21
+    # extra scaling for experiments - not really useful imho
+    #       affects everything nodes, edges, node labels, treshold for turning label into xlabel, xlabel size, ...
+    extra_scaling_factor = 1.0 
+    for x in range(0, 10):
+        # graphviz is not the most reliable piece of software
+        #       -> retry on fail but scale nodes down by 1%
+        scaling_factor = base_scaling_factor * (1 - x * 0.01)
+
+        # overlap: scale -> solve overlap by scaling the graph
+        # overlap_shrink -> try to shrink the graph a bit after you are done
+        # splines -> don't draw edges over nodes
+        # sep: 2.5 -> assume that nodes are 2.5 inches larger
+        graph_attr={'overlap':'scale', 'overlap_shrink':'true',
+                    'splines':'true', 'sep':'0.25'}
+        graph = Digraph(name='command_sequentiality', engine='neato', graph_attr=graph_attr)
+
+        # iterate over all nodes
+        for cmd in cmds_to_graph:
+            seq = cmdSeq_count[cmd]
+            count = cmd_count[cmd]
+
+            # iterate over all "following" commands (for each node)
+            for seq_entry in seq.items():
+                cmd2, seq_count = seq_entry
+                relative_seq_count = seq_count / count
+
+                # check if "follow" command is supposed to be in the graph
+                if cmd2 not in cmds_to_graph:
+                    continue
+                # check if the edge value is high enough
+                if relative_seq_count < edge_minValue:
+                    continue
+                
+                # create starting node and end node for the edge
+                #       duplicates don't matter 
+                for id_, cmd_ in ((cmd_id[cmd], cmd), (cmd_id[cmd2], cmd2)):
+                    count_ = cmd_count[cmd_]
+                    scale_ = count_ * count2scale_coef * scaling_factor * extra_scaling_factor
+                    width_ = 0.08 * scale_
+                    fontsize_ = 8.5 * scale_ / (len(cmd_) + 3)
+
+                    width_ = str(width_) 
+                    if fontsize_ < 12 * extra_scaling_factor:
+                        graph.node(id_, ' ', shape='circle', fixedsize='true', fontname='monospace bold',
+                                width=width_, fontsize=str(12 * extra_scaling_factor), forcelabels='true', xlabel=cmd_)
+                    else:
+                        fontsize_ = str(fontsize_)
+                        graph.node(id_, cmd_, shape='circle', fixedsize='true', fontname='monospace bold',
+                                width=width_, fontsize=fontsize_, forcelabels='true', labelloc='c')
+                
+                # value of the edge (percentage) 1.0 is max
+                scale_ = seq_count / cmd_count[cmd]
+                penwidth_ = str((0.5 + 4.5 * scale_) * extra_scaling_factor)
+                #penwidth_bold_ = str(8 * scale_)
+                if scale_ > 0.5:
+                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
+                            penwidth=penwidth_, style='bold')
+                elif scale_ > 0.2:
+                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
+                            penwidth=penwidth_, arrowhead='open')
+                elif scale_ > 0.1:
+                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
+                            penwidth=penwidth_, style='dashed', arrowhead='open')
+                else:
+                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved',
+                            penwidth=penwidth_, style='dotted', arrowhead='empty')
+
+        # graphviz sometimes fails - see above
+        try:
+            graph.view()
+            # graph.render('/tmp/resh-graphviz-cmdSeq.gv', view=True)
+            break
+        except Exception as e:
+            trace = traceback.format_exc()
+            print("GRAPHVIZ EXCEPTION: <{}>\nGRAPHVIZ TRACE: <{}>".format(str(e), trace))
+
+
+def plot_strategies_matches(plot_size=50, selected_strategies=[]):
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.title("Matches at distance")
+    plt.ylabel('%' + " of matches")
+    plt.xlabel("Distance")
+    legend = []
+    x_values = range(1, plot_size+1)
+    saved_matches_total = None
+    saved_dataPoint_count = None
+    for strategy in data["Strategies"]:
+        strategy_title = strategy["Title"]
+        # strategy_description = strategy["Description"]
+
+        if len(selected_strategies) and strategy_title not in selected_strategies:
+            continue
+
+        dataPoint_count = 0
+        matches = [0] * plot_size
+        matches_total = 0
+        charsRecalled = [0] * plot_size
+        charsRecalled_total = 0
+        
+        for match in strategy["Matches"]:
+            dataPoint_count += 1
+
+            if not match["Match"]:
+                continue
+
+            chars = match["CharsRecalled"]
+            charsRecalled_total += chars 
+            matches_total += 1
+
+            dist = match["Distance"]  
+            if dist > plot_size:
+                continue
+
+            matches[dist-1] += 1
+            charsRecalled[dist-1] += chars
+            
+        # recent is very simple strategy so we will believe 
+        #       that there is no bug in it and we can use it to determine total
+        if strategy_title == "recent":
+            saved_matches_total = matches_total
+            saved_dataPoint_count = dataPoint_count
+
+        if len(selected_strategies) and strategy_title not in selected_strategies:
+            continue
+
+        acc = 0
+        matches_cumulative = []
+        for x in matches:
+            acc += x
+            matches_cumulative.append(acc)
+        # matches_cumulative.append(matches_total)
+        matches_percent = list(map(lambda x: 100 * x / dataPoint_count, matches_cumulative))
+
+        plt.plot(x_values, matches_percent, 'o-')
+        legend.append(strategy_title)
+
+    assert(saved_matches_total is not None)
+    assert(saved_dataPoint_count is not None)
+    max_values = [100 * saved_matches_total / saved_dataPoint_count] * len(x_values)
+    plt.plot(x_values, max_values, 'r-')
+    legend.append("maximum possible")
+
+    x_ticks = list(range(1, plot_size+1, 2))
+    x_labels = x_ticks[:]
+    plt.xticks(x_ticks, x_labels)
+    plt.legend(legend, loc="best")
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+
+
+def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]):
+    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
+    plt.title("Average characters recalled at distance")
+    plt.ylabel("Average characters recalled")
+    plt.xlabel("Distance")
+    x_values = range(1, plot_size+1)
+    legend = []
+    saved_charsRecalled_total = None
+    saved_dataPoint_count = None
+    for strategy in data["Strategies"]:
+        strategy_title = strategy["Title"]
+        # strategy_description = strategy["Description"]
+
+        dataPoint_count = 0
+        matches = [0] * plot_size
+        matches_total = 0
+        charsRecalled = [0] * plot_size
+        charsRecalled_total = 0
+        
+        for match in strategy["Matches"]:
+            dataPoint_count += 1
+
+            if not match["Match"]:
+                continue
+
+            chars = match["CharsRecalled"]
+            charsRecalled_total += chars 
+            matches_total += 1
+
+            dist = match["Distance"]  
+            if dist > plot_size:
+                continue
+
+            matches[dist-1] += 1
+            charsRecalled[dist-1] += chars
+            
+        # recent is very simple strategy so we will believe 
+        #       that there is no bug in it and we can use it to determine total
+        if strategy_title == "recent":
+            saved_charsRecalled_total = charsRecalled_total
+            saved_dataPoint_count = dataPoint_count
+
+        if len(selected_strategies) and strategy_title not in selected_strategies:
+            continue
+
+        acc = 0
+        charsRecalled_cumulative = []
+        for x in charsRecalled:
+            acc += x
+            charsRecalled_cumulative.append(acc)
+        charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative))
+
+        plt.plot(x_values, charsRecalled_average, 'o-')
+        legend.append(strategy_title)
+
+    assert(saved_charsRecalled_total is not None)
+    assert(saved_dataPoint_count is not None)
+    max_values = [saved_charsRecalled_total / saved_dataPoint_count] * len(x_values)
+    plt.plot(x_values, max_values, 'r-')
+    legend.append("maximum possible")
+
+    x_ticks = list(range(1, plot_size+1, 2))
+    x_labels = x_ticks[:]
+    plt.xticks(x_ticks, x_labels)
+    plt.legend(legend, loc="best")
+    if async_draw:
+        plt.draw()
+    else:
+        plt.show()
+
+
+        
+# Script driver: render the command-sequence graph, then create every plot.
+# graph_cmdSequences(node_count=33, edge_minValue=0.05)
+graph_cmdSequences(node_count=28, edge_minValue=0.06)
+
+# command line / command frequency vs. rank
+plot_cmdLineFrq_rank()
+plot_cmdFrq_rank()
+
+# vocabulary growth
+plot_cmdLineVocabularySize_cmdLinesEntered()
+plot_cmdVocabularySize_cmdLinesEntered()
+
+# strategy comparison, distances 1..20
+plot_strategies_matches(20)
+plot_strategies_charsRecalled(20)
+
+# With async_draw the plot_* functions only call plt.draw(), so the figures
+# are displayed together by this single plt.show() at the end.
+if async_draw:
+    plt.show()
+# be careful and check if labels fit the display
--- a/evaluate/resh-evaluate.go
+++ b/evaluate/resh-evaluate.go
@ -0,0 +1,340 @@
+package main
+
+import (
+	"bufio"
+	"bytes"
+	"encoding/json"
+	"flag"
+	"fmt"
+	"io/ioutil"
+	"log"
+	"os"
+	"os/exec"
+	"os/user"
+	"path/filepath"
+	"sort"
+
+	"github.com/curusarn/resh/common"
+)
+
+// Version from git set during build
+var Version string
+
+// Revision from git set during build
+var Revision string
+
+// main parses the command-line flags, loads the history records (from a
+// single file or, in batch mode, from a <root>/<user>/<device>/ directory
+// tree), evaluates every strategy against them and hands the results to the
+// plotting script.
+func main() {
+	usr, err := user.Current()
+	if err != nil {
+		// usr is nil on error - dereferencing it below would panic
+		log.Fatal("Could not determine current user:", err)
+	}
+	dir := usr.HomeDir
+	historyPath := filepath.Join(dir, ".resh_history.json")
+	historyPathBatchMode := filepath.Join(dir, "resh_history.json")
+	sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json")
+
+	showVersion := flag.Bool("version", false, "Show version and exit")
+	showRevision := flag.Bool("revision", false, "Show git revision and exit")
+	input := flag.String("input", "",
+		"Input file (default: "+historyPath+" OR "+sanitizedHistoryPath+
+			" depending on --sanitized-input option)")
+	sanitizedInput := flag.Bool("sanitized-input", false,
+		"Handle input as sanitized (also changes default value for input argument)")
+	plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting")
+	inputDataRoot := flag.String("input-data-root", "",
+		"Input data root, enables batch mode, looks for files matching --input option")
+
+	flag.Parse()
+
+	// handle show{Version,Revision} options
+	if *showVersion {
+		fmt.Println(Version)
+		os.Exit(0)
+	}
+	if *showRevision {
+		fmt.Println(Revision)
+		os.Exit(0)
+	}
+
+	// batch mode is enabled by giving an input data root
+	batchMode := *inputDataRoot != ""
+
+	// set default input
+	if *input == "" {
+		switch {
+		case *sanitizedInput:
+			*input = sanitizedHistoryPath
+		case batchMode:
+			*input = historyPathBatchMode
+		default:
+			*input = historyPath
+		}
+		if batchMode {
+			// batch mode compares --input with bare file names found in the
+			// device directories, so a full default path would never match
+			*input = filepath.Base(*input)
+		}
+	}
+
+	eval := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50, BatchMode: batchMode}
+	if batchMode {
+		err = eval.initBatchMode(*input, *inputDataRoot)
+		if err != nil {
+			log.Fatal("Evaluator initBatchMode() error:", err)
+		}
+	} else {
+		err = eval.init(*input)
+		if err != nil {
+			log.Fatal("Evaluator init() error:", err)
+		}
+	}
+
+	var strategies []strategy
+
+	// dummy := strategyDummy{}
+	// strategies = append(strategies, &dummy)
+
+	recent := strategyRecent{}
+	frequent := strategyFrequent{}
+	frequent.init()
+	directory := strategyDirectorySensitive{}
+	directory.init()
+
+	strategies = append(strategies, &recent, &frequent, &directory)
+
+	// a failing strategy is only logged so the remaining ones still run
+	for _, strat := range strategies {
+		err := eval.evaluate(strat)
+		if err != nil {
+			log.Println("Evaluator evaluate() error:", err)
+		}
+	}
+
+	eval.calculateStatsAndPlot(*plottingScript)
+}
+
+// strategy is a history-recall strategy that the evaluator can score.
+//
+// For every history record the evaluator first asks for the current
+// candidates and only then feeds the record in via AddHistoryRecord.
+type strategy interface {
+	// GetTitleAndDescription returns the strategy title and its description.
+	GetTitleAndDescription() (string, string)
+	// GetCandidates returns candidate command lines; the evaluator treats the
+	// position in the slice (starting at 1) as the match distance.
+	GetCandidates() []string
+	// AddHistoryRecord makes the strategy aware of an executed record.
+	AddHistoryRecord(record *common.Record) error
+	// ResetHistory resets the state accumulated by the strategy.
+	ResetHistory() error
+}
+
+// matchJSON is the result of evaluating one history record against a strategy.
+// The zero value is stored when no candidate matched the record.
+type matchJSON struct {
+	// Match is true when one of the candidates equals the record's command line.
+	Match         bool
+	// Distance is the 1-based position of the matching candidate.
+	Distance      int
+	// CharsRecalled is the CmdLength of the matched record.
+	CharsRecalled int
+}
+
+// strategyJSON holds the evaluation results of a single strategy.
+type strategyJSON struct {
+	// Title and Description as returned by the strategy's GetTitleAndDescription().
+	Title       string
+	Description string
+	// Matches has one entry per evaluated history record, in evaluation order.
+	Matches     []matchJSON
+}
+
+// deviceRecords groups the history records loaded for one device.
+type deviceRecords struct {
+	// Name is the device directory name in batch mode; it is left empty otherwise.
+	Name    string
+	Records []common.Record
+}
+
+// userRecords groups the devices (and their records) that belong to one user.
+type userRecords struct {
+	// Name is the user directory name in batch mode; it is left empty otherwise.
+	Name    string
+	Devices []deviceRecords
+}
+
+// evaluator loads history records, evaluates strategies against them and is
+// marshalled to JSON as the input of the plotting script. Only the exported
+// fields are part of that JSON (encoding/json skips unexported fields).
+type evaluator struct {
+	// sanitizedInput mirrors the '--sanitized-input' option.
+	sanitizedInput bool
+	// BatchMode is true when records were loaded from an input data root.
+	BatchMode      bool
+	// maxCandidates is set by main; the code that would apply it as a limit in
+	// evaluate() is currently commented out.
+	maxCandidates  int
+	// UsersRecords holds all loaded records grouped by user and device.
+	UsersRecords   []userRecords
+	// Strategies collects the results of every evaluated strategy.
+	Strategies     []strategyJSON
+}
+
+// initBatchMode loads the records of all users and devices found under
+// inputDataRoot (files named input) and post-processes them.
+// It always returns nil; loading problems terminate the program.
+func (e *evaluator) initBatchMode(input string, inputDataRoot string) error {
+	loaded := e.loadHistoryRecordsBatchMode(input, inputDataRoot)
+	e.UsersRecords = loaded
+	e.processRecords()
+	return nil
+}
+
+// init loads the records from a single history file, stores them as the only
+// device of one unnamed user and post-processes them.
+// It always returns nil; loading problems terminate the program.
+func (e *evaluator) init(inputPath string) error {
+	onlyDevice := deviceRecords{Records: e.loadHistoryRecords(inputPath)}
+	onlyUser := userRecords{Devices: []deviceRecords{onlyDevice}}
+	e.UsersRecords = append(e.UsersRecords, onlyUser)
+	e.processRecords()
+	return nil
+}
+
+// calculateStatsAndPlot serialises the evaluator to JSON and pipes it to the
+// standard input of the plotting script. The script's stdout and stderr are
+// passed through. A failing script is only logged.
+func (e *evaluator) calculateStatsAndPlot(scriptName string) {
+	payload, marshalErr := json.Marshal(e)
+	if marshalErr != nil {
+		log.Fatal("json marshal error", marshalErr)
+	}
+	// run python script to stat and plot
+	plot := exec.Command(scriptName)
+	plot.Stdin = bytes.NewBuffer(payload)
+	plot.Stdout = os.Stdout
+	plot.Stderr = os.Stderr
+	if runErr := plot.Run(); runErr != nil {
+		log.Printf("Command finished with error: %v", runErr)
+	}
+}
+
+// processRecords enriches the loaded records and orders them for evaluation.
+//
+// For every device, each record gets a sequential session ID (sessions are
+// numbered from 0 in the order in which they first appear), is checked
+// against the '--sanitized-input' setting and has Enrich() called on it.
+// The records are then stable-sorted by (SeqSessionID, RealtimeAfterLocal).
+// A sanitization mismatch terminates the program.
+func (e *evaluator) processRecords() {
+	for i := range e.UsersRecords {
+		for j := range e.UsersRecords[i].Devices {
+			records := e.UsersRecords[i].Devices[j].Records
+			sessionIDs := map[string]uint64{}
+			var nextID uint64
+			for k := range records {
+				// work on the stored element - ranging by value yields a copy
+				// and the SeqSessionID assignment below would be lost
+				record := &records[k]
+				id, found := sessionIDs[record.SessionId]
+				if !found {
+					id = nextID
+					sessionIDs[record.SessionId] = id
+					nextID++
+				}
+				record.SeqSessionID = id
+				// assert
+				if record.Sanitized != e.sanitizedInput {
+					if e.sanitizedInput {
+						log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized")
+					}
+					log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present")
+				}
+
+				record.Enrich()
+			}
+			// keep sessions together and order each session by time
+			sort.SliceStable(records, func(x, y int) bool {
+				if records[x].SeqSessionID == records[y].SeqSessionID {
+					return records[x].RealtimeAfterLocal < records[y].RealtimeAfterLocal
+				}
+				return records[x].SeqSessionID < records[y].SeqSessionID
+			})
+		}
+	}
+}
+
+// evaluate replays the history through the given strategy and records, for
+// every history record, whether the strategy had the executed command line
+// among its candidates and at which (1-based) position.
+//
+// Only the records of the first device of the first user are evaluated.
+// The collected results are appended to e.Strategies.
+// An error is returned when there is nothing to evaluate or when the
+// strategy fails to accept a record.
+func (e *evaluator) evaluate(strategy strategy) error {
+	// guard the [0] indexing below - batch mode may load no users or devices
+	if len(e.UsersRecords) == 0 || len(e.UsersRecords[0].Devices) == 0 {
+		return fmt.Errorf("no history records loaded - nothing to evaluate")
+	}
+	title, description := strategy.GetTitleAndDescription()
+	strategyData := strategyJSON{Title: title, Description: description}
+	for _, record := range e.UsersRecords[0].Devices[0].Records {
+		// candidates are requested BEFORE the record is added to the strategy
+		candidates := strategy.GetCandidates()
+
+		// stays the zero value when no candidate matches
+		match := matchJSON{}
+		for i, candidate := range candidates {
+			// make an option (--calculate-total) to turn this on/off ?
+			// if i >= e.maxCandidates {
+			// 	break
+			// }
+			if candidate == record.CmdLine {
+				match = matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength}
+				break
+			}
+		}
+		strategyData.Matches = append(strategyData.Matches, match)
+
+		err := strategy.AddHistoryRecord(&record)
+		if err != nil {
+			log.Println("Error while evauating", err)
+			return err
+		}
+	}
+	e.Strategies = append(e.Strategies, strategyData)
+	return nil
+}
+
+// loadHistoryRecordsBatchMode walks the <dataRootPath>/<user>/<device>/
+// directory tree and loads the file named fname from every device directory.
+//
+// Entries that are not directories at the user or device level are skipped
+// with a warning. Progress is printed to stdout. Directories that cannot be
+// read terminate the program. A device without a matching file is still
+// included, with no records.
+func (e *evaluator) loadHistoryRecordsBatchMode(fname string, dataRootPath string) []userRecords {
+	rootInfo, err := os.Stat(dataRootPath)
+	if err != nil {
+		log.Fatal("Error: Directory", dataRootPath, "does not exist - exiting! (", err, ")")
+	}
+	if !rootInfo.IsDir() {
+		log.Fatal("Error:", dataRootPath, "is not a directory - exiting!")
+	}
+	userEntries, err := ioutil.ReadDir(dataRootPath)
+	if err != nil {
+		log.Fatal("Could not read directory:", dataRootPath)
+	}
+	fmt.Println("Listing users in <", dataRootPath, ">...")
+
+	var loaded []userRecords
+	for _, userEntry := range userEntries {
+		userPath := filepath.Join(dataRootPath, userEntry.Name())
+		if !userEntry.IsDir() {
+			log.Println("Warn: Unexpected file (not a directory) <", userPath, "> - skipping.")
+			continue
+		}
+		fmt.Println()
+		fmt.Printf("*- %s\n", userEntry.Name())
+		deviceEntries, err := ioutil.ReadDir(userPath)
+		if err != nil {
+			log.Fatal("Could not read directory:", userPath)
+		}
+
+		currentUser := userRecords{Name: userEntry.Name()}
+		for _, deviceEntry := range deviceEntries {
+			devicePath := filepath.Join(userPath, deviceEntry.Name())
+			if !deviceEntry.IsDir() {
+				log.Println("Warn: Unexpected file (not a directory) <", devicePath, "> - skipping.")
+				continue
+			}
+			fmt.Printf("   \\- %s\n", deviceEntry.Name())
+			fileEntries, err := ioutil.ReadDir(devicePath)
+			if err != nil {
+				log.Fatal("Could not read directory:", devicePath)
+			}
+
+			currentDevice := deviceRecords{Name: deviceEntry.Name()}
+			for _, fileEntry := range fileEntries {
+				if fileEntry.Name() != fname {
+					fmt.Printf("      \\- %s - skipped\n", fileEntry.Name())
+					continue
+				}
+				fmt.Printf("      \\- %s - loading ...", fileEntry.Name())
+				// load the data
+				currentDevice.Records = e.loadHistoryRecords(filepath.Join(devicePath, fileEntry.Name()))
+				fmt.Println(" OK ✓")
+			}
+			currentUser.Devices = append(currentUser.Devices, currentDevice)
+		}
+		loaded = append(loaded, currentUser)
+	}
+	return loaded
+}
+
+// loadHistoryRecords reads a resh history file with one JSON record per line.
+//
+// Lines that do not decode as common.Record are retried as
+// common.FallbackRecord and converted. For raw (unsanitized) input CmdLength
+// is computed from CmdLine; for sanitized input it has to be present in the
+// data already. Any open, read, decoding or assertion failure terminates the
+// program.
+func (e *evaluator) loadHistoryRecords(fname string) []common.Record {
+	file, err := os.Open(fname)
+	if err != nil {
+		log.Fatal("Open() resh history file error:", err)
+	}
+	defer file.Close()
+
+	var records []common.Record
+	scanner := bufio.NewScanner(file)
+	for scanner.Scan() {
+		record := common.Record{}
+		fallbackRecord := common.FallbackRecord{}
+		line := scanner.Text()
+		err = json.Unmarshal([]byte(line), &record)
+		if err != nil {
+			err = json.Unmarshal([]byte(line), &fallbackRecord)
+			if err != nil {
+				log.Println("Line:", line)
+				log.Fatal("Decoding error:", err)
+			}
+			record = common.ConvertRecord(&fallbackRecord)
+		}
+		if !e.sanitizedInput {
+			if record.CmdLength != 0 {
+				log.Fatal("Assert failed - 'cmdLength' is set in raw data. Maybe you want to use '--sanitized-input' option?")
+			}
+			record.CmdLength = len(record.CmdLine)
+		}
+		if record.CmdLength == 0 {
+			log.Fatal("Assert failed - 'cmdLength' is unset in the data. This should not happen.")
+		}
+		records = append(records, record)
+	}
+	// Scan() also returns false on read errors and on lines longer than the
+	// scanner buffer - without this check the history is silently truncated
+	if err := scanner.Err(); err != nil {
+		log.Fatal("Reading resh history file error:", err)
+	}
+	return records
+}
--- a/evaluate/strategy-directory-sensitive.go
+++ b/evaluate/strategy-directory-sensitive.go
@ -0,0 +1,42 @@
+package main
+
+import (
+	"github.com/curusarn/resh/common"
+)
+
+// strategyDirectorySensitive keeps a separate recent-commands list for every
+// working directory and offers the list of the directory the user is in.
+type strategyDirectorySensitive struct {
+	// history maps a working directory (record.Pwd) to the command lines
+	// executed there, most recent first.
+	history map[string][]string
+	// lastPwd is the PwdAfter of the most recently added record.
+	lastPwd string
+}
+
+// init allocates the per-directory history map; it has to be called before
+// the first record is added.
+func (s *strategyDirectorySensitive) init() {
+	s.history = make(map[string][]string)
+}
+
+func (s *strategyDirectorySensitive) GetTitleAndDescription() (string, string) {
+	return "directory sensitive (recent)", "Use recent commands executed is the same directory"
+}
+
+// GetCandidates returns the command lines recorded for the directory the
+// last added record ended up in (nil when nothing was recorded there).
+func (s *strategyDirectorySensitive) GetCandidates() []string {
+	candidates := s.history[s.lastPwd]
+	return candidates
+}
+
+// AddHistoryRecord moves the record's command line to the front of the
+// history of the directory it was executed in (record.Pwd) and remembers the
+// directory the command ended in (record.PwdAfter). It always returns nil.
+func (s *strategyDirectorySensitive) AddHistoryRecord(record *common.Record) error {
+	// work on history for PWD
+	pwd := record.Pwd
+	// remove previous occurrence of record
+	for i, cmd := range s.history[pwd] {
+		if cmd == record.CmdLine {
+			s.history[pwd] = append(s.history[pwd][:i], s.history[pwd][i+1:]...)
+			// every command line is stored at most once; stop here instead of
+			// iterating further over the slice that has just been shifted
+			break
+		}
+	}
+	// prepend new record
+	s.history[pwd] = append([]string{record.CmdLine}, s.history[pwd]...)
+	s.lastPwd = record.PwdAfter
+	return nil
+}
+
+// ResetHistory drops the recorded per-directory history. It always returns nil.
+func (s *strategyDirectorySensitive) ResetHistory() error {
+	s.history = make(map[string][]string)
+	return nil
+}
--- a/evaluate/strategy-dummy.go
+++ b/evaluate/strategy-dummy.go
@ -0,0 +1,24 @@
+package main
+
+import "github.com/curusarn/resh/common"
+
+// strategyDummy is a baseline strategy that never offers any candidates.
+type strategyDummy struct {
+	// history collects the added command lines; GetCandidates never reads it.
+	history []string
+}
+
+func (s *strategyDummy) GetTitleAndDescription() (string, string) {
+	return "dummy", "Return empty candidate list"
+}
+
+func (s *strategyDummy) GetCandidates() []string {
+	return nil
+}
+
+// AddHistoryRecord appends the record's command line to the history.
+// It always returns nil.
+func (s *strategyDummy) AddHistoryRecord(record *common.Record) error {
+	cmdLine := record.CmdLine
+	s.history = append(s.history, cmdLine)
+	return nil
+}
+
+// ResetHistory drops the collected history, like the other strategies do.
+// It always returns nil.
+func (s *strategyDummy) ResetHistory() error {
+	s.history = nil
+	return nil
+}
--- a/evaluate/strategy-frequent.go
+++ b/evaluate/strategy-frequent.go
@ -0,0 +1,47 @@
+package main
+
+import (
+	"sort"
+
+	"github.com/curusarn/resh/common"
+)
+
+// strategyFrequent offers command lines ordered by how often they were executed.
+type strategyFrequent struct {
+	// history maps a command line to the number of times it was added.
+	history map[string]int
+}
+
+// strFrqEntry is a (command line, count) pair used to sort the frequency map.
+type strFrqEntry struct {
+	cmdLine string
+	count   int
+}
+
+// init allocates the frequency map; it has to be called before the first
+// record is added.
+func (s *strategyFrequent) init() {
+	s.history = make(map[string]int)
+}
+
+func (s *strategyFrequent) GetTitleAndDescription() (string, string) {
+	return "frequent", "Use frequent commands"
+}
+
+func (s *strategyFrequent) GetCandidates() []string {
+	var mapItems []strFrqEntry
+	for cmdLine, count := range s.history {
+		mapItems = append(mapItems, strFrqEntry{cmdLine, count})
+	}
+	sort.Slice(mapItems, func(i int, j int) bool { return mapItems[i].count > mapItems[j].count })
+	var hist []string
+	for _, item := range mapItems {
+		hist = append(hist, item.cmdLine)
+	}
+	return hist
+}
+
+// AddHistoryRecord increases the use count of the record's command line.
+// It always returns nil.
+func (s *strategyFrequent) AddHistoryRecord(record *common.Record) error {
+	cmdLine := record.CmdLine
+	s.history[cmdLine] = s.history[cmdLine] + 1
+	return nil
+}
+
+// ResetHistory drops all recorded use counts. It always returns nil.
+func (s *strategyFrequent) ResetHistory() error {
+	s.history = make(map[string]int)
+	return nil
+}
--- a/evaluate/strategy-recent.go
+++ b/evaluate/strategy-recent.go
@ -0,0 +1,32 @@
+package main
+
+import "github.com/curusarn/resh/common"
+
+// strategyRecent offers previously executed command lines, most recent first.
+type strategyRecent struct {
+	// history holds each command line once, most recently used first.
+	history []string
+}
+
+func (s *strategyRecent) GetTitleAndDescription() (string, string) {
+	return "recent", "Use recent commands"
+}
+
+// GetCandidates returns the whole history, most recently used command first.
+func (s *strategyRecent) GetCandidates() []string {
+	candidates := s.history
+	return candidates
+}
+
+// AddHistoryRecord moves the record's command line to the front of the
+// history, removing its previous occurrence. It always returns nil.
+func (s *strategyRecent) AddHistoryRecord(record *common.Record) error {
+	// remove previous occurrence of record
+	for i, cmd := range s.history {
+		if cmd == record.CmdLine {
+			s.history = append(s.history[:i], s.history[i+1:]...)
+			// every command line is stored at most once; stop here instead of
+			// iterating further over the slice that has just been shifted
+			break
+		}
+	}
+	// prepend new record
+	s.history = append([]string{record.CmdLine}, s.history...)
+	return nil
+}
+
+func (s *strategyRecent) ResetHistory() error {
+	s.history = nil
+	return nil
+}
--- a/go.mod
+++ b/go.mod
@ -2,4 +2,11 @@ module github.com/curusarn/resh

 go 1.12

-require github.com/BurntSushi/toml v0.3.1
+require (
+	github.com/BurntSushi/toml v0.3.1
+	github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect
+	github.com/mattn/go-shellwords v1.0.6
+	github.com/wcharczuk/go-chart v2.0.1+incompatible
+	github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa
+	golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 // indirect
+)
--- a/go.sum
+++ b/go.sum
@ -1,2 +1,13 @@
 github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
 github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g=
+github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k=
+github.com/mattn/go-shellwords v1.0.6 h1:9Jok5pILi5S1MnDirGVTufYGtksUs/V2BWUP3ZkeUUI=
+github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o=
+github.com/wcharczuk/go-chart v2.0.1+incompatible h1:0pz39ZAycJFF7ju/1mepnk26RLVLBCWz1STcD3doU0A=
+github.com/wcharczuk/go-chart v2.0.1+incompatible/go.mod h1:PF5tmL4EIx/7Wf+hEkpCqYi5He4u90sw+0+6FhrryuE=
+github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa h1:rW+Lu6281ed/4XGuVIa4/YebTRNvoUJlfJ44ktEVwZk=
+github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa/go.mod h1:2rx5KE5FLD0HRfkkpyn8JwbVLBdhgeiOb2D2D9LLKM4=
+golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 h1:4dQcAORh9oYBwVSBVIkP489LUPC+f1HBkTYXgmqfR+o=
+golang.org/x/image v0.0.0-20190902063713-cb417be4ba39/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0=
+golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
--- a/sanitize-history/resh-sanitize-history.go
+++ b/sanitize-history/resh-sanitize-history.go
@ -0,0 +1,424 @@
+package main
+
+import (
+	"bufio"
+	"crypto/sha1"
+	"encoding/binary"
+	"encoding/hex"
+	"encoding/json"
+	"errors"
+	"flag"
+	"fmt"
+	"log"
+	"net/url"
+	"os"
+	"os/user"
+	"path"
+	"path/filepath"
+	"strconv"
+	"strings"
+	"unicode"
+
+	"github.com/curusarn/resh/common"
+	giturls "github.com/whilp/git-urls"
+)
+
+// Version from git set during build
+var Version string
+
+// Revision from git set during build
+var Revision string
+
+// main reads the resh history line by line, sanitizes every record and
+// writes the result as one JSON record per line to the output file, or to
+// stdout when no output file is given. Any failure terminates the program.
+func main() {
+	usr, err := user.Current()
+	if err != nil {
+		// usr is nil on error - dereferencing it below would panic
+		log.Fatal("Could not determine current user:", err)
+	}
+	dir := usr.HomeDir
+	historyPath := filepath.Join(dir, ".resh_history.json")
+	// outputPath := filepath.Join(dir, "resh_history_sanitized.json")
+	sanitizerDataPath := filepath.Join(dir, ".resh", "sanitizer_data")
+
+	showVersion := flag.Bool("version", false, "Show version and exit")
+	showRevision := flag.Bool("revision", false, "Show git revision and exit")
+	trimHashes := flag.Int("trim-hashes", 12, "Trim hashes to N characters, '0' turns off trimming")
+	inputPath := flag.String("input", historyPath, "Input file")
+	outputPath := flag.String("output", "", "Output file (default: use stdout)")
+
+	flag.Parse()
+
+	if *showVersion {
+		fmt.Println(Version)
+		os.Exit(0)
+	}
+	if *showRevision {
+		fmt.Println(Revision)
+		os.Exit(0)
+	}
+	sanitizer := sanitizer{hashLength: *trimHashes}
+	err = sanitizer.init(sanitizerDataPath)
+	if err != nil {
+		log.Fatal("Sanitizer init() error:", err)
+	}
+
+	inputFile, err := os.Open(*inputPath)
+	if err != nil {
+		log.Fatal("Open() resh history file error:", err)
+	}
+	defer inputFile.Close()
+
+	var writer *bufio.Writer
+	if *outputPath == "" {
+		writer = bufio.NewWriter(os.Stdout)
+	} else {
+		outputFile, err := os.Create(*outputPath)
+		if err != nil {
+			log.Fatal("Create() output file error:", err)
+		}
+		defer outputFile.Close()
+		writer = bufio.NewWriter(outputFile)
+	}
+
+	scanner := bufio.NewScanner(inputFile)
+	for scanner.Scan() {
+		record := common.Record{}
+		fallbackRecord := common.FallbackRecord{}
+		line := scanner.Text()
+		err = json.Unmarshal([]byte(line), &record)
+		if err != nil {
+			// retry with the fallback record format and convert it
+			err = json.Unmarshal([]byte(line), &fallbackRecord)
+			if err != nil {
+				log.Println("Line:", line)
+				log.Fatal("Decoding error:", err)
+			}
+			record = common.ConvertRecord(&fallbackRecord)
+		}
+		err = sanitizer.sanitizeRecord(&record)
+		if err != nil {
+			log.Println("Line:", line)
+			log.Fatal("Sanitization error:", err)
+		}
+		outLine, err := json.Marshal(&record)
+		if err != nil {
+			log.Println("Line:", line)
+			log.Fatal("Encoding error:", err)
+		}
+		// fmt.Println(string(outLine))
+		n, err := writer.WriteString(string(outLine) + "\n")
+		if err != nil {
+			log.Fatal(err)
+		}
+		if n == 0 {
+			log.Fatal("Nothing was written", n)
+		}
+	}
+	// Scan() also returns false on read errors and on lines longer than the
+	// scanner buffer - without this check the output is silently truncated
+	if err := scanner.Err(); err != nil {
+		log.Fatal("Reading resh history file error:", err)
+	}
+	// flush explicitly (instead of in a defer) so a failed write is reported
+	if err := writer.Flush(); err != nil {
+		log.Fatal("Flush() output error:", err)
+	}
+}
+
+// sanitizer replaces sensitive parts of history records with SHA1 hashes.
+type sanitizer struct {
+	// hashLength is the number of characters hashes are trimmed to;
+	// zero or a negative value turns trimming off.
+	hashLength int
+	// whitelist is the set of tokens that are kept unsanitized.
+	whitelist  map[string]bool
+}
+
+// init loads the whitelist from "whitelist.txt" inside dataPath.
+// It always returns nil; a missing file terminates the program in loadData.
+func (s *sanitizer) init(dataPath string) error {
+	s.whitelist = loadData(path.Join(dataPath, "whitelist.txt"))
+	return nil
+}
+
+// loadData reads fname and returns the set of its lines (every line is used
+// as a key mapped to true). Open and read failures terminate the program.
+func loadData(fname string) map[string]bool {
+	file, err := os.Open(fname)
+	if err != nil {
+		log.Fatal("Open() file error:", err)
+	}
+	defer file.Close()
+
+	scanner := bufio.NewScanner(file)
+	data := make(map[string]bool)
+	for scanner.Scan() {
+		line := scanner.Text()
+		data[line] = true
+	}
+	// a read error would otherwise leave us with a silently truncated set
+	if err := scanner.Err(); err != nil {
+		log.Fatal("Read file error:", err)
+	}
+	return data
+}
+
+func (s *sanitizer) sanitizeRecord(record *common.Record) error {
+	// hash directories of the paths
+	record.Pwd = s.sanitizePath(record.Pwd)
+	record.RealPwd = s.sanitizePath(record.RealPwd)
+	record.PwdAfter = s.sanitizePath(record.PwdAfter)
+	record.RealPwdAfter = s.sanitizePath(record.RealPwdAfter)
+	record.GitDir = s.sanitizePath(record.GitDir)
+	record.GitRealDir = s.sanitizePath(record.GitRealDir)
+	record.Home = s.sanitizePath(record.Home)
+	record.ShellEnv = s.sanitizePath(record.ShellEnv)
+
+	// hash the most sensitive info, do not tokenize
+	record.Host = s.hashToken(record.Host)
+	record.Login = s.hashToken(record.Login)
+	record.MachineId = s.hashToken(record.MachineId)
+
+	var err error
+	// this changes git url a bit but I'm still happy with the result
+	// e.g. "git@github.com:curusarn/resh" becomes "ssh://git@github.com/3385162f14d7/5a7b2909005c"
+	// 		notice the "ssh://" prefix
+	record.GitOriginRemote, err = s.sanitizeGitURL(record.GitOriginRemote)
+	if err != nil {
+		log.Println("Error while snitizing GitOriginRemote url", record.GitOriginRemote, ":", err)
+		return err
+	}
+
+	// sanitization destroys original CmdLine length -> save it
+	record.CmdLength = len(record.CmdLine)
+
+	record.CmdLine, err = s.sanitizeCmdLine(record.CmdLine)
+	if err != nil {
+		log.Fatal("Cmd:", record.CmdLine, "; sanitization error:", err)
+	}
+
+	// add a flag to signify that the record has been sanitized
+	record.Sanitized = true
+	return nil
+}
+
+// sanitizeCmdLine sanitizes a whole command line, processing it rune by rune.
+//
+// The line is split into tokens on every rune that is neither a letter nor a
+// digit; tokens go through sanitizeCmdToken and the separating runes are
+// copied to the output unchanged. Simple options ("-" or "--" preceded by
+// whitespace and followed by letters, digits, "-" or "_") are copied to the
+// output unsanitized.
+//
+// Errors from sanitizeCmdToken are only logged, so the returned error is
+// always nil.
+func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) {
+	const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=`:@^/+%." // all bash control characters, '=', ...
+	const optionAllowedChars = "-_"                              // characters commonly found inside of options
+	sanCmdLine := "" // sanitized output built so far
+	buff := ""       // token that is currently being collected
+
+	// simple options shouldn't be sanitized
+	// 1) whitespace 2) "-" or "--" 3) letters, digits, "-", "_" 4) ending whitespace or any of "=;)"
+	var optionDetected bool
+
+	// the three previously seen runes; they start as spaces so that an option
+	// at the very beginning of the line is detected as well
+	prevR3 := ' '
+	prevR2 := ' '
+	prevR := ' '
+	for _, r := range cmdLine {
+		switch optionDetected {
+		case true:
+			// we are inside of an option name
+			if unicode.IsSpace(r) || strings.ContainsRune(optionEndingChars, r) {
+				// whitespace or an option-ending character ends the option
+				// => add option unsanitized
+				optionDetected = false
+				if len(buff) > 0 {
+					sanCmdLine += buff
+					buff = ""
+				}
+				sanCmdLine += string(r)
+			} else if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false &&
+				strings.ContainsRune(optionAllowedChars, r) == false {
+				// r is not any of allowed chars for an option: letter, digit, "-" or "_"
+				// => sanitize
+				// (optionDetected stays set here)
+				if len(buff) > 0 {
+					sanToken, err := s.sanitizeCmdToken(buff)
+					if err != nil {
+						log.Println("WARN: got error while sanitizing cmdLine:", cmdLine)
+						// return cmdLine, err
+					}
+					sanCmdLine += sanToken
+					buff = ""
+				}
+				sanCmdLine += string(r)
+			} else {
+				// r is allowed inside of an option => keep collecting
+				buff += string(r)
+			}
+		case false:
+			// split command on all non-letter and non-digit characters
+			if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false {
+				// split token
+				if len(buff) > 0 {
+					sanToken, err := s.sanitizeCmdToken(buff)
+					if err != nil {
+						log.Println("WARN: got error while sanitizing cmdLine:", cmdLine)
+						// return cmdLine, err
+					}
+					sanCmdLine += sanToken
+					buff = ""
+				}
+				sanCmdLine += string(r)
+			} else {
+				// a letter or digit right after " -" or " --" starts an option
+				if (unicode.IsSpace(prevR2) && prevR == '-') ||
+					(unicode.IsSpace(prevR3) && prevR2 == '-' && prevR == '-') {
+					optionDetected = true
+				}
+				buff += string(r)
+			}
+		}
+		prevR3 = prevR2
+		prevR2 = prevR
+		prevR = r
+	}
+	// handle the token left in the buffer at the end of the line
+	if len(buff) <= 0 {
+		// nothing in the buffer => work is done
+		return sanCmdLine, nil
+	}
+	if optionDetected {
+		// option detected => dont sanitize
+		sanCmdLine += buff
+		return sanCmdLine, nil
+	}
+	// sanitize
+	sanToken, err := s.sanitizeCmdToken(buff)
+	if err != nil {
+		log.Println("WARN: got error while sanitizing cmdLine:", cmdLine)
+		// return cmdLine, err
+	}
+	sanCmdLine += sanToken
+	return sanCmdLine, nil
+}
+
+// sanitizeGitURL parses rawURL with the git-urls parser and sanitizes its
+// parts. An empty URL is returned as is. When parsing fails the raw URL is
+// returned together with the error.
+func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) {
+	if rawURL == "" {
+		return rawURL, nil
+	}
+	parsed, err := giturls.Parse(rawURL)
+	if err == nil {
+		return s.sanitizeParsedURL(parsed)
+	}
+	return rawURL, err
+}
+
+// sanitizeURL parses rawURL with net/url and sanitizes its parts. An empty
+// URL is returned as is. When parsing fails the raw URL is returned together
+// with the error.
+func (s *sanitizer) sanitizeURL(rawURL string) (string, error) {
+	if rawURL == "" {
+		return rawURL, nil
+	}
+	parsed, err := url.Parse(rawURL)
+	if err == nil {
+		return s.sanitizeParsedURL(parsed)
+	}
+	return rawURL, err
+}
+
+// sanitizeParsedURL sanitizes the individual parts of parsedURL in place and
+// returns its string form. The scheme is left untouched and a password is
+// dropped entirely. An error is returned when the host consists of more than
+// two ":"-separated parts.
+func (s *sanitizer) sanitizeParsedURL(parsedURL *url.URL) (string, error) {
+	parsedURL.Opaque = s.sanitizeToken(parsedURL.Opaque)
+
+	// only the username is carried over => password won't even make it to the sanitized data
+	// an empty username results in a nil User because `gitUrls.Parse()` sets
+	// `User` to `url.User("")` instead of `nil`
+	var sanitizedUser *url.Userinfo
+	if username := parsedURL.User.Username(); len(username) > 0 {
+		sanitizedUser = url.User(s.sanitizeToken(username))
+	}
+	parsedURL.User = sanitizedUser
+
+	host, err := s.sanitizeTwoPartToken(parsedURL.Host, ":")
+	parsedURL.Host = host
+	if err != nil {
+		return parsedURL.String(), err
+	}
+
+	parsedURL.Path = s.sanitizePath(parsedURL.Path)
+	// ForceQuery bool
+	parsedURL.RawQuery = s.sanitizeToken(parsedURL.RawQuery)
+	parsedURL.Fragment = s.sanitizeToken(parsedURL.Fragment)
+
+	return parsedURL.String(), nil
+}
+
+func (s *sanitizer) sanitizePath(path string) string {
+	var sanPath string
+	for _, token := range strings.Split(path, "/") {
+		if s.whitelist[token] != true {
+			token = s.hashToken(token)
+		}
+		sanPath += token + "/"
+	}
+	if len(sanPath) > 0 {
+		sanPath = sanPath[:len(sanPath)-1]
+	}
+	return sanPath
+}
+
+// sanitizeTwoPartToken sanitizes a token made of at most two parts separated
+// by delimeter; each part is sanitized separately and the delimiter is kept.
+// A token with more than two parts is returned unchanged with an error.
+func (s *sanitizer) sanitizeTwoPartToken(token string, delimeter string) (string, error) {
+	parts := strings.Split(token, delimeter)
+	switch len(parts) {
+	case 0, 1:
+		return s.sanitizeToken(token), nil
+	case 2:
+		first := s.sanitizeToken(parts[0])
+		second := s.sanitizeToken(parts[1])
+		return first + delimeter + second, nil
+	default:
+		return token, errors.New("Token has more than two parts")
+	}
+}
+
+// sanitizeCmdToken sanitizes one token of a command line.
+//
+// Tokens of at most one byte and whitelisted tokens are returned unchanged.
+// Tokens made only of letters and digits are hashed. Tokens without any
+// letter or digit are returned unchanged. A mixed token is hashed as well,
+// but a warning is logged and an error is returned along with the hash.
+func (s *sanitizer) sanitizeCmdToken(token string) (string, error) {
+	// there shouldn't be tokens with letters or digits mixed together with symbols
+	// NOTE: do not sanitize single letter tokens
+	if len(token) <= 1 || s.isInWhitelist(token) {
+		return token, nil
+	}
+
+	// NOTE: there is deliberately no special sanitization for numbers
+	hasLetterOrDigit := false
+	hasOtherCharacter := false
+	for _, r := range token {
+		if unicode.IsDigit(r) || unicode.IsLetter(r) {
+			hasLetterOrDigit = true
+		} else {
+			hasOtherCharacter = true
+		}
+	}
+
+	switch {
+	case !hasOtherCharacter:
+		// letters and digits only
+		return s.hashToken(token), nil
+	case !hasLetterOrDigit:
+		// symbols only
+		return token, nil
+	}
+	log.Println("WARN: cmd token is made of mix of letters or digits and other characters; token:", token)
+	return s.hashToken(token), errors.New("cmd token is made of mix of letters or digits and other characters")
+}
+
+// sanitizeToken hashes the token unless it is at most one byte long or it is
+// in the whitelist; in those cases the token is returned unchanged.
+func (s *sanitizer) sanitizeToken(token string) string {
+	// NOTE: do not sanitize single letter tokens
+	if len(token) > 1 && !s.isInWhitelist(token) {
+		return s.hashToken(token)
+	}
+	return token
+}
+
+// hashToken returns the hex-encoded SHA1 hash of token, trimmed according to
+// the sanitizer's hash length. An empty token is returned unchanged.
+func (s *sanitizer) hashToken(token string) string {
+	if token == "" {
+		return token
+	}
+	// hash with sha1
+	digest := sha1.Sum([]byte(token))
+	return s.trimHash(hex.EncodeToString(digest[:]))
+}
+
+// hashNumericToken returns a decimal number derived from the SHA1 hash of
+// token, trimmed according to the sanitizer's hash length. The number is the
+// magnitude of the first 8 bytes of the hash read as a little-endian signed
+// 64-bit integer. An empty token is returned unchanged.
+func (s *sanitizer) hashNumericToken(token string) string {
+	if len(token) <= 0 {
+		return token
+	}
+	h := sha1.New()
+	h.Write([]byte(token))
+	sum := h.Sum(nil)
+	// use an explicit 64-bit type - plain int is only 32 bits on some platforms
+	sumInt := int64(binary.LittleEndian.Uint64(sum))
+	// negate in unsigned arithmetic so the smallest int64 cannot overflow
+	magnitude := uint64(sumInt)
+	if sumInt < 0 {
+		magnitude = -magnitude
+	}
+	// trim in every case (negative values used to be returned untrimmed)
+	return s.trimHash(strconv.FormatUint(magnitude, 10))
+}
+
+// trimHash shortens hash to the configured hash length. The hash is returned
+// whole when the length is not positive or exceeds the length of the hash.
+func (s *sanitizer) trimHash(hash string) string {
+	if limit := s.hashLength; limit > 0 && limit <= len(hash) {
+		return hash[:limit]
+	}
+	return hash
+}
+
+// isInWhitelist reports whether the lowercased token is in the whitelist.
+func (s *sanitizer) isInWhitelist(token string) bool {
+	lowered := strings.ToLower(token)
+	return s.whitelist[lowered]
+}
--- a/sanitizer_data/copyright_information.md
+++ b/sanitizer_data/copyright_information.md
@ -0,0 +1,7 @@
+# copyright information
+
+The whitelist contains content from a variety of sources.
+
+Part of the whitelist (`./whitelist.txt`) is made of copyrighted content from [FileInfo.com](https://fileinfo.com/filetypes/common).
+
+This content was used with permission from FileInfo.com.
--- a/sanitizer_data/whitelist.txt
+++ b/sanitizer_data/whitelist.txt
--- a/shellrc.sh
+++ b/shellrc.sh
@ -153,10 +153,19 @@ __resh_precmd() {
    __RESH_PWD_AFTER="$PWD"
    if [ -n "${__RESH_COLLECT}" ]; then
        if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then
-            echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})"
+            source ~/.resh/shellrc 
+            if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then
+                echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})"
+            else
+                echo "RESH INFO: New RESH shellrc script was loaded - if you encounter any issues please restart this terminal session."
+            fi
        elif [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then
-            echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -revision); resh version of this terminal session: ${__RESH_REVISION})"
-        else
+            source ~/.resh/shellrc 
+            if [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then
+                echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh revision: $(resh-collect -revision); resh revision of this terminal session: ${__RESH_REVISION})"
+            fi
+        fi
+        if [ "$__RESH_VERSION" == $(resh-collect -version) ] && [ "$__RESH_REVISION" == $(resh-collect -revision) ]; then
            resh-collect -requireVersion "$__RESH_VERSION" \
                        -requireRevision "$__RESH_REVISION" \
                        -cmdLine "$__RESH_CMDLINE" \
--- a/2
+++ b/2
@ -1 +1 @@
-1.1.1
+1.1.3
 @ -1 +1 @@
 .1.1
 .1.3