diff --git a/.gitignore b/.gitignore index afd111b..602e54d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ resh-collect resh-daemon +resh-sanitize-history +resh-evaluate diff --git a/Makefile b/Makefile index e63c1539..687ad36 100644 --- a/Makefile +++ b/Makefile @@ -6,8 +6,43 @@ GOFLAGS=-ldflags "-X main.Version=${VERSION} -X main.Revision=${REVISION}" autoinstall: ./install_helper.sh +sanitize: + # + # + # I'm going to create a sanitized version of your resh history. + # Everything is done locally - your history won't leave this machine. + # The way this works is that any sensitive information in your history is going to be replaced with its SHA1 hash. + # There is also going to be a second version with hashes trimmed to 12 characters for readability + # + # + # > full hashes: ~/resh_history_sanitized.json + # > 12 char hashes: ~/resh_history_sanitized_trim12.json + # + # + # Encountered any issues? Got questions? -> Hit me up at https://github.com/curusarn/resh/issues + # + # + # Running history sanitization ... + resh-sanitize-history -trim-hashes 0 --output ~/resh_history_sanitized.json + resh-sanitize-history -trim-hashes 12 --output ~/resh_history_sanitized_trim12.json + # + # + # SUCCESS - ALL DONE! 
+ # + # + # PLEASE HAVE A LOOK AT THE RESULT USING THESE COMMANDS: + # + # > pretty print JSON: + @echo 'cat ~/resh_history_sanitized_trim12.json | jq' + # + # > only show executed commands, don't show metadata: + @echo "cat ~/resh_history_sanitized_trim12.json | jq '.[\"cmdLine\"]'" + # + # + # + -build: submodules resh-collect resh-daemon +build: submodules resh-collect resh-daemon resh-sanitize-history resh-evaluate rebuild: make clean @@ -23,6 +58,8 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu cp -f shellrc.sh ~/.resh/shellrc cp -f uuid.sh ~/.resh/bin/resh-uuid cp -f resh-* ~/.resh/bin/ + cp -f evaluate/resh-evaluate-plot.py ~/.resh/bin/ + cp -fr sanitizer_data ~/.resh/ # backward compatibility: We have a new location for resh history file [ ! -f ~/.resh/history.json ] || mv ~/.resh/history.json ~/.resh_history.json # Adding resh shellrc to .bashrc ... @@ -40,24 +77,31 @@ install: build submodules/bash-preexec/bash-preexec.sh shellrc.sh config.toml uu # Final touch touch ~/.resh_history.json # + # + # ########################################################## # # # SUCCESS - thank you for trying out this project! # # # ########################################################## # + # # WHAT'S NEXT # Please RESTART ALL OPEN TERMINAL WINDOWS (or reload your rc files) # Your resh history is located in `~/.resh_history.json` # You can look at it using e.g. `tail -f ~/.resh_history.json | jq` # + # # ISSUES # If anything looks broken create an issue: https://github.com/curusarn/resh/issues # You can uninstall this at any time by running `rm -rf ~/.resh/` # You won't lose any collected history by removing `~/.resh` directory # + # # Please give me some contact info using this form: https://forms.gle/227SoyJ5c2iteKt98 # + # + # uninstall: # Uninstalling ... 
@@ -69,6 +113,11 @@ resh-daemon: daemon/resh-daemon.go common/resh-common.go version resh-collect: collect/resh-collect.go common/resh-common.go version go build ${GOFLAGS} -o $@ $< +resh-sanitize-history: sanitize-history/resh-sanitize-history.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< + +resh-evaluate: evaluate/resh-evaluate.go evaluate/strategy-*.go common/resh-common.go version + go build ${GOFLAGS} -o $@ $< evaluate/strategy-*.go $(HOME)/.resh $(HOME)/.resh/bin $(HOME)/.config: # Creating dirs ... diff --git a/README.md b/README.md index 7ddd5ed..74f86d3 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,7 @@ This project is the first phase of my Master project. -It records shell history with rich set of metadata and saves it locally. (device, dir, git, ... see example below) +It records shell history with rich set of metadata and saves it locally. (device, directory, git, time, terminal session pid, ... see example below) It doesn't change the way your shell and your shell history behaves. @@ -17,8 +17,9 @@ If you are not happy with it you can uninstall it with a single command (`rm -rf The ultimate point of my thesis is to provide a context-based replacement/enhancement for bash and zsh shell history. The idea is to: -- Save each command with metadata (device, dir, gitdir, ...) -- Recommend history based on saved metadata (e.g. it will be easier to get to commands specific to your project) +- Save each command with metadata (device, directory, git, time, terminal session pid, ... see example below) +- Recommend history based on saved metadata + - e.g. it will be easier to get to commands specific to the project you are currently working on (based on directory, git repository url, ...) - Provide a simple way to search whole history by command itself and/or metadata (e.g. imagine searching by project, directory, device, ...) 
- Synchronize history across devices - Provide an API (to make the project easily extensible) diff --git a/collect/resh-collect.go b/collect/resh-collect.go index ae52923..b96cc72 100644 --- a/collect/resh-collect.go +++ b/collect/resh-collect.go @@ -20,7 +20,10 @@ import ( "strings" ) +// Version from git set during build var Version string + +// Revision from git set during build var Revision string func main() { diff --git a/common/resh-common.go b/common/resh-common.go index aa2bb92..7e91094 100644 --- a/common/resh-common.go +++ b/common/resh-common.go @@ -1,5 +1,13 @@ package common +import ( + "log" + "strconv" + + "github.com/mattn/go-shellwords" +) + +// Record representing single executed command with its metadata type Record struct { // core CmdLine string `json:"cmdLine"` @@ -60,8 +68,180 @@ type Record struct { ReshUuid string `json:"reshUuid"` ReshVersion string `json:"reshVersion"` ReshRevision string `json:"reshRevision"` + + // added by sanitizatizer + Sanitized bool `json:"sanitized"` + CmdLength int `json:"cmdLength,omitempty"` + + // enriching fields - added "later" + FirstWord string `json:"firstWord,omitempty"` + Invalid bool `json:"invalid,omitempty"` + SeqSessionID uint64 `json:"seqSessionID,omitempty"` +} + +// FallbackRecord when record is too old and can't be parsed into regular Record +type FallbackRecord struct { + // older version of the record where cols and lines are int + + // core + CmdLine string `json:"cmdLine"` + ExitCode int `json:"exitCode"` + Shell string `json:"shell"` + Uname string `json:"uname"` + SessionId string `json:"sessionId"` + + // posix + Cols int `json:"cols"` // notice the in type + Lines int `json:"lines"` // notice the in type + Home string `json:"home"` + Lang string `json:"lang"` + LcAll string `json:"lcAll"` + Login string `json:"login"` + //Path string `json:"path"` + Pwd string `json:"pwd"` + PwdAfter string `json:"pwdAfter"` + ShellEnv string `json:"shellEnv"` + Term string `json:"term"` + + // 
non-posix"` + RealPwd string `json:"realPwd"` + RealPwdAfter string `json:"realPwdAfter"` + Pid int `json:"pid"` + SessionPid int `json:"sessionPid"` + Host string `json:"host"` + Hosttype string `json:"hosttype"` + Ostype string `json:"ostype"` + Machtype string `json:"machtype"` + Shlvl int `json:"shlvl"` + + // before after + TimezoneBefore string `json:"timezoneBefore"` + TimezoneAfter string `json:"timezoneAfter"` + + RealtimeBefore float64 `json:"realtimeBefore"` + RealtimeAfter float64 `json:"realtimeAfter"` + RealtimeBeforeLocal float64 `json:"realtimeBeforeLocal"` + RealtimeAfterLocal float64 `json:"realtimeAfterLocal"` + + RealtimeDuration float64 `json:"realtimeDuration"` + RealtimeSinceSessionStart float64 `json:"realtimeSinceSessionStart"` + RealtimeSinceBoot float64 `json:"realtimeSinceBoot"` + //Logs []string `json: "logs"` + + GitDir string `json:"gitDir"` + GitRealDir string `json:"gitRealDir"` + GitOriginRemote string `json:"gitOriginRemote"` + MachineId string `json:"machineId"` + + OsReleaseId string `json:"osReleaseId"` + OsReleaseVersionId string `json:"osReleaseVersionId"` + OsReleaseIdLike string `json:"osReleaseIdLike"` + OsReleaseName string `json:"osReleaseName"` + OsReleasePrettyName string `json:"osReleasePrettyName"` + + ReshUuid string `json:"reshUuid"` + ReshVersion string `json:"reshVersion"` + ReshRevision string `json:"reshRevision"` +} + +// ConvertRecord from FallbackRecord to Record +func ConvertRecord(r *FallbackRecord) Record { + return Record{ + // core + CmdLine: r.CmdLine, + ExitCode: r.ExitCode, + Shell: r.Shell, + Uname: r.Uname, + SessionId: r.SessionId, + + // posix + // these two lines are the only reason we are doing this + Cols: strconv.Itoa(r.Cols), + Lines: strconv.Itoa(r.Lines), + + Home: r.Home, + Lang: r.Lang, + LcAll: r.LcAll, + Login: r.Login, + // Path: r.path, + Pwd: r.Pwd, + PwdAfter: r.PwdAfter, + ShellEnv: r.ShellEnv, + Term: r.Term, + + // non-posix + RealPwd: r.RealPwd, + RealPwdAfter: r.RealPwdAfter, 
+ Pid: r.Pid, + SessionPid: r.SessionPid, + Host: r.Host, + Hosttype: r.Hosttype, + Ostype: r.Ostype, + Machtype: r.Machtype, + Shlvl: r.Shlvl, + + // before after + TimezoneBefore: r.TimezoneBefore, + TimezoneAfter: r.TimezoneAfter, + + RealtimeBefore: r.RealtimeBefore, + RealtimeAfter: r.RealtimeAfter, + RealtimeBeforeLocal: r.RealtimeBeforeLocal, + RealtimeAfterLocal: r.RealtimeAfterLocal, + + RealtimeDuration: r.RealtimeDuration, + RealtimeSinceSessionStart: r.RealtimeSinceSessionStart, + RealtimeSinceBoot: r.RealtimeSinceBoot, + + GitDir: r.GitDir, + GitRealDir: r.GitRealDir, + GitOriginRemote: r.GitOriginRemote, + MachineId: r.MachineId, + + OsReleaseId: r.OsReleaseId, + OsReleaseVersionId: r.OsReleaseVersionId, + OsReleaseIdLike: r.OsReleaseIdLike, + OsReleaseName: r.OsReleaseName, + OsReleasePrettyName: r.OsReleasePrettyName, + + ReshUuid: r.ReshUuid, + ReshVersion: r.ReshVersion, + ReshRevision: r.ReshRevision, + } +} + +// Enrich - adds additional fields to the record +func (r *Record) Enrich() { + // Get command/first word from commandline + r.FirstWord = GetCommandFromCommandLine(r.CmdLine) + err := r.Validate() + if err != nil { + log.Println("Invalid command:", r.CmdLine) + r.Invalid = true + } + r.Invalid = false + // TODO: Detect and mark simple commands r.Simple +} + +// Validate - returns error if the record is invalid +func (r *Record) Validate() error { + return nil +} + +// GetCommandFromCommandLine func +func GetCommandFromCommandLine(cmdLine string) string { + args, err := shellwords.Parse(cmdLine) + if err != nil { + log.Println("shellwords Error:", err, " (cmdLine: <", cmdLine, "> )") + return "" + } + if len(args) > 0 { + return args[0] + } + return "" } +// Config struct type Config struct { Port int } diff --git a/daemon/resh-daemon.go b/daemon/resh-daemon.go index fb1b31a..c8bcb0e 100644 --- a/daemon/resh-daemon.go +++ b/daemon/resh-daemon.go @@ -3,8 +3,6 @@ package main import ( "encoding/json" //"flag" - "github.com/BurntSushi/toml" - 
PLOT_WIDTH = 10  # inches
PLOT_HEIGHT = 7  # inches

PLOT_SIZE_zipf = 20

# The evaluator pipes one JSON document with the keys "UsersRecords" and
# "Strategies" to this script on stdin.
data = json.load(sys.stdin)

# DATA_records: every valid record of every user/device, ordered by time.
# DATA_records_by_session: the same records grouped by "sessionId".
DATA_records = []
DATA_records_by_session = defaultdict(list)
for user in data["UsersRecords"]:
    for device in user["Devices"]:
        for record in device["Records"]:
            # "invalid" is serialized with `omitempty` on the Go side, so the
            # key is missing (not false) for valid records.
            if record.get("invalid", False):
                continue

            DATA_records.append(record)
            DATA_records_by_session[record["sessionId"]].append(record)

DATA_records = list(sorted(DATA_records, key=lambda x: x["realtimeAfterLocal"]))

# Sort every session in place. Rebinding the loop variable (as the previous
# version did) leaves the lists stored in the dict untouched.
for session_records in DATA_records_by_session.values():
    session_records.sort(key=lambda x: x["realtimeAfterLocal"])

# TODO: this should be a cmdline option
async_draw = True
def _plot_frequency_rank(counts, plot_size, show_labels, zipf_format, noun):
    """Plot the normalized frequency of the `plot_size` most common keys of `counts`.

    counts      -- mapping key -> number of occurrences
    plot_size   -- how many of the most frequent keys to plot
    show_labels -- use the (trimmed) keys as x tick labels
    zipf_format -- matplotlib format string for the reference curve from zipf()
    noun        -- capitalized name of the plotted thing, used in title/labels
    """
    top = sorted(counts.items(), key=lambda item: item[1], reverse=True)[:plot_size]
    # Normalize by the most frequent entry; evaluated per element so that an
    # empty `top` simply yields an empty plot.
    frequencies = [count / top[0][1] for _, count in top]
    labels = [trim(key, 7) for key, _ in top]

    ranks = range(1, len(frequencies) + 1)
    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.plot(ranks, zipf(len(ranks)), zipf_format)
    plt.plot(ranks, frequencies, 'o-')
    plt.title(noun + " frequency / rank")
    plt.ylabel("Normalized " + noun.lower() + " frequency")
    plt.xlabel(noun + " rank")
    plt.legend(("Zipf", noun), loc="best")
    if show_labels:
        plt.xticks(ranks, labels, rotation=-60)
    # TODO: make xticks integral
    if async_draw:
        plt.draw()
    else:
        plt.show()


# Figure 3.1. The normalized command frequency, compared with Zipf.
def plot_cmdLineFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False):
    """Plot normalized frequency vs. rank of whole command lines."""
    cmdLine_count = defaultdict(int)
    for record in DATA_records:
        cmdLine_count[record["cmdLine"]] += 1

    _plot_frequency_rank(cmdLine_count, plotSize, show_labels, '-', "Commandline")


# similar to ~ Figure 3.1. The normalized command frequency, compared with Zipf.
def plot_cmdFrq_rank(plotSize=PLOT_SIZE_zipf, show_labels=False):
    """Plot normalized frequency vs. rank of commands (first words).

    Records without a first word are ignored.
    """
    cmd_count = defaultdict(int)
    for record in DATA_records:
        # "firstWord" is serialized with `omitempty`, so an empty first word
        # shows up as a missing key rather than as "".
        cmd = record.get("firstWord", "")
        if cmd == "":
            continue
        cmd_count[cmd] += 1

    _plot_frequency_rank(cmd_count, plotSize, show_labels, 'o-', "Command")
def _plot_vocabulary_growth(entries, noun):
    """Plot how the number of distinct `entries` grows with each entry seen.

    entries -- iterable of hashable values in the order they were entered
    noun    -- capitalized name of the counted thing, used in title/labels
    """
    vocabulary = set()
    # sizes[i] is the vocabulary size after the first i entries.
    sizes = [0]
    for entry in entries:
        vocabulary.add(entry)
        sizes.append(len(vocabulary))

    plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT))
    plt.plot(range(0, len(sizes)), sizes, '-')
    plt.title(noun + " vocabulary size vs. the number of command lines entered")
    plt.ylabel(noun + " vocabulary size")
    plt.xlabel("# of command lines entered")
    if async_draw:
        plt.draw()
    else:
        plt.show()


# Figure 3.2. Command vocabulary size vs. the number of command lines entered for four individuals.
def plot_cmdVocabularySize_cmdLinesEntered():
    """Plot the number of distinct commands (first words) over time."""
    # "firstWord" is serialized with `omitempty`; a missing key means "".
    _plot_vocabulary_growth(
        (record.get("firstWord", "") for record in DATA_records), "Command")


# Figure 5.6. Command line vocabulary size vs. the number of commands entered for four typical individuals.
def plot_cmdLineVocabularySize_cmdLinesEntered():
    """Plot the number of distinct command lines over time."""
    _plot_vocabulary_growth(
        (record["cmdLine"] for record in DATA_records), "Command line")


# Figure 3.3. Sequential structure of UNIX command usage, from Figure 4 in Hanson et al. (1984).
# Ball diameters are proportional to stationary probability. Lines indicate significant dependencies,
# solid ones being more probable (p < .0001) and dashed ones less probable (.005 < p < .0001).
def graph_cmdSequences(node_count=33, edge_minValue=0.05):
    """Render a graph of which command tends to follow which.

    Nodes are the `node_count` most frequent commands (first words) plus a
    synthetic "_start_" node marking the beginning of every session. An edge
    A -> B is drawn when B followed A in at least `edge_minValue` (fraction)
    of A's occurrences. Node size grows with the command count, edge width
    and style with the relative transition count.
    """
    START_CMD = "_start_"
    cmd_count = defaultdict(int)
    cmdSeq_count = defaultdict(lambda: defaultdict(int))
    cmd_id = dict()
    next_id = 0
    cmd_id[START_CMD] = str(next_id)
    for pid, session in DATA_records_by_session.items():
        cmd_count[START_CMD] += 1
        prev_cmd = START_CMD
        for record in session:
            # "firstWord" is serialized with `omitempty`; a missing key means "".
            cmd = record.get("firstWord", "")
            cmdSeq_count[prev_cmd][cmd] += 1
            cmd_count[cmd] += 1
            if cmd not in cmd_id:
                next_id += 1
                cmd_id[cmd] = str(next_id)
            prev_cmd = cmd

    # get `node_count` of largest nodes
    sorted_cmd_count = sorted(cmd_count.items(), key=lambda x: x[1], reverse=True)
    print(sorted_cmd_count)
    cmds_to_graph = [cmd for cmd, _ in sorted_cmd_count][:node_count]
    if not cmds_to_graph:
        # no sessions (or node_count == 0) -> nothing to draw
        return
    graphed = set(cmds_to_graph)

    # use (up to) 3 biggest nodes as a reference point for scaling:
    # the coefficient is the inverse of their average count
    reference_counts = [cmd_count[cmd] for cmd in cmds_to_graph[:3]]
    count2scale_coef = len(reference_counts) / sum(reference_counts)

    # scaling constant
    # affects node size and node label
    base_scaling_factor = 21
    # extra scaling for experiments - not really useful imho
    # affects everything nodes, edges, node labels, threshold for turning label into xlabel, xlabel size, ...
    extra_scaling_factor = 1.0
    for attempt in range(0, 10):
        # graphviz is not the most reliable piece of software
        # -> retry on fail but scale nodes down by 1%
        scaling_factor = base_scaling_factor * (1 - attempt * 0.01)

        # overlap: scale -> solve overlap by scaling the graph
        # overlap_shrink -> try to shrink the graph a bit after you are done
        # splines -> don't draw edges over nodes
        # sep: 0.25 -> margin kept around nodes while removing overlaps
        #   NOTE(review): this comment used to say "2.5 inches"; the value has
        #   always been '0.25' - confirm the intended margin against the
        #   graphviz `sep` documentation.
        graph_attr = {'overlap': 'scale', 'overlap_shrink': 'true',
                      'splines': 'true', 'sep': '0.25'}
        graph = Digraph(name='command_sequentiality', engine='neato', graph_attr=graph_attr)

        # iterate over all nodes
        for cmd in cmds_to_graph:
            count = cmd_count[cmd]

            # iterate over all "following" commands (for each node)
            for cmd2, seq_count in cmdSeq_count[cmd].items():
                relative_seq_count = seq_count / count

                # check if "follow" command is supposed to be in the graph
                if cmd2 not in graphed:
                    continue
                # check if the edge value is high enough
                if relative_seq_count < edge_minValue:
                    continue

                # create starting node and end node for the edge
                # duplicates don't matter
                for id_, cmd_ in ((cmd_id[cmd], cmd), (cmd_id[cmd2], cmd2)):
                    count_ = cmd_count[cmd_]
                    scale_ = count_ * count2scale_coef * scaling_factor * extra_scaling_factor
                    width_ = str(0.08 * scale_)
                    fontsize_ = 8.5 * scale_ / (len(cmd_) + 3)

                    if fontsize_ < 12 * extra_scaling_factor:
                        # label would be too small to read inside the node
                        # -> put it next to the node as an xlabel
                        graph.node(id_, ' ', shape='circle', fixedsize='true', fontname='monospace bold',
                                   width=width_, fontsize=str(12 * extra_scaling_factor), forcelabels='true', xlabel=cmd_)
                    else:
                        graph.node(id_, cmd_, shape='circle', fixedsize='true', fontname='monospace bold',
                                   width=width_, fontsize=str(fontsize_), forcelabels='true', labelloc='c')

                # value of the edge (percentage) 1.0 is max
                scale_ = relative_seq_count
                penwidth_ = str((0.5 + 4.5 * scale_) * extra_scaling_factor)
                #penwidth_bold_ = str(8 * scale_)
                if scale_ > 0.5:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
                               penwidth=penwidth_, style='bold')
                elif scale_ > 0.2:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
                               penwidth=penwidth_, arrowhead='open')
                elif scale_ > 0.1:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='true', splines='curved',
                               penwidth=penwidth_, style='dashed', arrowhead='open')
                else:
                    graph.edge(cmd_id[cmd], cmd_id[cmd2], constraint='false', splines='curved',
                               penwidth=penwidth_, style='dotted', arrowhead='empty')

        # graphviz sometimes fails - see above
        try:
            graph.view()
            # graph.render('/tmp/resh-graphviz-cmdSeq.gv', view=True)
            break
        except Exception as e:
            trace = traceback.format_exc()
            print("GRAPHVIZ EXCEPTION: <{}>\nGRAPHVIZ TRACE: <{}>".format(str(e), trace))
"recent": + saved_matches_total = matches_total + saved_dataPoint_count = dataPoint_count + + if len(selected_strategies) and strategy_title not in selected_strategies: + continue + + acc = 0 + matches_cumulative = [] + for x in matches: + acc += x + matches_cumulative.append(acc) + # matches_cumulative.append(matches_total) + matches_percent = list(map(lambda x: 100 * x / dataPoint_count, matches_cumulative)) + + plt.plot(x_values, matches_percent, 'o-') + legend.append(strategy_title) + + assert(saved_matches_total is not None) + assert(saved_dataPoint_count is not None) + max_values = [100 * saved_matches_total / saved_dataPoint_count] * len(x_values) + plt.plot(x_values, max_values, 'r-') + legend.append("maximum possible") + + x_ticks = list(range(1, plot_size+1, 2)) + x_labels = x_ticks[:] + plt.xticks(x_ticks, x_labels) + plt.legend(legend, loc="best") + if async_draw: + plt.draw() + else: + plt.show() + + + +def plot_strategies_charsRecalled(plot_size=50, selected_strategies=[]): + plt.figure(figsize=(PLOT_WIDTH, PLOT_HEIGHT)) + plt.title("Average characters recalled at distance") + plt.ylabel("Average characters recalled") + plt.xlabel("Distance") + x_values = range(1, plot_size+1) + legend = [] + saved_charsRecalled_total = None + saved_dataPoint_count = None + for strategy in data["Strategies"]: + strategy_title = strategy["Title"] + # strategy_description = strategy["Description"] + + dataPoint_count = 0 + matches = [0] * plot_size + matches_total = 0 + charsRecalled = [0] * plot_size + charsRecalled_total = 0 + + for match in strategy["Matches"]: + dataPoint_count += 1 + + if not match["Match"]: + continue + + chars = match["CharsRecalled"] + charsRecalled_total += chars + matches_total += 1 + + dist = match["Distance"] + if dist > plot_size: + continue + + matches[dist-1] += 1 + charsRecalled[dist-1] += chars + + # recent is very simple strategy so we will believe + # that there is no bug in it and we can use it to determine total + if strategy_title 
== "recent": + saved_charsRecalled_total = charsRecalled_total + saved_dataPoint_count = dataPoint_count + + if len(selected_strategies) and strategy_title not in selected_strategies: + continue + + acc = 0 + charsRecalled_cumulative = [] + for x in charsRecalled: + acc += x + charsRecalled_cumulative.append(acc) + charsRecalled_average = list(map(lambda x: x / dataPoint_count, charsRecalled_cumulative)) + + plt.plot(x_values, charsRecalled_average, 'o-') + legend.append(strategy_title) + + assert(saved_charsRecalled_total is not None) + assert(saved_dataPoint_count is not None) + max_values = [saved_charsRecalled_total / saved_dataPoint_count] * len(x_values) + plt.plot(x_values, max_values, 'r-') + legend.append("maximum possible") + + x_ticks = list(range(1, plot_size+1, 2)) + x_labels = x_ticks[:] + plt.xticks(x_ticks, x_labels) + plt.legend(legend, loc="best") + if async_draw: + plt.draw() + else: + plt.show() + + + +# graph_cmdSequences(node_count=33, edge_minValue=0.05) +graph_cmdSequences(node_count=28, edge_minValue=0.06) + +plot_cmdLineFrq_rank() +plot_cmdFrq_rank() + +plot_cmdLineVocabularySize_cmdLinesEntered() +plot_cmdVocabularySize_cmdLinesEntered() + +plot_strategies_matches(20) +plot_strategies_charsRecalled(20) + +if async_draw: + plt.show() +# be careful and check if labels fit the display \ No newline at end of file diff --git a/evaluate/resh-evaluate.go b/evaluate/resh-evaluate.go new file mode 100644 index 0000000..bef0b24 --- /dev/null +++ b/evaluate/resh-evaluate.go @@ -0,0 +1,340 @@ +package main + +import ( + "bufio" + "bytes" + "encoding/json" + "flag" + "fmt" + "io/ioutil" + "log" + "os" + "os/exec" + "os/user" + "path/filepath" + "sort" + + "github.com/curusarn/resh/common" +) + +// Version from git set during build +var Version string + +// Revision from git set during build +var Revision string + +func main() { + usr, _ := user.Current() + dir := usr.HomeDir + historyPath := filepath.Join(dir, ".resh_history.json") + 
historyPathBatchMode := filepath.Join(dir, "resh_history.json") + sanitizedHistoryPath := filepath.Join(dir, "resh_history_sanitized.json") + // tmpPath := "/tmp/resh-evaluate-tmp.json" + + showVersion := flag.Bool("version", false, "Show version and exit") + showRevision := flag.Bool("revision", false, "Show git revision and exit") + input := flag.String("input", "", + "Input file (default: "+historyPath+"OR"+sanitizedHistoryPath+ + " depending on --sanitized-input option)") + // outputDir := flag.String("output", "/tmp/resh-evaluate", "Output directory") + sanitizedInput := flag.Bool("sanitized-input", false, + "Handle input as sanitized (also changes default value for input argument)") + plottingScript := flag.String("plotting-script", "resh-evaluate-plot.py", "Script to use for plotting") + inputDataRoot := flag.String("input-data-root", "", + "Input data root, enables batch mode, looks for files matching --input option") + + flag.Parse() + + // handle show{Version,Revision} options + if *showVersion == true { + fmt.Println(Version) + os.Exit(0) + } + if *showRevision == true { + fmt.Println(Revision) + os.Exit(0) + } + + // handle batch mode + batchMode := false + if *inputDataRoot != "" { + batchMode = true + } + // set default input + if *input == "" { + if *sanitizedInput { + *input = sanitizedHistoryPath + } else if batchMode { + *input = historyPathBatchMode + } else { + *input = historyPath + } + } + + evaluator := evaluator{sanitizedInput: *sanitizedInput, maxCandidates: 50, BatchMode: batchMode} + if batchMode { + err := evaluator.initBatchMode(*input, *inputDataRoot) + if err != nil { + log.Fatal("Evaluator initBatchMode() error:", err) + } + } else { + err := evaluator.init(*input) + if err != nil { + log.Fatal("Evaluator init() error:", err) + } + } + + var strategies []strategy + + // dummy := strategyDummy{} + // strategies = append(strategies, &dummy) + + recent := strategyRecent{} + frequent := strategyFrequent{} + frequent.init() + directory := 
strategyDirectorySensitive{} + directory.init() + + strategies = append(strategies, &recent, &frequent, &directory) + + for _, strat := range strategies { + err := evaluator.evaluate(strat) + if err != nil { + log.Println("Evaluator evaluate() error:", err) + } + } + + evaluator.calculateStatsAndPlot(*plottingScript) +} + +type strategy interface { + GetTitleAndDescription() (string, string) + GetCandidates() []string + AddHistoryRecord(record *common.Record) error + ResetHistory() error +} + +type matchJSON struct { + Match bool + Distance int + CharsRecalled int +} + +type strategyJSON struct { + Title string + Description string + Matches []matchJSON +} + +type deviceRecords struct { + Name string + Records []common.Record +} + +type userRecords struct { + Name string + Devices []deviceRecords +} + +type evaluator struct { + sanitizedInput bool + BatchMode bool + maxCandidates int + UsersRecords []userRecords + Strategies []strategyJSON +} + +func (e *evaluator) initBatchMode(input string, inputDataRoot string) error { + e.UsersRecords = e.loadHistoryRecordsBatchMode(input, inputDataRoot) + e.processRecords() + return nil +} + +func (e *evaluator) init(inputPath string) error { + records := e.loadHistoryRecords(inputPath) + device := deviceRecords{Records: records} + user := userRecords{} + user.Devices = append(user.Devices, device) + e.UsersRecords = append(e.UsersRecords, user) + e.processRecords() + return nil +} + +func (e *evaluator) calculateStatsAndPlot(scriptName string) { + evalJSON, err := json.Marshal(e) + if err != nil { + log.Fatal("json marshal error", err) + } + buffer := bytes.Buffer{} + buffer.Write(evalJSON) + // run python script to stat and plot/ + cmd := exec.Command(scriptName) + cmd.Stdout = os.Stdout + cmd.Stderr = os.Stderr + cmd.Stdin = &buffer + err = cmd.Run() + if err != nil { + log.Printf("Command finished with error: %v", err) + } +} + +// enrich records and add them to serializable structure +func (e *evaluator) processRecords() 
{ + for i := range e.UsersRecords { + for j, device := range e.UsersRecords[i].Devices { + sessionIDs := map[string]uint64{} + var nextID uint64 + nextID = 0 + for k, record := range e.UsersRecords[i].Devices[j].Records { + id, found := sessionIDs[record.SessionId] + if found == false { + id = nextID + sessionIDs[record.SessionId] = id + nextID++ + } + record.SeqSessionID = id + // assert + if record.Sanitized != e.sanitizedInput { + if e.sanitizedInput { + log.Fatal("ASSERT failed: '--sanitized-input' is present but data is not sanitized") + } + log.Fatal("ASSERT failed: data is sanitized but '--sanitized-input' is not present") + } + + e.UsersRecords[i].Devices[j].Records[k].Enrich() + // device.Records = append(device.Records, record) + } + sort.SliceStable(e.UsersRecords[i].Devices[j].Records, func(x, y int) bool { + if device.Records[x].SeqSessionID == device.Records[y].SeqSessionID { + return device.Records[x].RealtimeAfterLocal < device.Records[y].RealtimeAfterLocal + } + return device.Records[x].SeqSessionID < device.Records[y].SeqSessionID + }) + } + } +} + +func (e *evaluator) evaluate(strategy strategy) error { + title, description := strategy.GetTitleAndDescription() + strategyData := strategyJSON{Title: title, Description: description} + for _, record := range e.UsersRecords[0].Devices[0].Records { + candidates := strategy.GetCandidates() + + matchFound := false + for i, candidate := range candidates { + // make an option (--calculate-total) to turn this on/off ? 
+ // if i >= e.maxCandidates { + // break + // } + if candidate == record.CmdLine { + match := matchJSON{Match: true, Distance: i + 1, CharsRecalled: record.CmdLength} + strategyData.Matches = append(strategyData.Matches, match) + matchFound = true + break + } + } + if matchFound == false { + strategyData.Matches = append(strategyData.Matches, matchJSON{}) + } + err := strategy.AddHistoryRecord(&record) + if err != nil { + log.Println("Error while evauating", err) + return err + } + } + e.Strategies = append(e.Strategies, strategyData) + return nil +} + +func (e *evaluator) loadHistoryRecordsBatchMode(fname string, dataRootPath string) []userRecords { + var records []userRecords + info, err := os.Stat(dataRootPath) + if err != nil { + log.Fatal("Error: Directory", dataRootPath, "does not exist - exiting! (", err, ")") + } + if info.IsDir() == false { + log.Fatal("Error:", dataRootPath, "is not a directory - exiting!") + } + users, err := ioutil.ReadDir(dataRootPath) + if err != nil { + log.Fatal("Could not read directory:", dataRootPath) + } + fmt.Println("Listing users in <", dataRootPath, ">...") + for _, user := range users { + userRecords := userRecords{Name: user.Name()} + userFullPath := filepath.Join(dataRootPath, user.Name()) + if user.IsDir() == false { + log.Println("Warn: Unexpected file (not a directory) <", userFullPath, "> - skipping.") + continue + } + fmt.Println() + fmt.Printf("*- %s\n", user.Name()) + devices, err := ioutil.ReadDir(userFullPath) + if err != nil { + log.Fatal("Could not read directory:", userFullPath) + } + for _, device := range devices { + deviceRecords := deviceRecords{Name: device.Name()} + deviceFullPath := filepath.Join(userFullPath, device.Name()) + if device.IsDir() == false { + log.Println("Warn: Unexpected file (not a directory) <", deviceFullPath, "> - skipping.") + continue + } + fmt.Printf(" \\- %s\n", device.Name()) + files, err := ioutil.ReadDir(deviceFullPath) + if err != nil { + log.Fatal("Could not read 
directory:", deviceFullPath) + } + for _, file := range files { + fileFullPath := filepath.Join(deviceFullPath, file.Name()) + if file.Name() == fname { + fmt.Printf(" \\- %s - loading ...", file.Name()) + // load the data + deviceRecords.Records = e.loadHistoryRecords(fileFullPath) + fmt.Println(" OK ✓") + } else { + fmt.Printf(" \\- %s - skipped\n", file.Name()) + } + } + userRecords.Devices = append(userRecords.Devices, deviceRecords) + } + records = append(records, userRecords) + } + return records +} + +func (e *evaluator) loadHistoryRecords(fname string) []common.Record { + file, err := os.Open(fname) + if err != nil { + log.Fatal("Open() resh history file error:", err) + } + defer file.Close() + + var records []common.Record + scanner := bufio.NewScanner(file) + for scanner.Scan() { + record := common.Record{} + fallbackRecord := common.FallbackRecord{} + line := scanner.Text() + err = json.Unmarshal([]byte(line), &record) + if err != nil { + err = json.Unmarshal([]byte(line), &fallbackRecord) + if err != nil { + log.Println("Line:", line) + log.Fatal("Decoding error:", err) + } + record = common.ConvertRecord(&fallbackRecord) + } + if e.sanitizedInput == false { + if record.CmdLength != 0 { + log.Fatal("Assert failed - 'cmdLength' is set in raw data. Maybe you want to use '--sanitized-input' option?") + } + record.CmdLength = len(record.CmdLine) + } + if record.CmdLength == 0 { + log.Fatal("Assert failed - 'cmdLength' is unset in the data. 
This should not happen.") + } + records = append(records, record) + } + return records +} diff --git a/evaluate/strategy-directory-sensitive.go b/evaluate/strategy-directory-sensitive.go new file mode 100644 index 0000000..0c00bc4 --- /dev/null +++ b/evaluate/strategy-directory-sensitive.go @@ -0,0 +1,42 @@ +package main + +import ( + "github.com/curusarn/resh/common" +) + +type strategyDirectorySensitive struct { + history map[string][]string + lastPwd string +} + +func (s *strategyDirectorySensitive) init() { + s.history = map[string][]string{} +} + +func (s *strategyDirectorySensitive) GetTitleAndDescription() (string, string) { + return "directory sensitive (recent)", "Use recent commands executed is the same directory" +} + +func (s *strategyDirectorySensitive) GetCandidates() []string { + return s.history[s.lastPwd] +} + +func (s *strategyDirectorySensitive) AddHistoryRecord(record *common.Record) error { + // work on history for PWD + pwd := record.Pwd + // remove previous occurance of record + for i, cmd := range s.history[pwd] { + if cmd == record.CmdLine { + s.history[pwd] = append(s.history[pwd][:i], s.history[pwd][i+1:]...) + } + } + // append new record + s.history[pwd] = append([]string{record.CmdLine}, s.history[pwd]...) 
+ s.lastPwd = record.PwdAfter + return nil +} + +func (s *strategyDirectorySensitive) ResetHistory() error { + s.history = map[string][]string{} + return nil +} diff --git a/evaluate/strategy-dummy.go b/evaluate/strategy-dummy.go new file mode 100644 index 0000000..28ed8ec --- /dev/null +++ b/evaluate/strategy-dummy.go @@ -0,0 +1,24 @@ +package main + +import "github.com/curusarn/resh/common" + +type strategyDummy struct { + history []string +} + +func (s *strategyDummy) GetTitleAndDescription() (string, string) { + return "dummy", "Return empty candidate list" +} + +func (s *strategyDummy) GetCandidates() []string { + return nil +} + +func (s *strategyDummy) AddHistoryRecord(record *common.Record) error { + s.history = append(s.history, record.CmdLine) + return nil +} + +func (s *strategyDummy) ResetHistory() error { + return nil +} diff --git a/evaluate/strategy-frequent.go b/evaluate/strategy-frequent.go new file mode 100644 index 0000000..c41f852 --- /dev/null +++ b/evaluate/strategy-frequent.go @@ -0,0 +1,47 @@ +package main + +import ( + "sort" + + "github.com/curusarn/resh/common" +) + +type strategyFrequent struct { + history map[string]int +} + +type strFrqEntry struct { + cmdLine string + count int +} + +func (s *strategyFrequent) init() { + s.history = map[string]int{} +} + +func (s *strategyFrequent) GetTitleAndDescription() (string, string) { + return "frequent", "Use frequent commands" +} + +func (s *strategyFrequent) GetCandidates() []string { + var mapItems []strFrqEntry + for cmdLine, count := range s.history { + mapItems = append(mapItems, strFrqEntry{cmdLine, count}) + } + sort.Slice(mapItems, func(i int, j int) bool { return mapItems[i].count > mapItems[j].count }) + var hist []string + for _, item := range mapItems { + hist = append(hist, item.cmdLine) + } + return hist +} + +func (s *strategyFrequent) AddHistoryRecord(record *common.Record) error { + s.history[record.CmdLine]++ + return nil +} + +func (s *strategyFrequent) ResetHistory() error 
{ + s.history = map[string]int{} + return nil +} diff --git a/evaluate/strategy-recent.go b/evaluate/strategy-recent.go new file mode 100644 index 0000000..7d24d23 --- /dev/null +++ b/evaluate/strategy-recent.go @@ -0,0 +1,32 @@ +package main + +import "github.com/curusarn/resh/common" + +type strategyRecent struct { + history []string +} + +func (s *strategyRecent) GetTitleAndDescription() (string, string) { + return "recent", "Use recent commands" +} + +func (s *strategyRecent) GetCandidates() []string { + return s.history +} + +func (s *strategyRecent) AddHistoryRecord(record *common.Record) error { + // remove previous occurance of record + for i, cmd := range s.history { + if cmd == record.CmdLine { + s.history = append(s.history[:i], s.history[i+1:]...) + } + } + // append new record + s.history = append([]string{record.CmdLine}, s.history...) + return nil +} + +func (s *strategyRecent) ResetHistory() error { + s.history = nil + return nil +} diff --git a/go.mod b/go.mod index 86da97e..9c901e1 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,11 @@ module github.com/curusarn/resh go 1.12 -require github.com/BurntSushi/toml v0.3.1 +require ( + github.com/BurntSushi/toml v0.3.1 + github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 // indirect + github.com/mattn/go-shellwords v1.0.6 + github.com/wcharczuk/go-chart v2.0.1+incompatible + github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa + golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 // indirect +) diff --git a/go.sum b/go.sum index 9cb2df8..92beac2 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,13 @@ github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0 h1:DACJavvAHhabrF08vX0COfcOBJRhZ8lUbR+ZWIs0Y5g= +github.com/golang/freetype v0.0.0-20170609003504-e2365dfdc4a0/go.mod h1:E/TSTwGwJL78qG/PmXZO1EjYhfJinVAhrmmHX6Z8B9k= 
+github.com/mattn/go-shellwords v1.0.6 h1:9Jok5pILi5S1MnDirGVTufYGtksUs/V2BWUP3ZkeUUI= +github.com/mattn/go-shellwords v1.0.6/go.mod h1:3xCvwCdWdlDJUrvuMn7Wuy9eWs4pE8vqg+NOMyg4B2o= +github.com/wcharczuk/go-chart v2.0.1+incompatible h1:0pz39ZAycJFF7ju/1mepnk26RLVLBCWz1STcD3doU0A= +github.com/wcharczuk/go-chart v2.0.1+incompatible/go.mod h1:PF5tmL4EIx/7Wf+hEkpCqYi5He4u90sw+0+6FhrryuE= +github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa h1:rW+Lu6281ed/4XGuVIa4/YebTRNvoUJlfJ44ktEVwZk= +github.com/whilp/git-urls v0.0.0-20160530060445-31bac0d230fa/go.mod h1:2rx5KE5FLD0HRfkkpyn8JwbVLBdhgeiOb2D2D9LLKM4= +golang.org/x/image v0.0.0-20190902063713-cb417be4ba39 h1:4dQcAORh9oYBwVSBVIkP489LUPC+f1HBkTYXgmqfR+o= +golang.org/x/image v0.0.0-20190902063713-cb417be4ba39/go.mod h1:FeLwcggjj3mMvU+oOTbSwawSJRM1uh48EjtB4UJZlP0= +golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= diff --git a/sanitize-history/resh-sanitize-history.go b/sanitize-history/resh-sanitize-history.go new file mode 100644 index 0000000..5ce0581 --- /dev/null +++ b/sanitize-history/resh-sanitize-history.go @@ -0,0 +1,424 @@ +package main + +import ( + "bufio" + "crypto/sha1" + "encoding/binary" + "encoding/hex" + "encoding/json" + "errors" + "flag" + "fmt" + "log" + "net/url" + "os" + "os/user" + "path" + "path/filepath" + "strconv" + "strings" + "unicode" + + "github.com/curusarn/resh/common" + giturls "github.com/whilp/git-urls" +) + +// Version from git set during build +var Version string + +// Revision from git set during build +var Revision string + +func main() { + usr, _ := user.Current() + dir := usr.HomeDir + historyPath := filepath.Join(dir, ".resh_history.json") + // outputPath := filepath.Join(dir, "resh_history_sanitized.json") + sanitizerDataPath := filepath.Join(dir, ".resh", "sanitizer_data") + + showVersion := flag.Bool("version", false, "Show version and exit") + showRevision := flag.Bool("revision", false, "Show git revision and exit") + trimHashes := 
flag.Int("trim-hashes", 12, "Trim hashes to N characters, '0' turns off trimming") + inputPath := flag.String("input", historyPath, "Input file") + outputPath := flag.String("output", "", "Output file (default: use stdout)") + + flag.Parse() + + if *showVersion == true { + fmt.Println(Version) + os.Exit(0) + } + if *showRevision == true { + fmt.Println(Revision) + os.Exit(0) + } + sanitizer := sanitizer{hashLength: *trimHashes} + err := sanitizer.init(sanitizerDataPath) + if err != nil { + log.Fatal("Sanitizer init() error:", err) + } + + inputFile, err := os.Open(*inputPath) + if err != nil { + log.Fatal("Open() resh history file error:", err) + } + defer inputFile.Close() + + var writer *bufio.Writer + if *outputPath == "" { + writer = bufio.NewWriter(os.Stdout) + } else { + outputFile, err := os.Create(*outputPath) + if err != nil { + log.Fatal("Create() output file error:", err) + } + defer outputFile.Close() + writer = bufio.NewWriter(outputFile) + } + defer writer.Flush() + + scanner := bufio.NewScanner(inputFile) + for scanner.Scan() { + record := common.Record{} + fallbackRecord := common.FallbackRecord{} + line := scanner.Text() + err = json.Unmarshal([]byte(line), &record) + if err != nil { + err = json.Unmarshal([]byte(line), &fallbackRecord) + if err != nil { + log.Println("Line:", line) + log.Fatal("Decoding error:", err) + } + record = common.ConvertRecord(&fallbackRecord) + } + err = sanitizer.sanitizeRecord(&record) + if err != nil { + log.Println("Line:", line) + log.Fatal("Sanitization error:", err) + } + outLine, err := json.Marshal(&record) + if err != nil { + log.Println("Line:", line) + log.Fatal("Encoding error:", err) + } + // fmt.Println(string(outLine)) + n, err := writer.WriteString(string(outLine) + "\n") + if err != nil { + log.Fatal(err) + } + if n == 0 { + log.Fatal("Nothing was written", n) + } + } +} + +type sanitizer struct { + hashLength int + whitelist map[string]bool +} + +func (s *sanitizer) init(dataPath string) error { + 
globalData := path.Join(dataPath, "whitelist.txt") + s.whitelist = loadData(globalData) + return nil +} + +func loadData(fname string) map[string]bool { + file, err := os.Open(fname) + if err != nil { + log.Fatal("Open() file error:", err) + } + defer file.Close() + + scanner := bufio.NewScanner(file) + data := make(map[string]bool) + for scanner.Scan() { + line := scanner.Text() + data[line] = true + } + return data +} + +func (s *sanitizer) sanitizeRecord(record *common.Record) error { + // hash directories of the paths + record.Pwd = s.sanitizePath(record.Pwd) + record.RealPwd = s.sanitizePath(record.RealPwd) + record.PwdAfter = s.sanitizePath(record.PwdAfter) + record.RealPwdAfter = s.sanitizePath(record.RealPwdAfter) + record.GitDir = s.sanitizePath(record.GitDir) + record.GitRealDir = s.sanitizePath(record.GitRealDir) + record.Home = s.sanitizePath(record.Home) + record.ShellEnv = s.sanitizePath(record.ShellEnv) + + // hash the most sensitive info, do not tokenize + record.Host = s.hashToken(record.Host) + record.Login = s.hashToken(record.Login) + record.MachineId = s.hashToken(record.MachineId) + + var err error + // this changes git url a bit but I'm still happy with the result + // e.g. 
"git@github.com:curusarn/resh" becomes "ssh://git@github.com/3385162f14d7/5a7b2909005c" + // notice the "ssh://" prefix + record.GitOriginRemote, err = s.sanitizeGitURL(record.GitOriginRemote) + if err != nil { + log.Println("Error while snitizing GitOriginRemote url", record.GitOriginRemote, ":", err) + return err + } + + // sanitization destroys original CmdLine length -> save it + record.CmdLength = len(record.CmdLine) + + record.CmdLine, err = s.sanitizeCmdLine(record.CmdLine) + if err != nil { + log.Fatal("Cmd:", record.CmdLine, "; sanitization error:", err) + } + + // add a flag to signify that the record has been sanitized + record.Sanitized = true + return nil +} + +func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) { + const optionEndingChars = "\"$'\\#[]!><|;{}()*,?~&=`:@^/+%." // all bash control characters, '=', ... + const optionAllowedChars = "-_" // characters commonly found inside of options + sanCmdLine := "" + buff := "" + + // simple options shouldn't be sanitized + // 1) whitespace 2) "-" or "--" 3) letters, digits, "-", "_" 4) ending whitespace or any of "=;)" + var optionDetected bool + + prevR3 := ' ' + prevR2 := ' ' + prevR := ' ' + for _, r := range cmdLine { + switch optionDetected { + case true: + if unicode.IsSpace(r) || strings.ContainsRune(optionEndingChars, r) { + // whitespace or option ends the option + // => add option unsanitized + optionDetected = false + if len(buff) > 0 { + sanCmdLine += buff + buff = "" + } + sanCmdLine += string(r) + } else if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false && + strings.ContainsRune(optionAllowedChars, r) == false { + // r is not any of allowed chars for an option: letter, digit, "-" or "_" + // => sanitize + if len(buff) > 0 { + sanToken, err := s.sanitizeCmdToken(buff) + if err != nil { + log.Println("WARN: got error while sanitizing cmdLine:", cmdLine) + // return cmdLine, err + } + sanCmdLine += sanToken + buff = "" + } + sanCmdLine += string(r) + } else { + 
buff += string(r) + } + case false: + // split command on all non-letter and non-digit characters + if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false { + // split token + if len(buff) > 0 { + sanToken, err := s.sanitizeCmdToken(buff) + if err != nil { + log.Println("WARN: got error while sanitizing cmdLine:", cmdLine) + // return cmdLine, err + } + sanCmdLine += sanToken + buff = "" + } + sanCmdLine += string(r) + } else { + if (unicode.IsSpace(prevR2) && prevR == '-') || + (unicode.IsSpace(prevR3) && prevR2 == '-' && prevR == '-') { + optionDetected = true + } + buff += string(r) + } + } + prevR3 = prevR2 + prevR2 = prevR + prevR = r + } + if len(buff) <= 0 { + // nothing in the buffer => work is done + return sanCmdLine, nil + } + if optionDetected { + // option detected => dont sanitize + sanCmdLine += buff + return sanCmdLine, nil + } + // sanitize + sanToken, err := s.sanitizeCmdToken(buff) + if err != nil { + log.Println("WARN: got error while sanitizing cmdLine:", cmdLine) + // return cmdLine, err + } + sanCmdLine += sanToken + return sanCmdLine, nil +} + +func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) { + if len(rawURL) <= 0 { + return rawURL, nil + } + parsedURL, err := giturls.Parse(rawURL) + if err != nil { + return rawURL, err + } + return s.sanitizeParsedURL(parsedURL) +} + +func (s *sanitizer) sanitizeURL(rawURL string) (string, error) { + if len(rawURL) <= 0 { + return rawURL, nil + } + parsedURL, err := url.Parse(rawURL) + if err != nil { + return rawURL, err + } + return s.sanitizeParsedURL(parsedURL) +} + +func (s *sanitizer) sanitizeParsedURL(parsedURL *url.URL) (string, error) { + parsedURL.Opaque = s.sanitizeToken(parsedURL.Opaque) + + userinfo := parsedURL.User.Username() // only get username => password won't even make it to the sanitized data + if len(userinfo) > 0 { + parsedURL.User = url.User(s.sanitizeToken(userinfo)) + } else { + // we need to do this because `gitUrls.Parse()` sets `User` to `url.User("")` 
instead of `nil` + parsedURL.User = nil + } + var err error + parsedURL.Host, err = s.sanitizeTwoPartToken(parsedURL.Host, ":") + if err != nil { + return parsedURL.String(), err + } + parsedURL.Path = s.sanitizePath(parsedURL.Path) + // ForceQuery bool + parsedURL.RawQuery = s.sanitizeToken(parsedURL.RawQuery) + parsedURL.Fragment = s.sanitizeToken(parsedURL.Fragment) + + return parsedURL.String(), nil +} + +func (s *sanitizer) sanitizePath(path string) string { + var sanPath string + for _, token := range strings.Split(path, "/") { + if s.whitelist[token] != true { + token = s.hashToken(token) + } + sanPath += token + "/" + } + if len(sanPath) > 0 { + sanPath = sanPath[:len(sanPath)-1] + } + return sanPath +} + +func (s *sanitizer) sanitizeTwoPartToken(token string, delimeter string) (string, error) { + tokenParts := strings.Split(token, delimeter) + if len(tokenParts) <= 1 { + return s.sanitizeToken(token), nil + } + if len(tokenParts) == 2 { + return s.sanitizeToken(tokenParts[0]) + delimeter + s.sanitizeToken(tokenParts[1]), nil + } + return token, errors.New("Token has more than two parts") +} + +func (s *sanitizer) sanitizeCmdToken(token string) (string, error) { + // there shouldn't be tokens with letters or digits mixed together with symbols + if len(token) <= 1 { + // NOTE: do not sanitize single letter tokens + return token, nil + } + if s.isInWhitelist(token) == true { + return token, nil + } + + isLettersOrDigits := true + // isDigits := true + isOtherCharacters := true + for _, r := range token { + if unicode.IsDigit(r) == false && unicode.IsLetter(r) == false { + isLettersOrDigits = false + // isDigits = false + } + // if unicode.IsDigit(r) == false { + // isDigits = false + // } + if unicode.IsDigit(r) || unicode.IsLetter(r) { + isOtherCharacters = false + } + } + // NOTE: I decided that I don't want a special sanitization for numbers + // if isDigits { + // return s.hashNumericToken(token), nil + // } + if isLettersOrDigits { + return 
s.hashToken(token), nil + } + if isOtherCharacters { + return token, nil + } + log.Println("WARN: cmd token is made of mix of letters or digits and other characters; token:", token) + // return token, errors.New("cmd token is made of mix of letters or digits and other characters") + return s.hashToken(token), errors.New("cmd token is made of mix of letters or digits and other characters") +} + +func (s *sanitizer) sanitizeToken(token string) string { + if len(token) <= 1 { + // NOTE: do not sanitize single letter tokens + return token + } + if s.isInWhitelist(token) { + return token + } + return s.hashToken(token) +} + +func (s *sanitizer) hashToken(token string) string { + if len(token) <= 0 { + return token + } + // hash with sha1 + h := sha1.New() + h.Write([]byte(token)) + sum := h.Sum(nil) + return s.trimHash(hex.EncodeToString(sum)) +} + +func (s *sanitizer) hashNumericToken(token string) string { + if len(token) <= 0 { + return token + } + h := sha1.New() + h.Write([]byte(token)) + sum := h.Sum(nil) + sumInt := int(binary.LittleEndian.Uint64(sum)) + if sumInt < 0 { + return strconv.Itoa(sumInt * -1) + } + return s.trimHash(strconv.Itoa(sumInt)) +} + +func (s *sanitizer) trimHash(hash string) string { + length := s.hashLength + if length <= 0 || len(hash) < length { + length = len(hash) + } + return hash[:length] +} + +func (s *sanitizer) isInWhitelist(token string) bool { + return s.whitelist[strings.ToLower(token)] == true +} diff --git a/sanitizer_data/copyright_information.md b/sanitizer_data/copyright_information.md new file mode 100644 index 0000000..abdbf33 --- /dev/null +++ b/sanitizer_data/copyright_information.md @@ -0,0 +1,7 @@ +# copyright information + +Whitelist contains content from variety of sources. + +Part of the whitelist (`./whitelist.txt`) is made of copyrighted content from [FileInfo.com](https://fileinfo.com/filetypes/common). + +This content was used with permission from FileInfo.com. 
diff --git a/sanitizer_data/whitelist.txt b/sanitizer_data/whitelist.txt new file mode 100644 index 0000000..180e9c3 --- /dev/null +++ b/sanitizer_data/whitelist.txt @@ -0,0 +1,1195 @@ + +! +- +. +.. +: +[ +[[ +]] +{ +} +3dm +3ds +3g2 +3gp +7z +accdb +add +addgnupghome +addgroup +addpart +addr2line +add-shell +adduser +agetty +ai +aif +alias +alternatives +apk +app +applydeltarpm +applygnupgdefaults +apt +apt-cache +apt-cdrom +apt-config +apt-get +apt-key +apt-mark +ar +arch +arpd +arping +as +asf +asm +asp +aspx +au +autoload +avi +awk +b +b2sum +badblocks +bak +base32 +base64 +basename +basenc +bash +bashbug +bashbug-64 +bat +bg +bin +bind +bindkey +bisect +blend +blkdeactivate +blkdiscard +blkid +blkzone +blockdev +bmp +boot +bootctl +br +branch +break +bridge +brotli +build-locale-archive +builtin +bunzip2 +busctl +bye +bz2 +bzcat +bzcmp +bzdiff +bzegrep +bzexe +bzfgrep +bzgrep +bzip2 +bzip2recover +bzless +bzmore +c +cab +cal +ca-legacy +caller +capsh +captoinfo +case +cat +catchsegv +cbr +cc +cd +cer +certutil +cfdisk +cfg +c++filt +cfm +cgi +chacl +chage +chardetect +chattr +chcon +chcpu +chdir +checkout +chfn +chgpasswd +chgrp +chkconfig +chmem +chmod +choom +chown +chpasswd +chroot +chrt +chsh +cksum +class +clear +clear_console +clock +clockdiff +clone +cmp +cmsutil +co +code +col +colcrt +colrm +column +com +combinedeltarpm +comm +command +commit +compadd +comparguments +compcall +compctl +compdescribe +compfiles +compgen +compgroups +complete +compopt +compquote +compset +comptags +comptry +compvalues +conf +continue +convert +coproc +coredumpctl +cp +cpgr +cpio +cpl +cpp +cppw +cracklib-check +cracklib-format +cracklib-packer +cracklib-unpacker +crdownload +create-cracklib-dict +crlutil +crx +cs +csplit +csr +css +csv +ctrlaltdel +ctstat +cue +cur +curl +cut +cvtsudoers +cz +dash +dat +date +db +db_archive +db_checkpoint +db_deadlock +db_dump +db_dump185 +dbf +db_hotbackup +db_load +db_log_verify +db_printlog +db_recover +db_replicate +db_stat 
+db_tuner +db_upgrade +dbus-binding-tool +dbus-broker +dbus-broker-launch +dbus-cleanup-sockets +dbus-daemon +dbus-monitor +dbus-run-session +dbus-send +dbus-test-tool +dbus-update-activation-environment +dbus-uuidgen +db_verify +dcr +dd +dds +de +deb +debconf +debconf-apt-progress +debconf-communicate +debconf-copydb +debconf-escape +debconf-set-selections +debconf-show +deb-systemd-helper +deb-systemd-invoke +debugfs +debuginfo-install +declare +delgroup +delpart +deluser +dem +depmod +deskthemepack +desktop +dev +devlink +df +dgawk +diff +diff3 +dir +dircolors +dirmngr +dirmngr-client +dirname +dirs +disable +disown +dll +dmesg +dmfilemapd +dmg +dmp +dmsetup +dmstats +dnf +dnf-3 +dnsdomainname +do +doc +docker +Dockerfile +docx +domainname +done +dpkg +dpkg-deb +dpkg-divert +dpkg-maintscript-helper +dpkg-preconfigure +dpkg-query +dpkg-reconfigure +dpkg-split +dpkg-statoverride +dpkg-trigger +dracut +drv +dtd +du +dumpe2fs +dwg +dwp +dxf +e2freefrag +e2fsck +e2image +e2label +e2mmpstatus +e2undo +e4crypt +e4defrag +easy_install-3.7 +echo +echotc +echoti +egrep +eject +elfedit +elif +else +emacs +emulate +enable +end +env +eps +esac +etc +eval +evmctl +ex +exe +exec +exit +expand +expiry +export +expr +factor +faillock +faillog +fallocate +false +fc +fdformat +fdisk +fetch +ffmpeg +fg +fgrep +fi +filefrag +fincore +find +findfs +findmnt +find-repos-of-install +fips-finish-install +fips-mode-setup +fish +fla +float +flock +flv +fmt +fnt +fold +fon +for +foreach +free +fsck +fsck.cramfs +fsck.ext2 +fsck.ext3 +fsck.ext4 +fsck.minix +fsfreeze +fstab-decode +fstrim +function +functions +g13 +g13-syshelp +gadget +gam +gapplication +gawk +gdbus +ged +gencat +genl +getcap +getconf +getent +getfacl +getln +getopt +getopts +getpcaps +getty +gif +gio +gio-launch-desktop +gio-querymodules-64 +git +github.com +glib-compile-schemas +glibc_post_upgrade.x86_64 +go +gpasswd +gpg +gpg2 +gpg-agent +gpgconf +gpg-connect-agent +gpg-error +gpgme-json +gpgparsemail +gpgsplit +gpgv 
+gpgv2 +gpg-wks-server +gpg-zip +gprof +gpx +grep +groupadd +groupdel +groupmems +groupmod +groups +grpck +grpconv +grpunconv +gsettings +gtar +gunzip +gz +gzexe +gzip +h +halt +hardlink +hash +head +heic +help +hexdump +history +home +hostid +hostname +hostnamectl +hqx +htm +html +http +https +hwclock +i386 +icns +ico +iconv +iconvconfig +iconvconfig.x86_64 +ics +id +idn +if +ifenslave +iff +igawk +in +indd +info +infocmp +infokey +infotocap +ini +init +initctl +insmod +install +install-info +installkernel +integer +invoke-rc.d +ionice +ip +ipcmk +ipcrm +ipcs +ir +ischroot +iso +isosize +it +jar +java +jobs +join +journalctl +jpg +jq +js +json +jsp +kernel-install +key +keychain +kill +killall5 +kml +kmod +kmz +kpartx +ksp +kss +kwd +last +lastb +lastlog +lchage +lchfn +lchsh +ld +ldattach +ld.bfd +ldconfig +ldconfig.real +ldd +ld.gold +let +lgroupadd +lgroupdel +lgroupmod +lib +lib64 +lid +limit +link +linux32 +linux64 +ln +lnewusers +lnk +lnstat +local +locale +locale-check +localectl +localedef +localhost +log +logger +login +loginctl +logname +logout +logsave +look +losetup +lost+found +lpasswd +ls +lsattr +lsblk +lscpu +lsinitrd +lsipc +lslocks +lslogins +lsmem +lsmod +lsns +lua +luac +luseradd +luserdel +lusermod +lz4 +lz4c +lz4cat +m +m3u +m4a +m4p +m4v +machinectl +make +makedb +makedeltarpm +make-dummy-cert +Makefile +man +mapfile +master +mawk +max +mcookie +md5 +md5sum +md5sums +md5sum.textutils +mdb +mdf +media +merge +mesg +mid +mim +mkdict +mkdir +mke2fs +mkfifo +mkfs +mkfs.bfs +mkfs.cramfs +mkfs.ext2 +mkfs.ext3 +mkfs.ext4 +mkfs.minix +mkhomedir_helper +mkinitrd +mklost+found +mknod +mkpasswd +mkswap +mktemp +mnt +mo +modinfo +modprobe +modulemd-validator-v1 +modutil +more +mount +mountpoint +mov +mp3 +mp4 +mpa +mpg +msg +msi +mv +namei +nawk +needs-restarting +nes +net +networkctl +newgidmap +newgrp +newuidmap +newusers +nice +nisdomainname +nl +nm +no +nocorrect +noglob +nohup +nologin +nproc +nsenter +nstat +numfmt +o +obj +objcopy +objdump +od 
+odt +ogg +oldfind +openssl +opt +org +origin +otf +p11-kit +package-cleanup +packer +pager +pages +pam-auth-update +pam_console_apply +pam_extrausers_chkpwd +pam_extrausers_update +pam_getenv +pam_tally +pam_tally2 +pam_timestamp_check +part +partx +passwd +paste +patch +pathchk +pct +pdb +pdf +perl +perl5.26.1 +perl5.28.1 +pgawk +pgrep +php +phps +phtml +pidof +pinentry +pinentry-curses +ping +ping4 +ping6 +pinky +pip-3 +pip3 +pip-3.7 +pip3.7 +pivot_root +pk12util +pkg +pkg-config +pkill +pkl +pl +pldd +pls +plugin +pmap +png +policy-rc.d +popd +portablectl +pov +poweroff +pps +ppt +pptx +pr +prf +print +printenv +printf +private +prlimit +proc +properties +ps +psd +pspimage +ptx +pull +push +pushd +pushln +pwck +pwconv +pwd +pwdx +pwhistory_helper +pwmake +pwscore +pwunconv +py +pyc +pydoc +pydoc3 +pydoc3.7 +pyo +python +python2 +python2.7 +python3 +python3.7 +python3.7m +pyvenv +pyvenv-3.7 +r +ranlib +rar +raw +rbash +rc +rdf +rdisc +rdma +read +readarray +readelf +readlink +readonly +readprofile +realpath +rebase +reboot +rehash +remove-shell +rename +rename.ul +renew-dummy-cert +renice +repeat +repoclosure +repodiff +repo-graph +repomanage +repoquery +repo-rss +reposync +repotrack +reset +resh +resize2fs +resizepart +resolvconf +resolvectl +return +rev +rfkill +rgrep +rm +rmdir +rmmod +rmt +rmt-tar +rom +root +routef +routel +rpcgen +rpm +rpm2archive +rpm2cpio +rpmdb +rpmdumpheader +rpmkeys +rpmquery +rpmverify +rss +rtacct +rtcwake +rtf +rtmon +rtstat +ru +run +runcon +run-help +runlevel +run-parts +runuser +rvi +rview +s +sasldblistusers2 +saslpasswd2 +sav +savelog +sbin +sched +script +scriptreplay +sdf +sdiff +sed +sefcontext_compile +select +select-editor +sensible-browser +sensible-editor +sensible-pager +seq +service +set +setarch +setcap +setfacl +setopt +setpriv +setsid +setterm +setup-nsssysinit +setup-nsssysinit.sh +sfdisk +sg +sh +sha1sum +sha224sum +sha256sum +sha384sum +sha512sum +shadowconfig +share +sh.distrib +shift +shopt +show 
+show-changed-rco +show-installed +shred +shuf +shutdown +signtool +signver +sitx +size +skill +slabtop +sleep +sln +snice +so +sort +sotruss +source +split +sprof +sql +sqlite3 +srt +srv +ss +ssh +ssltap +start-stop-daemon +stat +status +stdbuf +strings +strip +stty +su +sudo +sudoedit +sudoreplay +sulogin +sum +suspend +svg +swaplabel +swapoff +swapon +swf +swift +switch_root +sync +sys +sysctl +systemctl +systemd-analyze +systemd-ask-password +systemd-cat +systemd-cgls +systemd-cgtop +systemd-coredumpctl +systemd-delta +systemd-detect-virt +systemd-escape +systemd-firstboot +systemd-hwdb +systemd-id128 +systemd-inhibit +systemd-loginctl +systemd-machine-id-setup +systemd-mount +systemd-notify +systemd-nspawn +systemd-path +systemd-resolve +systemd-run +systemd-socket-activate +systemd-stdio-bridge +systemd-sysusers +systemd-tmpfiles +systemd-tty-ask-password-agent +systemd-umount +tabs +tac +tag +tail +tailf +tar +tarcat +taskset +tax2016 +tax2018 +tc +tee +telinit +tempfile +test +testgdbm +tex +tga +tgz +then +thm +tic +tif +tiff +tig +time +timedatectl +timeout +times +tipc +tload +tmp +toast +toe +top +torrent +touch +tput +tr +tracepath +tracepath6 +trap +true +truncate +trust +tset +tsort +ttf +tty +ttyctl +tune2fs +txt +type +typeset +tzconfig +tzselect +udevadm +uk +ul +ulimit +umask +umount +unalias +uname +uname26 +unbound-anchor +uncompress +unexpand +unfunction +unhash +uniq +unix_chkpwd +unix_update +unlimit +unlink +unlz4 +unminimize +unset +unsetopt +unshare +until +unxz +update-alternatives +update-ca-trust +update-crypto-policies +update-mime-database +update-passwd +update-rc.d +uptime +urlgrabber +useradd +userdel +usermod +users +usr +utmpdump +uue +uuidgen +uuidparse +Vagrantfile +var +vared +vb +vcd +vcf +vcxproj +vdir +verifytree +vi +view +vigr +vim +vipw +visudo +vlc +vmstat +vob +w +wait +wall +watch +watchgnupg +wav +wc +wdctl +weak-modules +whence +where +whereis +which +which-command +while +who +whoami +wipefs +wma +wmv +wpd 
+w.procps +wps +write +wsf +x86_64 +xargs +xbel +xcodeproj +xhtml +xlr +xls +xlsx +xml +xmlcatalog +xmllint +xmlwf +xpm +xsd +xsl +xz +xzcat +xzcmp +xzdec +xzdiff +xzegrep +xzfgrep +xzgrep +xzless +xzmore +yaourt +yes +ypdomainname +yum +yum-builddep +yum-complete-transaction +yum-config-manager +yumdb +yum-debug-dump +yum-debug-restore +yumdownloader +yum-groups-manager +yuv +Z +zcat +zcmp +zcompile +zdiff +zdump +zegrep +zfgrep +zforce +zformat +zgrep +zic +zip +zipx +zle +zless +zmodload +zmore +znew +zparseopts +zramctl +zregexparse +zsh +zstyle diff --git a/shellrc.sh b/shellrc.sh index 671e8eb..cfb1666 100644 --- a/shellrc.sh +++ b/shellrc.sh @@ -153,10 +153,19 @@ __resh_precmd() { __RESH_PWD_AFTER="$PWD" if [ -n "${__RESH_COLLECT}" ]; then if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then - echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})" + source ~/.resh/shellrc + if [ "$__RESH_VERSION" != $(resh-collect -version) ]; then + echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh version: $(resh-collect -version); resh version of this terminal session: ${__RESH_VERSION})" + else + echo "RESH INFO: New RESH shellrc script was loaded - if you encounter any issues please restart this terminal session." 
+ fi elif [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then - echo "resh WARNING: You probably just updated RESH - please restart or reload this terminal session (resh version: $(resh-collect -revision); resh version of this terminal session: ${__RESH_REVISION})" - else + source ~/.resh/shellrc + if [ "$__RESH_REVISION" != $(resh-collect -revision) ]; then + echo "RESH WARNING: You probably just updated RESH - PLEASE RESTART OR RELOAD THIS TERMINAL SESSION (resh revision: $(resh-collect -revision); resh revision of this terminal session: ${__RESH_REVISION})" + fi + fi + if [ "$__RESH_VERSION" == $(resh-collect -version) ] && [ "$__RESH_REVISION" == $(resh-collect -revision) ]; then resh-collect -requireVersion "$__RESH_VERSION" \ -requireRevision "$__RESH_REVISION" \ -cmdLine "$__RESH_CMDLINE" \ diff --git a/version b/version index 524cb55..781dcb0 100644 --- a/version +++ b/version @@ -1 +1 @@ -1.1.1 +1.1.3