sanitizer works

pull/13/head
Simon Let 6 years ago
parent f08d35eab5
commit b67b4eef13
  1. 3
      common/resh-common.go
  2. 220
      sanitize-history/resh-sanitize-history.go
  3. 23
      sanitizer_data/path_whitelist.txt
  4. 2999
      sanitizer_data/whitelist.txt

@ -60,6 +60,9 @@ type Record struct {
ReshUuid string `json:"reshUuid"` ReshUuid string `json:"reshUuid"`
ReshVersion string `json:"reshVersion"` ReshVersion string `json:"reshVersion"`
ReshRevision string `json:"reshRevision"` ReshRevision string `json:"reshRevision"`
// added by sanitizer
CmdLength int `json:"cmdLength"`
} }
type Config struct { type Config struct {

@ -3,6 +3,7 @@ package main
import ( import (
"bufio" "bufio"
"crypto/sha1" "crypto/sha1"
"encoding/binary"
"encoding/hex" "encoding/hex"
"encoding/json" "encoding/json"
"errors" "errors"
@ -14,10 +15,11 @@ import (
"os/user" "os/user"
"path" "path"
"path/filepath" "path/filepath"
"strconv"
"strings" "strings"
"unicode"
"github.com/curusarn/resh/common" "github.com/curusarn/resh/common"
"github.com/mattn/go-shellwords"
giturls "github.com/whilp/git-urls" giturls "github.com/whilp/git-urls"
) )
@ -48,7 +50,7 @@ func main() {
fmt.Println(Revision) fmt.Println(Revision)
os.Exit(0) os.Exit(0)
} }
sanitizer := sanitizer{} sanitizer := sanitizer{hashLength: 4}
err := sanitizer.init(sanitizerDataPath) err := sanitizer.init(sanitizerDataPath)
if err != nil { if err != nil {
log.Fatal("Sanitizer init() error:", err) log.Fatal("Sanitizer init() error:", err)
@ -70,7 +72,7 @@ func main() {
log.Println("Line:", line) log.Println("Line:", line)
return return
} }
err = sanitizer.sanitize(&record) err = sanitizer.sanitizeRecord(&record)
if err != nil { if err != nil {
log.Println("Sanitization error:", err) log.Println("Sanitization error:", err)
log.Println("Line:", line) log.Println("Line:", line)
@ -87,16 +89,13 @@ func main() {
} }
type sanitizer struct { type sanitizer struct {
GlobalWhitelist map[string]bool hashLength int
PathWhitelist map[string]bool whitelist map[string]bool
// CmdWhitelist []string
} }
func (s *sanitizer) init(dataPath string) error { func (s *sanitizer) init(dataPath string) error {
globalData := path.Join(dataPath, "whitelist.txt") globalData := path.Join(dataPath, "whitelist.txt")
s.GlobalWhitelist = loadData(globalData) s.whitelist = loadData(globalData)
pathData := path.Join(dataPath, "path_whitelist.txt")
s.PathWhitelist = loadData(pathData)
return nil return nil
} }
@ -116,7 +115,7 @@ func loadData(fname string) map[string]bool {
return data return data
} }
func (s *sanitizer) sanitize(record *common.Record) error { func (s *sanitizer) sanitizeRecord(record *common.Record) error {
record.Pwd = s.sanitizePath(record.Pwd) record.Pwd = s.sanitizePath(record.Pwd)
record.RealPwd = s.sanitizePath(record.RealPwd) record.RealPwd = s.sanitizePath(record.RealPwd)
record.PwdAfter = s.sanitizePath(record.PwdAfter) record.PwdAfter = s.sanitizePath(record.PwdAfter)
@ -126,51 +125,109 @@ func (s *sanitizer) sanitize(record *common.Record) error {
record.Home = s.sanitizePath(record.Home) record.Home = s.sanitizePath(record.Home)
record.ShellEnv = s.sanitizePath(record.ShellEnv) record.ShellEnv = s.sanitizePath(record.ShellEnv)
record.Host = s.sanitizeTokenDontUseWhitelist(record.Host) record.Host = s.hashToken(record.Host)
record.Uname = s.sanitizeTokenDontUseWhitelist(record.Uname) record.Login = s.hashToken(record.Login)
record.Login = s.sanitizeTokenDontUseWhitelist(record.Login) record.MachineId = s.hashToken(record.MachineId)
record.MachineId = s.sanitizeTokenDontUseWhitelist(record.MachineId)
var err error var err error
// this changes git url a bit but I'm still happy with the result
// e.g. "git@github.com:curusarn/resh" becomes "ssh://git@github.com/3385162f14d7/5a7b2909005c"
// notice the "ssh://" prefix
record.GitOriginRemote, err = s.sanitizeGitURL(record.GitOriginRemote) record.GitOriginRemote, err = s.sanitizeGitURL(record.GitOriginRemote)
if err != nil { if err != nil {
log.Println("Error while snitizing GitOriginRemote url", record.GitOriginRemote, ":", err) log.Println("Error while snitizing GitOriginRemote url", record.GitOriginRemote, ":", err)
return err return err
} }
fmt.Println("....") // sanitization destroys original CmdLine length -> save it
parser := shellwords.NewParser() record.CmdLength = len(record.CmdLine)
args, err := parser.Parse(record.CmdLine) record.CmdLine, err = s.sanitizeCmdLine(record.CmdLine)
if err != nil { if err != nil {
log.Println("Parsing error @ position", parser.Position, ":", err) log.Fatal("Cmd:", record.CmdLine, "; sanitization error:", err)
log.Println("CmdLine:", record.CmdLine)
return err
} }
fmt.Println(args)
return nil return nil
}
func (s *sanitizer) sanitizeCmdLine(cmdLine string) (string, error) {
sanCmdLine := ""
buff := ""
// var tokens []string // simple options shouldn't be sanitized
// word := "" // 1) whitespace 2) "-" or "--" 3) letters, digits, "-", "_" 4) ending whitespace or "="
// for _, char := range strings.Split(, "") { var optionDetected bool
// if unicode.IsSpace([]rune(char)[0]) {
// if len(word) > 0 { prevR3 := ' '
// tokens = append(tokens, word) prevR2 := ' '
// word = "" prevR := ' '
// } for _, r := range cmdLine {
// tokens = append(tokens, char) switch optionDetected {
// } else { case true:
// word += char if unicode.IsSpace(r) || r == '=' || r == ';' {
// } // whitespace, "=" or ";" ends the option
// } // => add option unsanitized
// if len(word) > 0 { optionDetected = false
// tokens = append(tokens, word) if len(buff) > 0 {
// } sanCmdLine += buff
// for _, token := range tokens { buff = ""
// fmt.Println(token) }
// } sanCmdLine += string(r)
// return nil } else if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false && r != '-' && r != '_' {
// r is not any of allowed chars for an option: letter, digit, "-" or "_"
// => sanitize
if len(buff) > 0 {
sanToken, err := s.sanitizeCmdToken(buff)
if err != nil {
return cmdLine, err
}
sanCmdLine += sanToken
buff = ""
}
sanCmdLine += string(r)
} else {
buff += string(r)
}
case false:
// split command on all non-letter and non-digit characters
if unicode.IsLetter(r) == false && unicode.IsDigit(r) == false {
// split token
if len(buff) > 0 {
sanToken, err := s.sanitizeCmdToken(buff)
if err != nil {
return cmdLine, err
}
sanCmdLine += sanToken
buff = ""
}
sanCmdLine += string(r)
} else {
if (unicode.IsSpace(prevR2) && prevR == '-') ||
(unicode.IsSpace(prevR3) && prevR2 == '-' && prevR == '-') {
optionDetected = true
}
buff += string(r)
}
}
prevR3 = prevR2
prevR2 = prevR
prevR = r
}
if len(buff) <= 0 {
// nothing in the buffer => work is done
return sanCmdLine, nil
}
if optionDetected {
// option detected => dont sanitize
sanCmdLine += buff
return sanCmdLine, nil
}
// sanitize
sanToken, err := s.sanitizeCmdToken(buff)
if err != nil {
return cmdLine, err
}
sanCmdLine += sanToken
return sanCmdLine, nil
} }
func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) { func (s *sanitizer) sanitizeGitURL(rawURL string) (string, error) {
@ -216,8 +273,8 @@ func (s *sanitizer) sanitizeParsedURL(parsedURL *url.URL) (string, error) {
func (s *sanitizer) sanitizePath(path string) string { func (s *sanitizer) sanitizePath(path string) string {
var sanPath string var sanPath string
for _, token := range strings.Split(path, "/") { for _, token := range strings.Split(path, "/") {
if s.PathWhitelist[token] != true { if s.whitelist[token] != true {
token = s.sanitizeToken(token) token = s.hashToken(token)
} }
sanPath += token + "/" sanPath += token + "/"
} }
@ -238,19 +295,55 @@ func (s *sanitizer) sanitizeTwoPartToken(token string, delimeter string) (string
return token, errors.New("Token has more than two parts") return token, errors.New("Token has more than two parts")
} }
func (s *sanitizer) sanitizeToken(token string) string { func (s *sanitizer) sanitizeCmdToken(token string) (string, error) {
return s._sanitizeToken(token, true) // there shouldn't be tokens with letters or digits mixed together with symbols
if len(token) <= 0 {
return token, nil
}
if s.whitelist[token] == true {
return token, nil
} }
func (s *sanitizer) sanitizeTokenDontUseWhitelist(token string) string { isLettersOrDigits := true
return s._sanitizeToken(token, false) isDigits := true
isOtherCharacters := true
for _, r := range token {
if unicode.IsDigit(r) == false && unicode.IsLetter(r) == false {
isLettersOrDigits = false
isDigits = false
}
if unicode.IsDigit(r) == false {
isDigits = false
}
if unicode.IsDigit(r) || unicode.IsLetter(r) {
isOtherCharacters = false
}
}
if isDigits {
return s.hashNumericToken(token), nil
}
if isLettersOrDigits {
return s.hashToken(token), nil
}
if isOtherCharacters {
return token, nil
}
log.Println("token:", token)
return token, errors.New("cmd token is made of mix of letters or digits and other characters")
} }
func (s *sanitizer) _sanitizeToken(token string, useWhitelist bool) string { func (s *sanitizer) sanitizeToken(token string) string {
if len(token) <= 0 { if len(token) <= 0 {
return token return token
} }
if useWhitelist == true && s.GlobalWhitelist[token] == true { if s.whitelist[token] {
return token
}
return s.hashToken(token)
}
func (s *sanitizer) hashToken(token string) string {
if len(token) <= 0 {
return token return token
} }
// hash with sha1 // hash with sha1
@ -258,5 +351,30 @@ func (s *sanitizer) _sanitizeToken(token string, useWhitelist bool) string {
h := sha1.New() h := sha1.New()
h.Write([]byte(token)) h.Write([]byte(token))
sum := h.Sum(nil) sum := h.Sum(nil)
return hex.EncodeToString(sum)[:12] // TODO: extend hashes to 12
return s.trimHash(hex.EncodeToString(sum))
}
// hashNumericToken replaces an all-digit token with a pseudonymous token that
// also consists only of decimal digits, so numeric arguments stay numeric
// after sanitization.
//
// The token is hashed with SHA-1, the first 8 bytes of the digest are read as
// a little-endian signed 64-bit integer, and the decimal form of its magnitude
// is shortened by trimHash. An empty token is returned unchanged.
func (s *sanitizer) hashNumericToken(token string) string {
	if len(token) == 0 {
		return token
	}
	sum := sha1.Sum([]byte(token))
	// Use a fixed 64-bit width so the result does not depend on the
	// platform's int size.
	v := int64(binary.LittleEndian.Uint64(sum[:8]))
	magnitude := uint64(v)
	if v < 0 {
		// Negate in unsigned arithmetic: this is also correct for the
		// minimum int64 value, whose negation does not fit in int64.
		magnitude = -magnitude
	}
	// Trim on every path so all numeric hashes respect the configured length.
	return s.trimHash(strconv.FormatUint(magnitude, 10))
}
// trimHash shortens hash to at most s.hashLength bytes.
// When s.hashLength is zero or negative, or hash is shorter than
// s.hashLength, hash is returned unchanged.
func (s *sanitizer) trimHash(hash string) string {
length := s.hashLength
// fall back to the full hash when no usable length is configured
// or the hash is too short to cut
if length <= 0 || len(hash) < length {
length = len(hash)
}
return hash[:length]
} }

@ -1,23 +0,0 @@
.
..
bin
boot
dev
etc
home
lib
lib64
lost+found
media
mnt
opt
proc
root
run
sbin
srv
sys
tmp
usr
var

File diff suppressed because it is too large Load Diff
Loading…
Cancel
Save