@@ -0,0 +1,433 @@
+package main
+
+import (
+	"bufio"
+	"crypto/sha256"
+	"encoding/hex"
+	"encoding/json"
+	"fmt"
+	"io"
+	"io/ioutil"
+	"log"
+	"os"
+	"path/filepath"
+	"sort"
+	"strings"
+	"sync"
+	"time"
+
+	"code.cloudfoundry.org/bytefmt"
+
+	flag "github.com/spf13/pflag"
+	"wkla.no-ip.biz/gogs/Willie/GoHash/pkg/hash"
+)
+
+// hashes caches the hash file of every visited directory, keyed by path.
+var hashes map[string]hash.Fdhashes
+var ignoreLines []string
+var mu sync.RWMutex
+
+// command line options
+var rewrite bool
+var prune bool
+var outputJson bool
+var report string
+var ignores string
+
+func init() {
+	flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fdhashes files.")
+	flag.StringVarP(&report, "equals", "e", "", "compare all file hashes and write an equality report.")
+	flag.BoolVarP(&prune, "prune", "p", false, "prune all fdhashes files, removing entries for missing files.")
+	flag.BoolVarP(&outputJson, "json", "j", false, "output as json.")
+	flag.StringVarP(&ignores, "ignores", "i", "", "list of files to ignore in report.")
+}
+
+func main() {
+	log.Println("starting GoHash")
+	ignoreLines = make([]string, 0)
+	hashes = make(map[string]hash.Fdhashes)
+	flag.Parse()
+	myFile := flag.Arg(0)
+	if rewrite {
+		log.Println("rewrite active")
+	}
+	if prune {
+		log.Println("prune active")
+	}
+	if outputJson {
+		log.Println("output json format active")
+	}
+	if report != "" {
+		log.Println("report active, file: ", report)
+	}
+	if ignores != "" {
+		log.Println("ignores file: ", ignores)
+	}
+	file, err := os.Stat(myFile)
+	if os.IsNotExist(err) {
+		log.Fatalln("file does not exist:", myFile)
+	}
+	if file.IsDir() {
+		log.Println("start with folder:", myFile)
+
+		if report != "" {
+			compareFolder(myFile)
+		} else {
+			processFolder(myFile)
+			saveAllHashFiles()
+		}
+	} else {
+		log.Printf("file %s has hash %s\n", myFile, getSha256Hash(myFile))
+	}
+
+	log.Println("done")
+}
+
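+// getSha256Hash streams the file at fileStr through SHA-256 and returns the
+// digest hex encoded.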
+func getSha256Hash(fileStr string) string {
+	f, err := os.Open(fileStr)
+	if err != nil {
+		log.Fatal(err)
+	}
+	defer f.Close()
+
+	h := sha256.New()
+	if _, err := io.Copy(h, f); err != nil {
+		log.Fatal(err)
+	}
+	return hex.EncodeToString(h.Sum(nil))
+}
+
+// lock1 guards the Hashes maps, lock2 the Times maps.
+var lock1 = sync.RWMutex{}
+var lock2 = sync.RWMutex{}
+
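+// calculateHash hashes a single file, reusing the cached value when the
+// stored modification time is unchanged, and records the result in the
+// directory's hash file.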
+func calculateHash(fileStr string) {
+	var hashFile hash.Fdhashes
+	doHash := true
+	dir, fileName := filepath.Split(fileStr)
+	if fileName == ".fdhashes3" {
+		return
+	}
+	// check whether a hash entry is already present
+	mu.Lock()
+	hashFile, ok := hashes[dir]
+	if !ok {
+		_, err := os.Stat(dir + ".fdhashes3")
+		if os.IsNotExist(err) || rewrite {
+			hashFile = hash.Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
+		} else {
+			hf, err := hash.LoadHashfile(dir + ".fdhashes3")
+			check(err)
+			hashFile = *hf
+		}
+		hashes[dir] = hashFile
+	}
+	lock1.RLock()
+	_, ok = hashFile.Hashes[fileName]
+	lock1.RUnlock()
+	mu.Unlock()
+	doHash = !ok
+	// check whether the stored modification time is still identical
+	file, _ := os.Stat(fileStr)
+	modTime := file.ModTime()
+	lock2.RLock()
+	savedTime, ok := hashFile.Times[fileName]
+	lock2.RUnlock()
+	if !modTime.Equal(savedTime) || !ok {
+		doHash = true
+	}
+	if doHash {
+		log.Printf("starting %s\n", fileStr)
+		hashValue := getSha256Hash(fileStr)
+		log.Printf("ready %s\n", fileStr)
+		mu.Lock()
+		lock1.Lock()
+		hashFile.Hashes[fileName] = hashValue
+		lock1.Unlock()
+
+		lock2.Lock()
+		hashFile.Times[fileName] = modTime
+		lock2.Unlock()
+		dirtyHashfile(&hashFile)
+		hashes[dir] = hashFile
+		mu.Unlock()
+		log.Printf("file \"%s\" has hash \"%s\"\n", fileStr, hashValue)
+	}
+}
+
+var count int
+var addWork int
+var startTime time.Time
+
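+// processFolder walks the folder tree, hashing every non-hidden regular
+// file and flushing dirty hash files to disk roughly every ten seconds.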
+func processFolder(folder string) {
+	startTime = time.Now()
+	count = 0
+	addWork = 0
+	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
+		// info can be nil when Walk fails to stat an entry
+		if info == nil {
+			return err
+		}
+		count++
+		if (count % 100) == 0 {
+			fmt.Print(".")
+		}
+		if (count % 10000) == 0 {
+			fmt.Println()
+		}
+		filename := info.Name()
+		if !strings.HasPrefix(filename, ".") {
+			if info.IsDir() {
+				fmt.Println(path)
+				if prune {
+					pruneHash(path)
+				}
+			}
+			if !info.IsDir() {
+				addWork++
+				calculateHash(path)
+				if time.Since(startTime).Seconds() > 10.0 {
+					startTime = time.Now()
+					saveAllHashFiles()
+					addWork = 0
+				}
+			}
+		}
+		return nil
+	})
+	if err != nil {
+		panic(err)
+	}
+}
+
+// pruneHash deletes entries for files that no longer exist on disk from the
+// directory's hash file.
+func pruneHash(dir string) {
+	_, err := os.Stat(dir + "/.fdhashes3")
+	if !os.IsNotExist(err) {
+		hashFile, err := hash.LoadHashfile(dir + "/.fdhashes3")
+		check(err)
+		for filename := range hashFile.Hashes {
+			_, err := os.Stat(dir + "/" + filename)
+			if os.IsNotExist(err) {
+				delete(hashFile.Hashes, filename)
+				delete(hashFile.Times, filename)
+				hashFile.Dirty = true
+			}
+		}
+
+		// the Times map can hold names missing from Hashes, so sweep it as well
+		for filename := range hashFile.Times {
+			_, err := os.Stat(dir + "/" + filename)
+			if os.IsNotExist(err) {
+				delete(hashFile.Hashes, filename)
+				delete(hashFile.Times, filename)
+				hashFile.Dirty = true
+			}
+		}
+		saveHashfile(hashFile)
+	}
+}
+
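+// dirtyHashfile marks a hash file as changed so it gets written on the next
+// save.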
+func dirtyHashfile(hashFile *hash.Fdhashes) {
+	hashFile.Dirty = true
+}
+
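+// saveAllHashFiles writes all dirty hash files to disk and shrinks the
+// in-memory cache to the entries that were just written.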
+func saveAllHashFiles() {
+	hashList := make([]hash.Fdhashes, 0)
+
+	for _, hashFile := range hashes {
+		if hashFile.Dirty {
+			saveHashfile(&hashFile)
+			hashList = append(hashList, hashFile)
+		}
+	}
+
+	// keep only the entries just written; everything else is evicted from
+	// the cache
+	hashes = make(map[string]hash.Fdhashes)
+	for _, hashFile := range hashList {
+		hashes[hashFile.Path] = hashFile
+	}
+}
+
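+// saveHashfile serializes a hash file as JSON into .fdhashes3 in its
+// directory, but only if it is marked dirty.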
+func saveHashfile(hashFile *hash.Fdhashes) {
+	if hashFile.Dirty {
+		hashFile.Dirty = false
+		b, err := json.Marshal(hashFile)
+		if err != nil {
+			fmt.Println(err)
+			return
+		}
+		err = ioutil.WriteFile(hashFile.Path+"/.fdhashes3", b, 0644)
+		if err != nil {
+			panic(err)
+		}
+	}
+}
+
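+// compareFolder loads all hash files below folder, inverts them into a
+// hash -> file-list index, and writes a duplicate report (JSON or plain
+// text) to the file given with --equals.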
+func compareFolder(folder string) {
+	loadIgnoreFile(ignores)
+	loadAllHashFiles(folder)
+	// put all hashes into one big index: key = hash, value = list of files with that hash
+	size := len(hashes)
+	index := make(map[string][]string)
+	count = 0
+	for _, hashFile := range hashes {
+		count++
+		if count%100 == 0 {
+			fmt.Printf("%d (%d) merging\n", count, size)
+		}
+		for filename, hashValue := range hashFile.Hashes {
+			values := index[hashValue]
+			if values == nil {
+				values = make([]string, 0)
+			}
+			fullpath := fmt.Sprintf("%s/%s", hashFile.Path, filename)
+			// ignoreLines is sorted, so a binary search finds an exact match
+			pos := sort.SearchStrings(ignoreLines, fullpath)
+			ignored := pos < len(ignoreLines) && ignoreLines[pos] == fullpath
+			if !ignored {
+				_, err := os.Stat(fullpath)
+				if err == nil {
+					values = append(values, fullpath)
+					index[hashValue] = values
+				}
+			}
+		}
+	}
+
+	// sort the file list of every hash and delete hashes with only one entry
+	size = len(index)
+	myHashes := make([]string, 0)
+	count = 0
+	for hashValue, values := range index {
+		count++
+		if count%100 == 0 {
+			fmt.Printf("%d (%d) sorting\n", count, size)
+		}
+		if len(values) > 1 {
+			sort.Strings(values)
+			index[hashValue] = values
+			myHashes = append(myHashes, hashValue)
+		} else {
+			delete(index, hashValue)
+		}
+	}
+	sort.Slice(myHashes, func(i, j int) bool { return index[myHashes[i]][0] < index[myHashes[j]][0] })
+
+	if outputJson {
+		size = len(index)
+		count = 0
+		var filesize int64
+		fileCount := 0
+		for _, hashValue := range myHashes {
+			values := index[hashValue]
+			count++
+			if count%100 == 0 {
+				fmt.Printf("%d (%d) checking\n", count, size)
+			}
+			if len(values) > 1 {
+				info, err := os.Stat(values[0])
+				if err == nil {
+					fmt.Printf("found identical hash: %s size: %d\n", hashValue, info.Size())
+					filesize += int64(len(values)-1) * info.Size()
+				}
+				fileCount += len(values) - 1
+				for _, filename := range values {
+					fmt.Printf(" %s\n", filename)
+				}
+			} else {
+				delete(index, hashValue)
+			}
+		}
+
+		b, err := json.Marshal(index)
+		if err != nil {
+			fmt.Println(err)
+			return
+		}
+		err = ioutil.WriteFile(report, b, 0644)
+		if err != nil {
+			panic(err)
+		}
+	} else {
+		size := len(index)
+		f, err := os.Create(report)
+		check(err)
+		w := bufio.NewWriter(f)
+		count := 0
+		var filesize int64
+		fileCount := 0
+		for _, hashValue := range myHashes {
+			values := index[hashValue]
+			count++
+			if count%100 == 0 {
+				fmt.Printf("%d (%d) checking\n", count, size)
+			}
+			if len(values) > 1 {
+				info, err := os.Stat(values[0])
+				if err == nil {
+					w.WriteString(fmt.Sprintf("found identical hash: size: %d\n", info.Size()))
+					filesize += int64(len(values)-1) * info.Size()
+				}
+				fileCount += len(values) - 1
+				for _, filename := range values {
+					w.WriteString(fmt.Sprintf(" %s\n", filename))
+				}
+				w.Flush()
+			}
+		}
+		w.WriteString(fmt.Sprintf("can save up to %s on %d files\n", bytefmt.ByteSize(uint64(filesize)), fileCount))
+		w.Flush()
+	}
+}
+
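+// loadIgnoreFile reads the ignore list, one path per line, and sorts it so
+// compareFolder can run a binary search on it.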
+func loadIgnoreFile(filename string) {
+	content, err := ioutil.ReadFile(filename)
+	if err == nil {
+		ignoreLines = strings.Split(string(content), "\n")
+		sort.Strings(ignoreLines)
+	}
+}
+
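+// search looks for other files carrying srcHash, skipping entries that share
+// the given filename or path, and returns them as a semicolon separated list.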
+func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) {
+	for _, hashFile := range hashes {
+		for filename, hashValue := range hashFile.Hashes {
+			if (filename != exFilename) && (hashFile.Path != exFilepath) {
+				if hashValue == srcHash {
+					value += fmt.Sprintf("%s/%s;", hashFile.Path, filename)
+					found = true
+				}
+			}
+		}
+	}
+	return
+}
+
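+// loadAllHashFiles walks the folder tree and loads the .fdhashes3 file of
+// every directory into the hashes cache, creating an empty entry where none
+// exists yet.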
+func loadAllHashFiles(folder string) {
+	count = 0
+	addWork = 0
+	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
+		if info != nil {
+			if info.IsDir() {
+				count++
+				fmt.Print(".")
+				if (count % 100) == 0 {
+					fmt.Println()
+				}
+				hashFile, ok := hashes[path]
+				if !ok {
+					_, err := os.Stat(path + "/.fdhashes3")
+					if os.IsNotExist(err) {
+						hashFile = hash.Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
+					} else {
+						hf, err := hash.LoadHashfile(path + "/.fdhashes3")
+						check(err)
+						hashFile = *hf
+					}
+					hashes[path] = hashFile
+				}
+			}
+		}
+		return nil
+	})
+	check(err)
+	fmt.Printf("\nfound %d hash files.\n", len(hashes))
+}
+
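+// check panics if the given error is non-nil.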
+func check(e error) {
+	if e != nil {
+		panic(e)
+	}
+}