// Command GoHash maintains a ".fdhashes3" file in every folder it walks. Each
// such file stores, as JSON, the SHA-256 hash and modification time of the
// files in that folder. With the "equals" flag it instead loads the existing
// hash files and writes a report of files that share the same hash.
package main

import (
	"bufio"
	"crypto/sha256"
	"encoding/hex"
	"encoding/json"
	"fmt"
	"io"
	"io/ioutil"
	"log"
	"os"
	"path/filepath"
	"runtime"
	"sort"
	"strings"
	"sync"
	"time"

	"code.cloudfoundry.org/bytefmt"
	flag "github.com/spf13/pflag"
)

// hashFileName is the name of the per-folder file holding the stored hashes.
const hashFileName = ".fdhashes3"

// Fdhashes struct for holding all informations about one folder.
type Fdhashes struct {
	// Path is the folder this record belongs to.
	Path string
	// Hashes maps a file name to its hex encoded SHA-256 hash.
	Hashes map[string]string
	// Times maps a file name to the modification time seen when it was hashed.
	Times map[string]time.Time
	// Dirty reports whether the record has changes not yet written to disk.
	Dirty bool
}

var (
	// hashes caches the folder records, keyed by folder path.
	hashes map[string]Fdhashes
	// mu is taken around accesses to hashes in calculateHash.
	mu sync.RWMutex
	// driveLetter is set in main on Windows; it is not read in this file.
	driveLetter string

	// Command line options, registered in init.
	rewrite    bool
	prune      bool
	outputJson bool
	report     string
)

// init registers the command line flags.
func init() {
	flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fhhashes files.")
	flag.StringVarP(&report, "equals", "e", "", "compare all file hashes and writing a equlatity report.")
	flag.BoolVarP(&prune, "prune", "p", false, "checking all fdhashes files.")
	flag.BoolVarP(&outputJson, "json", "j", false, "output as json.")
}

// main parses the flags and dispatches on the first positional argument: a
// folder is either hashed (default) or compared (when a report file name was
// given), a single file just gets its hash logged.
func main() {
	log.Println("starting GoHash")
	hashes = make(map[string]Fdhashes)
	flag.Parse()
	myFile := flag.Arg(0)

	file, err := os.Stat(myFile)
	if err != nil {
		if os.IsNotExist(err) {
			log.Fatalln("File does not exists:", myFile)
		}
		// Any other stat failure (e.g. permission denied) leaves file nil,
		// so stop here instead of dereferencing it below.
		log.Fatalln(err)
	}

	if file.IsDir() {
		log.Println("start with folder:", myFile)
		driveLetter = ""
		if runtime.GOOS == "windows" {
			driveLetter = filepath.VolumeName(myFile) + "/"
		}
		if report != "" {
			compareFolder(myFile)
		} else {
			processFolder(myFile)
			saveAllHashFiles()
		}
	} else {
		log.Printf("file %s has hash %s\n", myFile, getSha256Hash(myFile))
	}
	log.Println("done")
}

// getSha256Hash returns the hex encoded SHA-256 hash of the content of the
// file fileStr. If the file cannot be opened or read, the program is
// terminated via log.Fatal.
func getSha256Hash(fileStr string) string {
	f, err := os.Open(fileStr)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	h := sha256.New()
	if _, err := io.Copy(h, f); err != nil {
		log.Fatal(err)
	}
	return hex.EncodeToString(h.Sum(nil))
}

// lock1 and lock2 are taken around accesses to the Hashes and Times maps of a
// folder record in calculateHash.
var lock1 = sync.RWMutex{}
var lock2 = sync.RWMutex{}

// calculateHash makes sure the folder record of fileStr holds a current hash
// for it. The file is (re)hashed when no hash or no time is stored for it, or
// when its modification time differs from the stored one. The hash file
// itself is never hashed.
func calculateHash(fileStr string) {
	dir, fileName := filepath.Split(fileStr)
	if fileName == hashFileName {
		return
	}

	// checking if hash is present
	mu.Lock()
	hashFile, ok := hashes[dir]
	if !ok {
		hashPath := filepath.Join(dir, hashFileName)
		if _, err := os.Stat(hashPath); os.IsNotExist(err) {
			hashFile = Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
		} else {
			hashFile = loadHashfile(hashPath)
		}
		hashes[dir] = hashFile
	}
	lock1.RLock()
	_, hashKnown := hashFile.Hashes[fileName]
	lock1.RUnlock()
	mu.Unlock()

	// checking if the modification time is identical
	info, err := os.Stat(fileStr)
	if err != nil {
		// The file may have vanished or be unreadable; skip it rather than
		// dereferencing a nil FileInfo.
		log.Printf("can't stat file %s: %v", fileStr, err)
		return
	}
	modTime := info.ModTime()
	lock2.RLock()
	savedTime, timeKnown := hashFile.Times[fileName]
	lock2.RUnlock()

	if hashKnown && timeKnown && modTime.Equal(savedTime) {
		return
	}

	log.Printf("starting %s\n", fileStr)
	hash := getSha256Hash(fileStr)
	log.Printf("ready %s\n", fileStr)

	mu.Lock()
	lock1.Lock()
	hashFile.Hashes[fileName] = hash
	lock1.Unlock()
	lock2.Lock()
	hashFile.Times[fileName] = modTime
	lock2.Unlock()
	dirtyHashfile(&hashFile)
	// hashFile is a copy of the map entry, so store it back to keep the
	// Dirty flag.
	hashes[dir] = hashFile
	mu.Unlock()
	log.Printf("file \"%s\" has hash \"%s\"\n", fileStr, hash)
}

// Progress state of the folder walks.
var count int
var addWork int
var startTime time.Time

// processFolder walks folder and hashes every file whose name does not start
// with a dot. Folders are printed and, with the prune flag set, pruned.
// Roughly every 10 seconds all dirty hash files are written to disk. Entries
// the walk cannot access are logged and skipped.
func processFolder(folder string) {
	startTime = time.Now()
	count = 0
	addWork = 0
	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
		if err != nil || info == nil {
			// info is nil when the entry could not be stat'ed.
			log.Printf("skipping %s: %v", path, err)
			return nil
		}

		count++
		if (count % 100) == 0 {
			fmt.Print(".")
		}
		if (count % 10000) == 0 {
			fmt.Println()
		}

		if strings.HasPrefix(info.Name(), ".") {
			return nil
		}
		if info.IsDir() {
			fmt.Println(path)
			if prune {
				pruneHash(path)
			}
			return nil
		}

		addWork++
		calculateHash(path)
		if time.Since(startTime).Seconds() > 10.0 {
			startTime = time.Now()
			saveAllHashFiles()
			addWork = 0
		}
		return nil
	})
	if err != nil {
		panic(err)
	}
}

// pruneHash deletes unused hash values from the hash file of dir: every entry
// whose file no longer exists is removed from both maps, and the hash file is
// rewritten if anything changed. It does nothing if dir has no hash file.
func pruneHash(dir string) {
	hashPath := filepath.Join(dir, hashFileName)
	if _, err := os.Stat(hashPath); os.IsNotExist(err) {
		return
	}

	hashFile := loadHashfile(hashPath)
	dropIfMissing := func(filename string) {
		if _, err := os.Stat(filepath.Join(dir, filename)); os.IsNotExist(err) {
			delete(hashFile.Hashes, filename)
			delete(hashFile.Times, filename)
			hashFile.Dirty = true
		}
	}
	// Both maps are checked because a name may be present in only one of them.
	for filename := range hashFile.Hashes {
		dropIfMissing(filename)
	}
	for filename := range hashFile.Times {
		dropIfMissing(filename)
	}
	saveHashfile(&hashFile)
}

// dirtyHashfile marks hashFile as having unsaved changes.
func dirtyHashfile(hashFile *Fdhashes) {
	hashFile.Dirty = true
}

// saveAllHashFiles writes every dirty folder record to disk and then replaces
// the cache with a map holding only the records just written, keyed by their
// Path. Records that were not dirty are dropped from the cache.
func saveAllHashFiles() {
	saved := make(map[string]Fdhashes)
	for _, hashFile := range hashes {
		if hashFile.Dirty {
			saveHashfile(&hashFile)
			saved[hashFile.Path] = hashFile
		}
	}
	hashes = saved
}

// saveHashfile writes hashFile as JSON to the hash file inside its Path if it
// is dirty. The Dirty flag is cleared before marshalling, so the stored record
// is clean. It panics if the file cannot be written.
func saveHashfile(hashFile *Fdhashes) {
	if !hashFile.Dirty {
		return
	}
	hashFile.Dirty = false
	b, err := json.Marshal(hashFile)
	if err != nil {
		// Nothing was written, so keep the record marked for a later save.
		hashFile.Dirty = true
		fmt.Println(err)
		return
	}
	// filepath.Join also handles an empty Path (files directly below a
	// relative walk root); plain concatenation would yield "/.fdhashes3".
	if err := ioutil.WriteFile(filepath.Join(hashFile.Path, hashFileName), b, 0644); err != nil {
		panic(err)
	}
}

// loadHashfile reads the hash file fileStr and returns its folder record.
// With the rewrite flag set the file is not read and an empty record is
// returned. The record's Path is always the cleaned, slash separated folder
// of fileStr; a differing stored Path marks the record dirty. It panics if
// the file cannot be read; a file that cannot be parsed is only logged.
func loadHashfile(fileStr string) Fdhashes {
	dir, _ := filepath.Split(fileStr)
	dir = filepath.ToSlash(filepath.Clean(dir))
	data := Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: false}
	if !rewrite {
		file, err := ioutil.ReadFile(fileStr)
		if err != nil {
			panic(err)
		}
		if err := json.Unmarshal(file, &data); err != nil {
			log.Printf("can't read file %s", fileStr)
		}
		// A JSON null sets a map to nil; callers write into both maps.
		if data.Hashes == nil {
			data.Hashes = make(map[string]string)
		}
		if data.Times == nil {
			data.Times = make(map[string]time.Time)
		}
	}
	if data.Path != dir {
		data.Path = dir
		data.Dirty = true
	}
	return data
}

// compareFolder loads all hash files below folder and reports every group of
// files sharing one hash. With the json flag set, the groups are printed and
// the index (hash -> sorted file list) is written as JSON to the report file;
// otherwise a text report with the potential savings is written there.
func compareFolder(folder string) {
	loadAllHashFiles(folder)

	// putting all hashes into one big map key = hash, value list of files with that hash
	index := make(map[string][]string)
	for _, hashFile := range hashes {
		for filename, hash := range hashFile.Hashes {
			index[hash] = append(index[hash], fmt.Sprintf("%s/%s", hashFile.Path, filename))
		}
	}

	// sorting list of files for every hash and deleting hashes with only 1 entry
	size := len(index)
	progress := 0
	myHashes := make([]string, 0)
	for hash, values := range index {
		progress++
		if progress%100 == 0 {
			fmt.Printf("%d (%d) sorting\n", progress, size)
		}
		if len(values) > 1 {
			sort.Strings(values)
			myHashes = append(myHashes, hash)
		} else {
			delete(index, hash)
		}
	}
	// Order the groups by their first file name.
	sort.Slice(myHashes, func(i, j int) bool {
		return index[myHashes[i]][0] < index[myHashes[j]][0]
	})

	// From here on every remaining group has at least two entries.
	size = len(index)
	progress = 0

	if outputJson {
		for _, hash := range myHashes {
			values := index[hash]
			progress++
			if progress%100 == 0 {
				fmt.Printf("%d (%d) checking\n", progress, size)
			}
			if info, err := os.Stat(values[0]); err == nil {
				fmt.Printf("found identically hash: %s size: %d\n", hash, info.Size())
			}
			for _, filename := range values {
				fmt.Printf(" %s\n", filename)
			}
		}
		b, err := json.Marshal(index)
		if err != nil {
			fmt.Println(err)
			return
		}
		if err := ioutil.WriteFile(report, b, 0644); err != nil {
			panic(err)
		}
		return
	}

	f, err := os.Create(report)
	check(err)
	defer f.Close()
	w := bufio.NewWriter(f)

	var filesize int64
	fileCount := 0
	for _, hash := range myHashes {
		values := index[hash]
		progress++
		if progress%100 == 0 {
			fmt.Printf("%d (%d) checking\n", progress, size)
		}
		if info, err := os.Stat(values[0]); err == nil {
			fmt.Fprintf(w, "found identically hash: size: %d\n", info.Size())
			// All but one copy of the file could be removed.
			filesize += int64(len(values)-1) * info.Size()
		}
		fileCount += len(values) - 1
		for _, filename := range values {
			fmt.Fprintf(w, " %s\n", filename)
		}
		// Write errors are sticky in bufio.Writer; they are checked once at
		// the final Flush below.
		w.Flush()
	}
	fmt.Fprintf(w, "can save up to %s on %d files\n", bytefmt.ByteSize(uint64(filesize)), fileCount)
	check(w.Flush())
}

// search collects all cached files whose hash equals srcHash. Entries whose
// file name equals exFilename or whose folder equals exFilepath are skipped.
// The matches are returned in value as "path/name;" items; found reports
// whether there was at least one.
//
// NOTE(review): the condition skips every file with the same name OR in the
// same folder, not only the one source file - confirm that this is intended.
func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) {
	for _, hashFile := range hashes {
		for filename, hash := range hashFile.Hashes {
			if (filename != exFilename) && (hashFile.Path != exFilepath) {
				if hash == srcHash {
					value += fmt.Sprintf("%s/%s;", hashFile.Path, filename)
					found = true
				}
			}
		}
	}
	return
}

// loadAllHashFiles walks folder and puts a record for every directory into
// the cache: the content of its hash file if one exists, an empty dirty
// record otherwise. Walk errors are logged.
func loadAllHashFiles(folder string) {
	count = 0
	addWork = 0
	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("can't access %s: %v", path, err)
		}
		if info == nil || !info.IsDir() {
			return nil
		}

		count++
		fmt.Print(".")
		if (count % 100) == 0 {
			fmt.Println()
		}
		if _, ok := hashes[path]; ok {
			return nil
		}

		var hashFile Fdhashes
		hashPath := filepath.Join(path, hashFileName)
		if _, err := os.Stat(hashPath); os.IsNotExist(err) {
			hashFile = Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
		} else {
			hashFile = loadHashfile(hashPath)
		}
		hashes[path] = hashFile
		return nil
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("\nfound %d hash files.\n", len(hashes))
}

// check panics if e is not nil.
func check(e error) {
	if e != nil {
		panic(e)
	}
}