| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416 | 
							- package main
 
- import (
 
- 	"bufio"
 
- 	"encoding/json"
 
- 	"fmt"
 
- 	"io/ioutil"
 
- 	"log"
 
- 	"os"
 
- 	"path/filepath"
 
- 	"sort"
 
- 	"strings"
 
- 	"sync"
 
- 	"time"
 
- 	"code.cloudfoundry.org/bytefmt"
 
- 	flag "github.com/spf13/pflag"
 
- 	"wkla.no-ip.biz/gogs/Willie/GoHash/pkg/hash"
 
- )
 
- var hashes map[string]hash.Fdhashes
 
- var ignoreLines []string
 
- var mu sync.RWMutex
 
- var rewrite bool
 
- var prune bool
 
- var outputJson bool
 
- var report string
 
- var ignores string
 
- func init() {
 
- 	flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fhhashes files.")
 
- 	flag.StringVarP(&report, "equals", "e", "", "compare all file hashes and writing a equlatity report.")
 
- 	flag.BoolVarP(&prune, "prune", "p", false, "checking all fdhashes files.")
 
- 	flag.BoolVarP(&outputJson, "json", "j", false, "output as json.")
 
- 	flag.StringVarP(&ignores, "ignores", "i", "", "list of files to ignore in report.")
 
- }
 
- func main() {
 
- 	log.Println("starting GoHash")
 
- 	ignoreLines = make([]string, 0)
 
- 	hashes = make(map[string]hash.Fdhashes)
 
- 	flag.Parse()
 
- 	myFile := flag.Arg(0)
 
- 	if rewrite {
 
- 		log.Println("rewrite active")
 
- 	}
 
- 	if prune {
 
- 		log.Println("prune active")
 
- 	}
 
- 	if outputJson {
 
- 		log.Println("output json format active")
 
- 	}
 
- 	if report != "" {
 
- 		log.Println("report active, file: ", report)
 
- 	}
 
- 	if ignores != "" {
 
- 		log.Println("ignores file: ", ignores)
 
- 	}
 
- 	file, err := os.Stat(myFile)
 
- 	if os.IsNotExist(err) {
 
- 		log.Fatalln("File does not exists:", myFile)
 
- 	}
 
- 	if file.IsDir() {
 
- 		log.Println("start with folder:", myFile)
 
- 		if report != "" {
 
- 			compareFolder(myFile)
 
- 		} else {
 
- 			processFolder(myFile)
 
- 			saveAllHashFiles()
 
- 		}
 
- 	} else {
 
- 		log.Printf("file %s has hash %s\n", myFile, hash.GetFileHash(myFile))
 
- 	}
 
- 	log.Println("done")
 
- }
 
// lock1 and lock2 are taken by calculateHash around accesses to the Hashes
// and Times maps of a hash file, respectively.
var (
	lock1 sync.RWMutex
	lock2 sync.RWMutex
)
 
- func calculateHash(fileStr string) {
 
- 	var hashFile hash.Fdhashes
 
- 	doHash := true
 
- 	dir, fileName := filepath.Split(fileStr)
 
- 	if fileName == ".fdhashes3" {
 
- 		return
 
- 	}
 
- 	// checking if hash is present
 
- 	mu.Lock()
 
- 	hashFile, ok := hashes[dir]
 
- 	if !ok {
 
- 		_, err := os.Stat(dir + ".fdhashes3")
 
- 		if os.IsNotExist(err) || rewrite {
 
- 			hashFile = hash.Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
 
- 		} else {
 
- 			hf, err := hash.LoadHashfile(dir + ".fdhashes3")
 
- 			check(err)
 
- 			hashFile = *hf
 
- 		}
 
- 		hashes[dir] = hashFile
 
- 	}
 
- 	lock1.RLock()
 
- 	_, ok = hashFile.Hashes[fileName]
 
- 	lock1.RUnlock()
 
- 	mu.Unlock()
 
- 	doHash = !ok
 
- 	// checking if dattime is identically
 
- 	file, _ := os.Stat(fileStr)
 
- 	time := file.ModTime()
 
- 	lock2.RLock()
 
- 	savedTime, ok := hashFile.Times[fileName]
 
- 	lock2.RUnlock()
 
- 	if !time.Equal(savedTime) || !ok {
 
- 		doHash = true
 
- 	}
 
- 	if doHash {
 
- 		log.Printf("starting %s\n", fileStr)
 
- 		hash := hash.GetFileHash(fileStr)
 
- 		log.Printf("ready %s\n", fileStr)
 
- 		mu.Lock()
 
- 		lock1.Lock()
 
- 		hashFile.Hashes[fileName] = hash
 
- 		lock1.Unlock()
 
- 		lock2.Lock()
 
- 		hashFile.Times[fileName] = time
 
- 		lock2.Unlock()
 
- 		dirtyHashfile(&hashFile)
 
- 		hashes[dir] = hashFile
 
- 		mu.Unlock()
 
- 		log.Printf("file \"%s\" has hash \"%s\"\n", fileStr, hash)
 
- 	}
 
- }
 
// Progress bookkeeping shared by the folder walks.
var (
	// count is the number of entries visited by the current loop or walk.
	count int
	// addWork counts the files handed to calculateHash since the last
	// periodic save.
	addWork int
	// startTime is when the last periodic save was started.
	startTime time.Time
)
 
- func processFolder(folder string) {
 
- 	startTime = time.Now()
 
- 	count = 0
 
- 	addWork = 0
 
- 	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
 
- 		count++
 
- 		if (count % 100) == 0 {
 
- 			fmt.Print(".")
 
- 		}
 
- 		if (count % 10000) == 0 {
 
- 			fmt.Println()
 
- 		}
 
- 		filename := info.Name()
 
- 		if filename[0:1] != "." {
 
- 			if info.IsDir() {
 
- 				fmt.Println(path)
 
- 				if prune {
 
- 					pruneHash(path)
 
- 				}
 
- 			}
 
- 			if !info.IsDir() {
 
- 				addWork++
 
- 				calculateHash(path)
 
- 				if time.Since(startTime).Seconds() > 10.0 {
 
- 					startTime = time.Now()
 
- 					saveAllHashFiles()
 
- 					addWork = 0
 
- 				}
 
- 			}
 
- 		}
 
- 		return nil
 
- 	})
 
- 	if err != nil {
 
- 		panic(err)
 
- 	}
 
- }
 
- /* delete unused hash values from the hash file */
 
- func pruneHash(dir string) {
 
- 	_, err := os.Stat(dir + "/.fdhashes3")
 
- 	if !os.IsNotExist(err) {
 
- 		hashFile, err := hash.LoadHashfile(dir + "/.fdhashes3")
 
- 		check(err)
 
- 		for filename := range hashFile.Hashes {
 
- 			_, err := os.Stat(dir + "/" + filename)
 
- 			if os.IsNotExist(err) {
 
- 				delete(hashFile.Hashes, filename)
 
- 				delete(hashFile.Times, filename)
 
- 				hashFile.Dirty = true
 
- 			}
 
- 		}
 
- 		for filename := range hashFile.Times {
 
- 			_, err := os.Stat(dir + "/" + filename)
 
- 			if os.IsNotExist(err) {
 
- 				delete(hashFile.Hashes, filename)
 
- 				delete(hashFile.Times, filename)
 
- 				hashFile.Dirty = true
 
- 			}
 
- 		}
 
- 		saveHashfile(hashFile)
 
- 	}
 
- }
 
- func dirtyHashfile(hashFile *hash.Fdhashes) {
 
- 	hashFile.Dirty = true
 
- }
 
- func saveAllHashFiles() {
 
- 	hashList := make([]hash.Fdhashes, 0)
 
- 	for _, hashFile := range hashes {
 
- 		if hashFile.Dirty {
 
- 			saveHashfile(&hashFile)
 
- 			hashList = append(hashList, hashFile)
 
- 		}
 
- 	}
 
- 	hashes = make(map[string]hash.Fdhashes)
 
- 	for _, hashFile := range hashList {
 
- 		hashes[hashFile.Path] = hashFile
 
- 	}
 
- }
 
- func saveHashfile(hashFile *hash.Fdhashes) {
 
- 	if hashFile.Dirty {
 
- 		hashFile.Dirty = false
 
- 		b, err := json.Marshal(hashFile)
 
- 		if err != nil {
 
- 			fmt.Println(err)
 
- 			return
 
- 		}
 
- 		err = ioutil.WriteFile(hashFile.Path+"/.fdhashes3", b, 0644)
 
- 		if err != nil {
 
- 			panic(err)
 
- 		}
 
- 	}
 
- }
 
- func compareFolder(folder string) {
 
- 	loadIgnoreFile(ignores)
 
- 	loadAllHashFiles(folder)
 
- 	// putting all hashes into one big map key = hash, value list of files with that hash
 
- 	size := len(hashes)
 
- 	index := make(map[string][]string)
 
- 	count = 0
 
- 	for _, hashFile := range hashes {
 
- 		count++
 
- 		if count%100 == 0 {
 
- 			fmt.Printf("%d (%d) merging\n", count, size)
 
- 		}
 
- 		for filename, hash := range hashFile.Hashes {
 
- 			values := index[hash]
 
- 			if values == nil {
 
- 				values = make([]string, 0)
 
- 			}
 
- 			filepath := fmt.Sprintf("%s/%s", hashFile.Path, filename)
 
- 			pos := sort.SearchStrings(ignoreLines, filepath)
 
- 			if pos == len(ignoreLines) {
 
- 				_, err := os.Stat(filepath)
 
- 				if err == nil {
 
- 					values = append(values, filepath)
 
- 					index[hash] = values
 
- 				}
 
- 			}
 
- 		}
 
- 	}
 
- 	// sorting list of files for every hash and deleting hashes with only 1 entry
 
- 	size = len(index)
 
- 	myHashes := make([]string, 0)
 
- 	count = 0
 
- 	for hash, values := range index {
 
- 		count++
 
- 		if count%100 == 0 {
 
- 			fmt.Printf("%d (%d) sorting\n", count, size)
 
- 		}
 
- 		if len(values) > 1 {
 
- 			sort.Strings(values)
 
- 			index[hash] = values
 
- 			myHashes = append(myHashes, hash)
 
- 			//			for _, filename := range values {
 
- 			//				fmt.Printf("  %s\n", filename)
 
- 			//			}
 
- 		} else {
 
- 			delete(index, hash)
 
- 		}
 
- 	}
 
- 	sort.Slice(myHashes, func(i, j int) bool { return index[myHashes[i]][0] < index[myHashes[j]][0] })
 
- 	if outputJson {
 
- 		size = len(index)
 
- 		var filesize int64
 
- 		fileCount := 0
 
- 		for _, hash := range myHashes {
 
- 			values := index[hash]
 
- 			count++
 
- 			if count%100 == 0 {
 
- 				fmt.Printf("%d (%d) checking\n", count, size)
 
- 			}
 
- 			if len(values) > 1 {
 
- 				info, err := os.Stat(values[0])
 
- 				if err == nil {
 
- 					fmt.Printf("found identically hash: %s size: %d\n", hash, info.Size())
 
- 					filesize += int64(len(values)-1) * info.Size()
 
- 				}
 
- 				fileCount += len(values) - 1
 
- 				for _, filename := range values {
 
- 					fmt.Printf("  %s\n", filename)
 
- 				}
 
- 			} else {
 
- 				delete(index, hash)
 
- 			}
 
- 		}
 
- 		b, err := json.Marshal(index)
 
- 		if err != nil {
 
- 			fmt.Println(err)
 
- 			return
 
- 		}
 
- 		err = ioutil.WriteFile(report, b, 0644)
 
- 		if err != nil {
 
- 			panic(err)
 
- 		}
 
- 	} else {
 
- 		size := len(index)
 
- 		f, err := os.Create(report)
 
- 		check(err)
 
- 		w := bufio.NewWriter(f)
 
- 		count := 0
 
- 		var filesize int64
 
- 		fileCount := 0
 
- 		for _, hash := range myHashes {
 
- 			values := index[hash]
 
- 			count++
 
- 			if count%100 == 0 {
 
- 				fmt.Printf("%d (%d) checking\n", count, size)
 
- 			}
 
- 			if len(values) > 1 {
 
- 				info, err := os.Stat(values[0])
 
- 				if err == nil {
 
- 					w.WriteString(fmt.Sprintf("found identically hash: size: %d\n", info.Size()))
 
- 					filesize += int64(len(values)-1) * info.Size()
 
- 				}
 
- 				fileCount += len(values) - 1
 
- 				for _, filename := range values {
 
- 					w.WriteString(fmt.Sprintf("  %s\n", filename))
 
- 				}
 
- 				w.Flush()
 
- 			}
 
- 		}
 
- 		w.WriteString(fmt.Sprintf("can save up to %s on %d files\n", bytefmt.ByteSize(uint64(filesize)), fileCount))
 
- 		w.Flush()
 
- 	}
 
- }
 
- func loadIgnoreFile(filename string) {
 
- 	content, err := ioutil.ReadFile(filename)
 
- 	if err == nil {
 
- 		ignoreLines = strings.Split(string(content), "\n")
 
- 		sort.Strings(ignoreLines)
 
- 	}
 
- }
 
- func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) {
 
- 	for _, hashFile := range hashes {
 
- 		for filename, hash := range hashFile.Hashes {
 
- 			if (filename != exFilename) && (hashFile.Path != exFilepath) {
 
- 				if hash == srcHash {
 
- 					value += fmt.Sprintf("%s/%s;", hashFile.Path, filename)
 
- 					found = true
 
- 				}
 
- 			}
 
- 		}
 
- 	}
 
- 	return
 
- }
 
- func loadAllHashFiles(folder string) {
 
- 	count = 0
 
- 	addWork = 0
 
- 	err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
 
- 		if info != nil {
 
- 			if info.IsDir() {
 
- 				count++
 
- 				fmt.Print(".")
 
- 				if (count % 100) == 0 {
 
- 					fmt.Println()
 
- 				}
 
- 				hashFile, ok := hashes[path]
 
- 				if !ok {
 
- 					_, err := os.Stat(path + "/.fdhashes3")
 
- 					if os.IsNotExist(err) {
 
- 						hashFile = hash.Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
 
- 					} else {
 
- 						hf, err := hash.LoadHashfile(path + "/.fdhashes3")
 
- 						check(err)
 
- 						hashFile = *hf
 
- 					}
 
- 					hashes[path] = hashFile
 
- 				}
 
- 			}
 
- 		}
 
- 		return nil
 
- 	})
 
- 	check(err)
 
- 	fmt.Printf("\nfound %d hash files.\n", len(hashes))
 
- }
 
// check panics with e unless e is nil.
func check(e error) {
	if e == nil {
		return
	}
	panic(e)
}
 
 
  |