main.go 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. package main
  2. import (
  3. "bufio"
  4. "crypto/sha256"
  5. "encoding/hex"
  6. "encoding/json"
  7. "fmt"
  8. "io"
  9. "io/ioutil"
  10. "log"
  11. "os"
  12. "path/filepath"
  13. "sort"
  14. "strings"
  15. "sync"
  16. "time"
  17. "code.cloudfoundry.org/bytefmt"
  18. flag "github.com/spf13/pflag"
  19. "wkla.no-ip.biz/gogs/Willie/GoHash/pkg/hash"
  20. )
  21. var hashes map[string]hash.Fdhashes
  22. var ignoreLines []string
  23. var mu sync.RWMutex
  24. var rewrite bool
  25. var prune bool
  26. var outputJson bool
  27. var report string
  28. var ignores string
  29. func init() {
  30. flag.BoolVarP(&rewrite, "rewrite", "r", false, "rewrite all fhhashes files.")
  31. flag.StringVarP(&report, "equals", "e", "", "compare all file hashes and writing a equlatity report.")
  32. flag.BoolVarP(&prune, "prune", "p", false, "checking all fdhashes files.")
  33. flag.BoolVarP(&outputJson, "json", "j", false, "output as json.")
  34. flag.StringVarP(&ignores, "ignores", "i", "", "list of files to ignore in report.")
  35. }
  36. func main() {
  37. log.Println("starting GoHash")
  38. ignoreLines = make([]string, 0)
  39. hashes = make(map[string]hash.Fdhashes)
  40. flag.Parse()
  41. myFile := flag.Arg(0)
  42. if rewrite {
  43. log.Println("rewrite active")
  44. }
  45. if prune {
  46. log.Println("prune active")
  47. }
  48. if outputJson {
  49. log.Println("output json format active")
  50. }
  51. if report != "" {
  52. log.Println("report active, file: ", report)
  53. }
  54. if ignores != "" {
  55. log.Println("ignores file: ", ignores)
  56. }
  57. file, err := os.Stat(myFile)
  58. if os.IsNotExist(err) {
  59. log.Fatalln("File does not exists:", myFile)
  60. }
  61. if file.IsDir() {
  62. log.Println("start with folder:", myFile)
  63. if report != "" {
  64. compareFolder(myFile)
  65. } else {
  66. processFolder(myFile)
  67. saveAllHashFiles()
  68. }
  69. } else {
  70. log.Printf("file %s has hash %s\n", myFile, getSha256Hash(myFile))
  71. }
  72. log.Println("done")
  73. }
  // getSha256Hash returns the hex-encoded SHA-256 digest of the file named by
  // fileStr. A failure to open or read the file ends the program via log.Fatal.
  func getSha256Hash(fileStr string) string {
  	src, openErr := os.Open(fileStr)
  	if openErr != nil {
  		log.Fatal(openErr)
  	}
  	defer src.Close()

  	digest := sha256.New()
  	_, copyErr := io.Copy(digest, src)
  	if copyErr != nil {
  		log.Fatal(copyErr)
  	}

  	sum := digest.Sum(nil)
  	return hex.EncodeToString(sum)
  }
  // lock1 and lock2 are taken by calculateHash around its accesses to a hash
  // file's Hashes map (lock1) and Times map (lock2).
  var (
  	lock1 sync.RWMutex
  	lock2 sync.RWMutex
  )
  88. func calculateHash(fileStr string) {
  89. var hashFile hash.Fdhashes
  90. doHash := true
  91. dir, fileName := filepath.Split(fileStr)
  92. if fileName == ".fdhashes3" {
  93. return
  94. }
  95. // checking if hash is present
  96. mu.Lock()
  97. hashFile, ok := hashes[dir]
  98. if !ok {
  99. _, err := os.Stat(dir + ".fdhashes3")
  100. if os.IsNotExist(err) || rewrite {
  101. hashFile = hash.Fdhashes{Path: dir, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
  102. } else {
  103. hf, err := hash.LoadHashfile(dir + ".fdhashes3")
  104. check(err)
  105. hashFile = *hf
  106. }
  107. hashes[dir] = hashFile
  108. }
  109. lock1.RLock()
  110. _, ok = hashFile.Hashes[fileName]
  111. lock1.RUnlock()
  112. mu.Unlock()
  113. doHash = !ok
  114. // checking if dattime is identically
  115. file, _ := os.Stat(fileStr)
  116. time := file.ModTime()
  117. lock2.RLock()
  118. savedTime, ok := hashFile.Times[fileName]
  119. lock2.RUnlock()
  120. if !time.Equal(savedTime) || !ok {
  121. doHash = true
  122. }
  123. if doHash {
  124. log.Printf("starting %s\n", fileStr)
  125. hash := getSha256Hash(fileStr)
  126. log.Printf("ready %s\n", fileStr)
  127. mu.Lock()
  128. lock1.Lock()
  129. hashFile.Hashes[fileName] = hash
  130. lock1.Unlock()
  131. lock2.Lock()
  132. hashFile.Times[fileName] = time
  133. lock2.Unlock()
  134. dirtyHashfile(&hashFile)
  135. hashes[dir] = hashFile
  136. mu.Unlock()
  137. log.Printf("file \"%s\" has hash \"%s\"\n", fileStr, hash)
  138. }
  139. }
  // Progress bookkeeping for the directory walks.
  var (
  	// count is the number of entries visited; it drives the progress output.
  	count int
  	// addWork counts files handed to calculateHash since the last reset.
  	addWork int
  	// startTime is when the walk started or the hash files were last saved.
  	startTime time.Time
  )
  143. func processFolder(folder string) {
  144. startTime = time.Now()
  145. count = 0
  146. addWork = 0
  147. err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
  148. count++
  149. if (count % 100) == 0 {
  150. fmt.Print(".")
  151. }
  152. if (count % 10000) == 0 {
  153. fmt.Println()
  154. }
  155. filename := info.Name()
  156. if filename[0:1] != "." {
  157. if info.IsDir() {
  158. fmt.Println(path)
  159. if prune {
  160. pruneHash(path)
  161. }
  162. }
  163. if !info.IsDir() {
  164. addWork++
  165. calculateHash(path)
  166. if time.Since(startTime).Seconds() > 10.0 {
  167. startTime = time.Now()
  168. saveAllHashFiles()
  169. addWork = 0
  170. }
  171. }
  172. }
  173. return nil
  174. })
  175. if err != nil {
  176. panic(err)
  177. }
  178. }
  179. /* delete unused hash values from the hash file */
  180. func pruneHash(dir string) {
  181. _, err := os.Stat(dir + "/.fdhashes3")
  182. if !os.IsNotExist(err) {
  183. hashFile, err := hash.LoadHashfile(dir + "/.fdhashes3")
  184. check(err)
  185. for filename := range hashFile.Hashes {
  186. _, err := os.Stat(dir + "/" + filename)
  187. if os.IsNotExist(err) {
  188. delete(hashFile.Hashes, filename)
  189. delete(hashFile.Times, filename)
  190. hashFile.Dirty = true
  191. }
  192. }
  193. for filename := range hashFile.Times {
  194. _, err := os.Stat(dir + "/" + filename)
  195. if os.IsNotExist(err) {
  196. delete(hashFile.Hashes, filename)
  197. delete(hashFile.Times, filename)
  198. hashFile.Dirty = true
  199. }
  200. }
  201. saveHashfile(hashFile)
  202. }
  203. }
  204. func dirtyHashfile(hashFile *hash.Fdhashes) {
  205. hashFile.Dirty = true
  206. }
  207. func saveAllHashFiles() {
  208. hashList := make([]hash.Fdhashes, 0)
  209. for _, hashFile := range hashes {
  210. if hashFile.Dirty {
  211. saveHashfile(&hashFile)
  212. hashList = append(hashList, hashFile)
  213. }
  214. }
  215. hashes = make(map[string]hash.Fdhashes)
  216. for _, hashFile := range hashList {
  217. hashes[hashFile.Path] = hashFile
  218. }
  219. }
  220. func saveHashfile(hashFile *hash.Fdhashes) {
  221. if hashFile.Dirty {
  222. hashFile.Dirty = false
  223. b, err := json.Marshal(hashFile)
  224. if err != nil {
  225. fmt.Println(err)
  226. return
  227. }
  228. err = ioutil.WriteFile(hashFile.Path+"/.fdhashes3", b, 0644)
  229. if err != nil {
  230. panic(err)
  231. }
  232. }
  233. }
  234. func compareFolder(folder string) {
  235. loadIgnoreFile(ignores)
  236. loadAllHashFiles(folder)
  237. // putting all hashes into one big map key = hash, value list of files with that hash
  238. size := len(hashes)
  239. index := make(map[string][]string)
  240. count = 0
  241. for _, hashFile := range hashes {
  242. count++
  243. if count%100 == 0 {
  244. fmt.Printf("%d (%d) merging\n", count, size)
  245. }
  246. for filename, hash := range hashFile.Hashes {
  247. values := index[hash]
  248. if values == nil {
  249. values = make([]string, 0)
  250. }
  251. filepath := fmt.Sprintf("%s/%s", hashFile.Path, filename)
  252. pos := sort.SearchStrings(ignoreLines, filepath)
  253. if pos == len(ignoreLines) {
  254. _, err := os.Stat(filepath)
  255. if err == nil {
  256. values = append(values, filepath)
  257. index[hash] = values
  258. }
  259. }
  260. }
  261. }
  262. // sorting list of files for every hash and deleting hashes with only 1 entry
  263. size = len(index)
  264. myHashes := make([]string, 0)
  265. count = 0
  266. for hash, values := range index {
  267. count++
  268. if count%100 == 0 {
  269. fmt.Printf("%d (%d) sorting\n", count, size)
  270. }
  271. if len(values) > 1 {
  272. sort.Strings(values)
  273. index[hash] = values
  274. myHashes = append(myHashes, hash)
  275. // for _, filename := range values {
  276. // fmt.Printf(" %s\n", filename)
  277. // }
  278. } else {
  279. delete(index, hash)
  280. }
  281. }
  282. sort.Slice(myHashes, func(i, j int) bool { return index[myHashes[i]][0] < index[myHashes[j]][0] })
  283. if outputJson {
  284. size = len(index)
  285. var filesize int64
  286. fileCount := 0
  287. for _, hash := range myHashes {
  288. values := index[hash]
  289. count++
  290. if count%100 == 0 {
  291. fmt.Printf("%d (%d) checking\n", count, size)
  292. }
  293. if len(values) > 1 {
  294. info, err := os.Stat(values[0])
  295. if err == nil {
  296. fmt.Printf("found identically hash: %s size: %d\n", hash, info.Size())
  297. filesize += int64(len(values)-1) * info.Size()
  298. }
  299. fileCount += len(values) - 1
  300. for _, filename := range values {
  301. fmt.Printf(" %s\n", filename)
  302. }
  303. } else {
  304. delete(index, hash)
  305. }
  306. }
  307. b, err := json.Marshal(index)
  308. if err != nil {
  309. fmt.Println(err)
  310. return
  311. }
  312. err = ioutil.WriteFile(report, b, 0644)
  313. if err != nil {
  314. panic(err)
  315. }
  316. } else {
  317. size := len(index)
  318. f, err := os.Create(report)
  319. check(err)
  320. w := bufio.NewWriter(f)
  321. count := 0
  322. var filesize int64
  323. fileCount := 0
  324. for _, hash := range myHashes {
  325. values := index[hash]
  326. count++
  327. if count%100 == 0 {
  328. fmt.Printf("%d (%d) checking\n", count, size)
  329. }
  330. if len(values) > 1 {
  331. info, err := os.Stat(values[0])
  332. if err == nil {
  333. w.WriteString(fmt.Sprintf("found identically hash: size: %d\n", info.Size()))
  334. filesize += int64(len(values)-1) * info.Size()
  335. }
  336. fileCount += len(values) - 1
  337. for _, filename := range values {
  338. w.WriteString(fmt.Sprintf(" %s\n", filename))
  339. }
  340. w.Flush()
  341. }
  342. }
  343. w.WriteString(fmt.Sprintf("can save up to %s on %d files\n", bytefmt.ByteSize(uint64(filesize)), fileCount))
  344. w.Flush()
  345. }
  346. }
  347. func loadIgnoreFile(filename string) {
  348. content, err := ioutil.ReadFile(filename)
  349. if err == nil {
  350. ignoreLines = strings.Split(string(content), "\n")
  351. sort.Strings(ignoreLines)
  352. }
  353. }
  354. func search(srcHash string, exFilename string, exFilepath string) (value string, found bool) {
  355. for _, hashFile := range hashes {
  356. for filename, hash := range hashFile.Hashes {
  357. if (filename != exFilename) && (hashFile.Path != exFilepath) {
  358. if hash == srcHash {
  359. value += fmt.Sprintf("%s/%s;", hashFile.Path, filename)
  360. found = true
  361. }
  362. }
  363. }
  364. }
  365. return
  366. }
  367. func loadAllHashFiles(folder string) {
  368. count = 0
  369. addWork = 0
  370. err := filepath.Walk(folder, func(path string, info os.FileInfo, err error) error {
  371. if info != nil {
  372. if info.IsDir() {
  373. count++
  374. fmt.Print(".")
  375. if (count % 100) == 0 {
  376. fmt.Println()
  377. }
  378. hashFile, ok := hashes[path]
  379. if !ok {
  380. _, err := os.Stat(path + "/.fdhashes3")
  381. if os.IsNotExist(err) {
  382. hashFile = hash.Fdhashes{Path: path, Hashes: make(map[string]string), Times: make(map[string]time.Time), Dirty: true}
  383. } else {
  384. hf, err := hash.LoadHashfile(path + "/.fdhashes3")
  385. check(err)
  386. hashFile = *hf
  387. }
  388. hashes[path] = hashFile
  389. }
  390. }
  391. }
  392. return nil
  393. })
  394. check(err)
  395. fmt.Printf("\nfound %d hash files.\n", len(hashes))
  396. }
  // check panics with e when e is not nil and does nothing otherwise.
  func check(e error) {
  	if e == nil {
  		return
  	}
  	panic(e)
  }