wow, filesystem stats was HARD

2022-12-07 20:37:14 +00:00
parent 0ad5b9d435
commit 7443fc80ae
3 changed files with 1315 additions and 0 deletions
--- a/day7-directories-cleanup.lisp
+++ b/day7-directories-cleanup.lisp
@@ -0,0 +1,284 @@
+;; https://adventofcode.com/2022/day/7
+
+;; so, right now i need to calculate the sum of the sizes of files.
+;; but, there could be duplicates.
+;; so, i suppose i'll need to maintain the full filename?
+;; so, track the current directory? and on $ ls read lines until next $
+;; and put them into hashtable, under full name. ok. ugh
+;;
+;; so types of commands:
+;; cd / - drop current path
+;; cd <name> - add <name> to current path
+;; cd .. - drop head of current path
+;; ls - read in filenames, and add them with current path and size into hashmap
+;;
+;; then what? go through the hashmap and do calculation
+;; with current task, i don't really need to process
+;; dir d - names of the directories after $ ls
+;; but, how to do pleasant parsing of the lines?
+;; and how to store the state, if I'm reading things one line at a time?
+;; possibly with DO macro again
+
+;; i guess it could be one function that takes in line and returns new state
+;; so state would be
+;; - list of directories
+;; - hashtable of (filename -> size)
+
+;; i could split string, and try to do case pattern match
+(ql:quickload 'alexandria)
+
+(require 'cl-ppcre)
+
+;; Scratch: classify a sample line by its leading tokens; "dir a" gives DIR.
+(let* ((sample "dir a")
+       (tokens (cl-ppcre:split " " sample)))
+  (flet ((leading-p (prefix)
+           (equal prefix (subseq tokens 0 (length prefix)))))
+    (cond ((equal tokens '("$" "cd" "/")) 'ROOT)
+          ((leading-p '("$" "cd")) 'CD)
+          ((leading-p '("$" "ls")) 'LS)
+          ((leading-p '("dir")) 'DIR)
+          ((integerp (parse-integer (first tokens))) 'FILE)
+          (t 'OTHER))))
+
+
+;; CASE or COND
+
+;; REPL probes of the primitives used by the line classifier.
+;; => (1 2)
+(subseq '(1 2 3) 0 2)
+;; => T (the leading token of a file line parses as an integer)
+(integerp (parse-integer (first (cl-ppcre:split " " "14848514 b.txt"))))
+;; => NIL (a string is not an integer, so INTEGERP alone cannot detect file lines)
+(integerp "1")
+;; => 1 (second value: index where parsing stopped)
+(parse-integer "1")
+;; signals PARSE-ERROR: without :junk-allowed a non-numeric string is an error,
+;; so loading this file as a whole stops here
+(parse-integer "r")
+
+;; Debug helper: write N in its readable (PRIN1) form followed by a newline
+;; to standard output.  Returns NIL.
+(defun what (n)
+  (prin1 n)
+  (terpri))
+
+;; Classify one line of terminal output by its leading tokens.
+;; Returns one of the symbols:
+;;   ROOT  - exactly "$ cd /"
+;;   CD    - any other "$ cd ..." command
+;;   LS    - "$ ls"
+;;   DIR   - a listing entry "dir <name>"
+;;   FILE  - a listing entry whose first token parses as an integer
+;;   OTHER - anything else, including empty or one-token lines
+;; Tokens are inspected with FIRST/SECOND instead of SUBSEQ, and the integer
+;; check is wrapped in IGNORE-ERRORS, so short or non-numeric lines fall
+;; through to OTHER instead of signalling.
+(defun my-parse-line (line)
+  (let* ((line-list (cl-ppcre:split " " line))
+         (head (first line-list))
+         (arg (second line-list)))
+    (cond ((equal '("$" "cd" "/") line-list) 'ROOT)
+          ((and (equal "$" head) (equal "cd" arg)) 'CD)
+          ((and (equal "$" head) (equal "ls" arg)) 'LS)
+          ((equal "dir" head) 'DIR)
+          ((and head (ignore-errors (parse-integer head))) 'FILE)
+          (t 'OTHER))))
+
+;; (integerp (parse-integer (first (cl-ppcre:split " " "$ cd /"))))
+
+;; Smoke test: classify the opening lines of the puzzle example.
+;; Evaluates to (ROOT LS DIR FILE FILE DIR CD).
+(loop for sample in '("$ cd /"
+                      "$ ls"
+                      "dir a"
+                      "14848514 b.txt"
+                      "8504156 c.dat"
+                      "dir d"
+                      "$ cd a")
+      collect (my-parse-line sample))
+
+;; next step is to utilize parse line to change state, i guess
+
+(ql:quickload 'fset)
+
+;; Results of ingesting the puzzle input, kept in specials for REPL inspection:
+;;   *test-dir-list*   - the current-path stack left after the last line
+;;   *test-dir-set*    - full name of every directory seen in a listing
+;;   *test-file-sizes* - full file name -> size
+(defparameter *test-dir-list* ())
+(defparameter *test-dir-set* (fset:empty-set))
+;; Keys are freshly built strings, so the table has to compare with EQUAL;
+;; under the default EQL test an existing name would never be found again.
+(defparameter *test-file-sizes* (make-hash-table :test #'equal))
+
+;; Replay the terminal session in day7-input.txt line by line, tracking the
+;; current directory as a stack (innermost directory first) and recording
+;; every listed directory and file under its full path.
+(let ((current-path-dirs '())
+      (file-sizes (make-hash-table :test #'equal))
+      (dirset (fset:empty-set)))
+  (labels ((my-full-file-name (dirs lastName)
+             ;; DIRS is innermost-first, so reverse before joining with "/".
+             (format nil "~{~a~^/~}" (reverse (cons lastName dirs))))
+           (ingest-line (line)
+             (let* ((line-list (cl-ppcre:split " " line))
+                    (head (first line-list))
+                    (arg (second line-list)))
+               (cond ((equal '("$" "cd" "/") line-list)
+                      (setf current-path-dirs '()))
+                     ((equal '("$" "cd" "..") line-list)
+                      (pop current-path-dirs))
+                     ((and (equal "$" head) (equal "cd" arg))
+                      (push (third line-list) current-path-dirs))
+                     ((and (equal "$" head) (equal "ls" arg))
+                      ;; nothing to do: the listing lines that follow are
+                      ;; handled one at a time by the clauses below
+                      nil)
+                     ((equal "dir" head)
+                      (setf dirset (fset:with dirset (my-full-file-name current-path-dirs arg))))
+                     (t
+                      ;; a file line is "<size> <name>"; blank or unrecognised
+                      ;; lines are skipped instead of signalling
+                      (let ((file-size (and head (ignore-errors (parse-integer head)))))
+                        (when file-size
+                          (setf (gethash (my-full-file-name current-path-dirs arg) file-sizes)
+                                file-size))))))))
+    (with-open-file (in "day7-input.txt")
+    ;; with-open-file (in "day7-test-input.txt")
+      (loop
+        for line = (read-line in nil nil)
+        while line
+        do (ingest-line line))))
+  (setf *test-dir-list* current-path-dirs)
+  (setf *test-file-sizes* file-sizes)
+  (setf *test-dir-set* dirset))
+
+;; Inspect the three results at the REPL.
+*test-dir-list*
+*test-dir-set*
+*test-file-sizes*
+;; Dump every "full name => size" entry, one per line.  MAPHASH returns NIL,
+;; so the outer PRINT then prints NIL.
+(print (maphash (lambda (name size)
+                  (format t "~a => ~a~%" name size))
+                *test-file-sizes*))
+;; ok, popping doesn't happen i think
+;; fixed
+;; now i want / in the beginning
+
+;; now let's loop over dirs in set. and loop over keys in the hashtable
+;; and sum values, and collect sum if it's < 100000
+;;
+;; well. it's for sure! should be different value
+;; hmw/tsrqvpbq/dqbnbl/mbc/nqrdmvjm
+;; is also a file
+;; hmw/tsrqvpbq/dqbnbl/mbc/nqrdmvjm.vtq => 137158
+;; but if all such directories are above 100k then they wouldn't matter.
+;; ugh. let's sort lines
+
+;; Print each registered directory name on its own line.
+(fset:do-set (dir *test-dir-set*)
+  (print dir))
+;; but I guess i'll need to just map with fset:image
+;; and put sum of all files there
+;; let's first return all files for which dir is prefix
+(fset:image (lambda (dir) (list dir 'imagined)) *test-dir-set*)
+
+;; First attempt at part 1, kept as a record: map every directory to the sum
+;; of the files below it, keep the sums that are <= 100000 and add them up.
+;; The mapped result is itself a set, so directories with equal sums collapse
+;; into one element and the total comes out too small.
+(print (fset:reduce #'+
+                    (fset:filter (lambda (sumed)
+                                   (>= 100000 sumed))
+                                 (fset:image (lambda (dir)
+                                               (loop
+                                                 for filename being each hash-key of *test-file-sizes* using (hash-value filesize)
+                                                 when (alexandria:starts-with-subseq (concatenate 'string dir "/") filename )
+                                                   summing filesize)) *test-dir-set*))))
+;; oh, shit. it's set, so duplicates of the freaking same sizes get dropped.
+;; so, i need to calculate differently
+;; Part 1: add up the total size of every directory whose size is at most
+;; 100000 (inclusive bound).  A directory's size is the sum of all files
+;; whose full name starts with "<dir>/"; the trailing slash keeps a file
+;; such as "dir.ext" from matching directory "dir".  Sizes are accumulated
+;; in a plain counter rather than a set, so equal sizes are all counted.
+(print (let ((total-sum 0))
+         (fset:do-set (dirname *test-dir-set*)
+           (let* ((prefix (concatenate 'string dirname "/"))
+                  (dir-size (loop
+                              for filename being each hash-key of *test-file-sizes* using (hash-value filesize)
+                              when (alexandria:starts-with-subseq prefix filename)
+                                summing filesize)))
+             (when (<= dir-size 100000)
+               (incf total-sum dir-size))))
+         total-sum))
+
+;; crap
+;; with / 159935456
+;; without / 160367201
+;; but if i'm not filtering, then they should be same? wtf
+;; or like i'm not counting top level?
+
+;; wow. summing instead of collect
+;; now filter those that are less than 100000 and sum again
+;; ok, i guess.
+;; now with a different file?
+;;
+;; wrong answer 1265478
+;;
+;; oh, you tricky people
+;; there's dir.file that matches as prefix. ugh
+;; but then my value should be more than required? ugh twice
+
+;; For every directory, collect the (full-name size) pairs of the files whose
+;; name starts with "<dir>/".
+(fset:image (lambda (dir)
+              (let ((prefix (concatenate 'string dir "/")))
+                (loop for name being the hash-keys of *test-file-sizes*
+                        using (hash-value size)
+                      when (alexandria:starts-with-subseq prefix name)
+                        collect (list name size))))
+            *test-dir-set*)
+
+;; Probing for a usable "string starts with" predicate.
+;; NOTE(review): STRING-PREFIX-P is not defined by the CL standard nor by
+;; anything loaded in the visible part of this file; presumably this signals
+;; UNDEFINED-FUNCTION - confirm.
+(string-prefix-p "a" "aab")
+;; NOTE(review): ALEXANDRIA:STARTS-WITH appears to compare the first ELEMENT
+;; of the sequence with the object, so a string argument would not match a
+;; character - confirm against the Alexandria manual.
+(alexandria:starts-with "a" "aab")
+;; subsequence-prefix test; this is the predicate the sums in this file use
+(alexandria:starts-with-subseq "a/" "a/ab")
+;; the empty prefix case
+(alexandria:starts-with-subseq "" "aab")
+;; ok. hello
+
+;; error - tried to use FLET* for allowing recursion, again went to stackoverflow.
+
+;; oh. i need to sum over dirs, ugh.
+;; now that's more complicated now.
+;; so. then maybe i'd want to register DIRs in another hashtable?
+;; and then for each dir collect all files that start with that prefix and sum?
+;; would be O(n^2) but ok, i guess
+
+;; Probes for the building blocks of the path bookkeeping.
+(defparameter *test-set* (fset:empty-set))
+;; NOTE(review): the result of FSET:WITH is not stored here, so *test-set*
+;; presumably stays empty - confirm.
+(fset:with *test-set* "hello")          ; already uses #'EQUAL , cool
+
+;; => "helloanotheryay"
+(concatenate 'string "hello" "another" "yay")
+;; yay!
+;; thank you https://stackoverflow.com/questions/5457346/lisp-function-to-concatenate-a-list-of-strings
+;; => "hello/this/one"  (~^ suppresses the separator after the last element)
+(format nil "~{~a~^/~}" '("hello" "this" "one"))
+
+;; => ("name" "efim" "home")
+(concatenate 'list '("name") '("efim" "home"))
+;; Build an absolute path string.  DIRS holds the enclosing directories,
+;; innermost first; LASTNAME is the final path component.  The components are
+;; put in root-to-leaf order and joined with "/" behind a leading "/".
+(defun my-full-file-name (dirs lastName)
+  (format nil "/~{~a~^/~}" (reverse (list* lastName (coerce dirs 'list)))))
+
+(my-full-file-name '("eifm" "home") "Documents")
+
+
+;; well. ok. now what? maybe i don't need to count root anyway?
+
+;; ugh. can i build a tree then?
+;; have pointer? ugh/
+
+;;; so, just start anew? ugh
+
+;;; TO THE PART 2
+;; need to find /smallest/ directory to delete, so that free space would be 30000000 out of 70000000
+;; so I need "total sum of all"
+
+;; copying over code to calculate sum
+;; oh, shit. it's set, so duplicates of the freaking same sizes get dropped.
+;; so, i need to calculate differently
+;; Adds up the size of EVERY directory in the set.  A file's full name
+;; matches the prefix of each of its ancestor directories, so a nested file
+;; is counted once per ancestor; the total is therefore not the disk usage.
+(print  (let ((total-sum 0))
+          (fset:do-set (dirname *test-dir-set*)
+            (let ((dir-size (loop
+                              for filename being each hash-key of *test-file-sizes* using (hash-value filesize)
+                              when (alexandria:starts-with-subseq (concatenate 'string dirname "/")
+                                                                  filename )
+                                summing filesize)))
+              (incf total-sum dir-size)))
+          total-sum))
+;; 166378694 ; oh that's sum with the duplicates. ugh
+
+;; (- 70000000 166378694)
+;; i need direct sum, just over the lines.
+;; luckily this should be easier? or no? well, sum over the file-hashtable, there are all unique
+
+;; Total bytes in use: each entry of the file table is added exactly once.
+(print (loop for size being the hash-values of *test-file-sizes*
+             sum size))
+
+(print (- 70000000 44795677))
+;; wait, no. I need 30000000
+;; so, that's my free memory right now: 25204323
+;; to free is
+(print (- 30000000 25204323))
+;; to free 4795677
+;; now i need, among all dir sizes, to find one that is more than that, but the smallest
+
+;; Part 2: pair every directory with its total size as (size dir) and keep
+;; the ones that free enough space when deleted.  The bound is inclusive: a
+;; directory of exactly 4795677 bytes is sufficient.  The smallest remaining
+;; (size dir) pair is the directory to delete.
+(fset:filter (lambda (item)
+               (<= 4795677 (first item)))
+             (fset:image (lambda (dir)
+                           (let ((prefix (concatenate 'string dir "/")))
+                             (list (loop
+                                     for filename being each hash-key of *test-file-sizes* using (hash-value filesize)
+                                     when (alexandria:starts-with-subseq prefix filename)
+                                       summing filesize)
+                                   dir)))
+                         *test-dir-set*))
+;; and it shows sorted, and the first one - is the dir to be deleted.
+;; cooooool. it was very hard.
+;;
+;; what are lessons:
+;; image (mapping) on set discards duplicates. lots of time spent debugging this
+;; also - i need to learn threading.
+;; maybe that's the way to make code simpler.
+;; but then i won't be able to call it iteratively? i really still should.
+;;
+;; so, go to Alexandria for threading, and for string things.
+;; and maybe read about functions for hashmaps and such. ugh.
--- a/day7-input.txt
+++ b/day7-input.txt
--- a/day7-test-input.txt
+++ b/day7-test-input.txt
@@ -0,0 +1,23 @@
+$ cd /
+$ ls
+dir a
+1 b.txt
+1 c.dat
+dir d
+$ cd a
+$ ls
+dir e
+1 f
+1 g
+1 h.lst
+$ cd e
+$ ls
+1 i
+$ cd ..
+$ cd ..
+$ cd d
+$ ls
+1 j
+1 d.log
+1 d.ext
+1 k