diff --git a/subvolume.py b/0_check-btrfs-sub-size-diff__original.py
old mode 100644
new mode 100755
similarity index 83%
rename from subvolume.py
rename to 0_check-btrfs-sub-size-diff__original.py
index bd0c87b..454143f
--- a/subvolume.py
+++ b/0_check-btrfs-sub-size-diff__original.py
@@ -24,8 +24,8 @@
import math, array
from functools import lru_cache
import multiprocessing,os
+#from sortedcontainers import SortedList
import bisect
-
#function to convert a pair of positive integers to a single integer
#we want to decrease memory consumption, thus we need this trick
#http://szudzik.com/ElegantPairing.pdf
@@ -221,7 +221,7 @@ def add(self,tree,key,start,stop,inode):
#do not search deep, since we add one subvolume at a time
ranges=[]
for myrange,snapshotinodelist in extent.items():
- if find_snapshot_in_list(snapshotinodelist,tree,False):
+ if find_snapshot_in_list(snapshotinodelist,tree,True):
ranges.append(myrange)
ranges.sort()
@@ -274,9 +274,9 @@ def add(self,tree,key,start,stop,inode):
#if newbase differs in an end of base then that end must
#be deleted because the end of the interval will be added
if base.lower>target.lower and newbase.lower!=base.lower:
- self.delete_range(key,base.lower,tree,False)
+ self.delete_range(key,base.lower,tree,True)
if base.upper.
+
+import btrfs
+import argparse
+import sys
+from collections import deque
+from collections import Counter
+from collections import defaultdict
+import math, array
+from functools import lru_cache
+import multiprocessing,os
+#from sortedcontainers import SortedList
+import bisect
+#function to convert a pair of positive integers to a single integer
+#we want to decrease memory consumption, thus we need this trick
+#http://szudzik.com/ElegantPairing.pdf
+#cache the results for speed up
+
+@lru_cache(maxsize=1024)
+def unique_number(x,y):
+    """Pack two non-negative integers into a single integer.
+
+    Implements the pairing from http://szudzik.com/ElegantPairing.pdf:
+    x*x+x+y when x>=y, otherwise y*y+x. Results are memoised by lru_cache.
+    """
+    if x>=y:
+        return x*x+x+y
+    return y*y+x
+
+#undo the above function, return x,y based on a single number
+#also cache the results
+
+
+################ Obtain Subvolume Path Function ##################################
+
+import subprocess # Needed for external command(btrfs) execution
+
+def _path_from_subvolume_listing(listing, snapshot):
+    """Return the path of subvolume `snapshot` found in `btrfs subvolume list` text, or ""."""
+    wanted = str(snapshot)
+    for line in listing.splitlines():
+        fields = line.split()
+        # Expected line shape: "ID <id> gen <n> top level <id> path <path>".
+        # Comparing the whole ID field avoids matching e.g. 1256 when asked for 256.
+        if len(fields) >= 2 and fields[0] == "ID" and fields[1] == wanted:
+            # Everything after " path " is the path, so embedded spaces survive.
+            _, found, path = line.partition(" path ")
+            if found:
+                return path.strip()
+    return ""
+
+def get_subvolume_path(snapshot, subpath):
+    """
+    Retrieves the path of a subvolume based on its snapshot ID.
+
+    Args:
+        snapshot: The integer ID of the snapshot.
+        subpath: A path inside the btrfs filesystem; it is handed to
+            `sudo btrfs subvolume list`.
+
+    Returns:
+        A fixed label for the top-level subvolume (ID 5), otherwise the path
+        printed by btrfs for that ID, or an empty string when the ID is not
+        listed or the command produced no output.
+    """
+    if snapshot == 5:
+        return "///// Top-Level(ID:5) /////"
+
+    # No check=True on purpose: a failing command just yields an empty listing,
+    # so the caller can keep printing its table with a blank path.
+    listing = subprocess.run(["sudo", "btrfs", "subvolume", "list", f"{subpath}"], capture_output=True, text=True).stdout
+    return _path_from_subvolume_listing(listing, snapshot)
+
+################ Obtain Subvolume Path Function ##################################
+
+
+@lru_cache(maxsize=1024)
+def unique_to_pair(number):
+    """Undo unique_number(): return the (x, y) pair packed into `number`.
+
+    Results are memoised by lru_cache.
+    """
+    root=int(math.sqrt(number))
+    # math.sqrt works in double precision, so above 2**53 the estimate can be
+    # off by one; settle the exact integer square root with integer arithmetic.
+    while root*root>number:
+        root-=1
+    while (root+1)*(root+1)<=number:
+        root+=1
+    crit=number-root*root
+    if crit<root:
+        return crit,root
+    return root,crit-root
+
+#take a list of 'paired' numbers and return the x coordinate, which is snapshot
+#stored into the pair
+def return_snapshots(mylist):
+    """Decode every paired number in `mylist` and return the list of their
+    first (snapshot) components, in the same order."""
+    return [unique_to_pair(coded)[0] for coded in mylist]
+
+#take a list of 'paired' numbers and return the paired number that has the same
+#x coordinate, which is the snapshot stored into the pair. if shallow search only
+#the last element
+def return_coded(mylist,snapshot,shallow):
+    """Return the paired number in `mylist` whose snapshot component equals
+    `snapshot`, or None when there is no such entry.
+
+    With `shallow` true only the last element is inspected; otherwise the
+    first match in list order is returned. An empty sequence yields None.
+    """
+    if not mylist:
+        #indexing [-1] on an empty sequence would raise IndexError
+        return None
+    candidates=mylist[-1:] if shallow else mylist
+    for item in candidates:
+        if unique_to_pair(item)[0]==snapshot:
+            return item
+    return None
+
+
+#take a paired number and compare it with a snapshot
+#cache the results for speedup
+@lru_cache(maxsize=1024)
+def compare_pair_to_snapshot(item,snapshot):
+    """Tell whether the paired number `item` decodes to `snapshot` as its
+    first component. Results are memoised by lru_cache."""
+    return unique_to_pair(item)[0]==snapshot
+
+#find an item that has the same subvolume and remove it
+def remove_snapshot_from_list(mylist,snapshot,shallow):
+    """Remove, in place, the entry of `mylist` that return_coded() finds for
+    `snapshot` (if any) and return the same sequence object."""
+    coded=return_coded(mylist,snapshot,shallow)
+    #identity test: a paired number of 0 is a valid entry, only None means "absent"
+    if coded is not None:
+        mylist.remove(coded)
+    return mylist
+
+#find if an item with the same snapshot exists in the list
+def find_snapshot_in_list(mylist,snapshot,shallow):
+    """Tell whether `mylist` holds a paired number belonging to `snapshot`.
+
+    With `shallow` true only the last element is checked; otherwise every
+    element is. An empty sequence yields False.
+    """
+    if not mylist:
+        #indexing [-1] on an empty sequence would raise IndexError
+        return False
+    candidates=mylist[-1:] if shallow else mylist
+    return any(compare_pair_to_snapshot(item,snapshot) for item in candidates)
+
+#function to calculate subtraction between 2 integer intervals
+def range_sub(range1,range2):
+    """Subtract integer interval `range2` from `range1`.
+
+    Both arguments are (low, high) tuples whose end points are treated as
+    inclusive. Returns a list with zero, one or two (low, high) tuples: the
+    parts of `range1` that are not covered by `range2`.
+    """
+    result=[]
+    a,b=range1
+    x,y=range2
+    #NOTE(review): this first branch was garbled in the source and has been
+    #reconstructed; disjoint intervals leave range1 untouched
+    if x>b or y<a:
+        result.append((a,b))
+        return result
+    if y>=b:
+        #range2 covers the upper end of range1
+        if x>a:
+            b=x-1
+        else:
+            #range2 covers range1 completely
+            return result
+    else:
+        #range2 ends inside range1: keep what lies below it (if any) ...
+        if x>a:
+            result.append((a,x-1))
+        #... and what lies above it
+        a=y+1
+    result.append((a,b))
+    return result
+
+
+#Class to implement intervals, we only care about subtraction
+class Ranges:
+    """A list of (start, stop) integer intervals; only subtraction is implemented.
+
+    `list` holds the intervals in insertion order, `lower` is the start of the
+    first interval added and `upper` the stop of the last one added.
+    """
+
+    def __init__(self,start=None,stop=None):
+        """Create an empty collection, or one holding (start, stop) when `start` is given."""
+        self.list=[]
+        self.upper=None
+        self.lower=None
+        #identity test so that a start of 0 still counts as "given"
+        if start is not None:
+            self.list.append((start,stop))
+            self.upper=stop
+            self.lower=start
+
+    def is_empty(self):
+        """Return True when no interval is stored."""
+        return not self.list
+
+    def __sub__(self,other):
+        """Return a new Ranges with every interval of `other` cut out of this one."""
+        final_result=Ranges()
+        queue=deque(self.list)
+
+        while queue:
+            item=queue.popleft()
+            #this could be optimized more
+            for otheritem in other.list:
+                result=range_sub(item,otheritem)
+                if not result:
+                    #nothing of this interval survives
+                    item=None
+                    break
+                if len(result)>1:
+                    #the interval was split: handle the upper piece later
+                    queue.appendleft(result[1])
+                item=result[0]
+            if item is not None:
+                final_result.append(item)
+        return final_result
+
+    def append(self,myrange):
+        """Add interval `myrange`; it becomes the new `upper`, and `lower` if none was set."""
+        self.list.append(myrange)
+        self.upper=myrange[1]
+        if self.lower is None:
+            self.lower=myrange[0]
+
+    def __str__(self):
+        return str(self.list)
+
+#Class to hold data. It's a dictionary of dictionaries.
+#tree[key of the extent]= {range1: [list of paired (snapshot,inode)],range2: [list of paired (snapshot,inode)]}
+#inodes data are used to find which files hold data of unique extents.
+class TreeWrapper:
+ def __init__(self):
+ self._tree=dict()
+ self._snapshots=[]
+ #self._inodes=dict()
+
+
+ #check if the current tree has data for this extent/key.
+ #if it has, check if the current extent range is already parsed.
+ def delete_range(self,key,limit,tree,shallow):
+     """Remove subvolume `tree` from the entries stored at boundary `limit`
+     of extent `key`; drop the boundary itself once nothing is left there."""
+     boundaries=self._tree[key]
+     remove_snapshot_from_list(boundaries[limit],tree,shallow)
+     if not boundaries[limit]:
+         del boundaries[limit]
+
+ #use array instead of list because integers consume too much memory in python
+ def add_range(self,key,limit,mypair):
+     """Append paired number `mypair` to the array stored at boundary `limit`
+     of extent `key`, creating the extent dictionary and the unsigned 64-bit
+     array on first use."""
+     boundaries=self._tree.setdefault(key,dict())
+     if limit not in boundaries:
+         boundaries[limit]=array.array('Q')
+     boundaries[limit].append(mypair)
+
+ #unfortunately some extents reappear, maybe there are dedup or reflink?
+ #so we need to take care of that by calculating exactly the data that each
+ #subvolume uses
+ def add(self,tree,key,start,stop,inode):
+ mypair=unique_number(tree,inode)
+ if key in self._tree.keys():
+ extent=self._tree[key]
+ #find all ranges that have data for this subvolume in this extent
+ #do not search deep, since we add one subvolume at a time
+ ranges=[]
+ for myrange,snapshotinodelist in extent.items():
+ if find_snapshot_in_list(snapshotinodelist,tree,True):
+ ranges.append(myrange)
+ ranges.sort()
+
+ #start of intervals for this key and tree are in even positions
+ #ends are in odd positions
+ starts=ranges[::2]
+ stops=ranges[1::2]
+ if len(starts)!=len(stops):
+ print("problem",key,ranges)
+ sys.exit(0)
+
+ #if the data we are trying to push already exist, ignore them
+ if start in starts:
+ index=starts.index(start)
+ if stop == stops[index]:
+ #print(ranges,start,stop)
+ return
+
+ #Algorithm: we have these intervals: 0...100, 150...200
+ #and we want to add 80...170
+ #the final result must be 0...200 because this extent is used
+ #interely by this snapshot
+
+ #For each base, calculate base - target. If the base
+ #interval is modified then delete that end because new data will
+ #be added. Then target becomes target-base and continue with the next base
+
+ #try to minimize the subtractions needed
+ realstart=bisect.bisect_left(starts,start)
+ realstop=bisect.bisect_right(stops,stop)
+ if realstart > 0:
+ #print(realstart,starts,start)
+ realstart-=1
+ mystarts=starts[realstart:realstop+1]
+ mystops=stops[realstart:realstop+1]
+ if len(mystarts)>0:
+ if mystops[-1]0:
+ if mystarts[0]>stop:
+ mystarts=mystarts[1:]
+
+ #target is the interval we are trying to add
+ target=Ranges(start,stop)
+ for i, oldstart in enumerate(mystarts):
+ #base is the interval that we must analyze
+ base=Ranges(oldstart,mystops[i])
+ newbase=base-target
+ #if newbase differs in an end of base then that end must
+ #be deleted because the end of the interval will be added
+ if base.lower>target.lower and newbase.lower!=base.lower:
+ self.delete_range(key,base.lower,tree,True)
+ if base.upper=1:
+ try:
+ size=iterableview[i+1][0]-myrange
+ result+=size
+ except:
+ print(extent,sorted(rangedict.items()),mytuple)
+ return result
+
+ #find those ranges that have only one snapshot, if this snapshot is deleted
+ #this space will be freed.
+ #based on the scenario of transform it should return:
+ #result[tree1]=pos2-pos1+pos4-pos3
+ #result[tree2]=0
+ #if files are analyzed use the inode data to find them and store them in different dictionary.
+ def find_unique(self,fs,analyze_file):
+     """Sum, per subvolume, the size of the ranges that only it references.
+
+     For every boundary whose entry list holds exactly one paired number, the
+     distance to the next boundary of the same extent is added to that
+     subvolume's counter.
+
+     Returns (result, result_data): a Counter keyed by subvolume, and a
+     defaultdict mapping a looked-up file name to the set of subvolumes it
+     was found in. The second one is only filled when `analyze_file` is true.
+     """
+     result=Counter()
+     result_data=defaultdict(set)
+     for rangedict in self._tree.values():
+         iterableview=sorted(rangedict.items())
+         for i,(myrange,unique_pair_list) in enumerate(iterableview):
+             if len(unique_pair_list)!=1:
+                 continue
+             subvolume,inode=unique_to_pair(unique_pair_list[0])
+             #NOTE(review): assumes a later boundary always exists for such an
+             #entry (presumably guaranteed by transform) - confirm
+             size=iterableview[i+1][0]-myrange
+             result[subvolume]+=size
+             if analyze_file:
+                 try:
+                     file=btrfs.ioctl.ino_lookup(fs.fd,subvolume,inode)
+                     result_data[file.name_bytes.decode('utf-8')].add(subvolume)
+                 except Exception:
+                     #best effort: report and go on, but let KeyboardInterrupt
+                     #and SystemExit through
+                     print("Inode not found",inode)
+     return result,result_data
+
+ #helper function to find the size of the extent ranges that have the desired snapshots
+ def find_snapshots_size(self,wanted,not_wanted):
+     """Return the total size of the ranges that are referenced by at least
+     one subvolume in `wanted` and by none in `not_wanted`.
+
+     The size of a range is the distance from its boundary to the next
+     boundary of the same extent.
+     """
+     #build the sets once instead of once per range
+     wanted_set=set(wanted)
+     unwanted_set=set(not_wanted)
+     result=0
+     for extent,rangedict in self._tree.items():
+         rangelist=sorted(rangedict.keys())
+         for i,myrange in enumerate(rangelist):
+             snapshots=set(return_snapshots(rangedict[myrange]))
+             if (wanted_set & snapshots) and not (unwanted_set & snapshots):
+                 try:
+                     result+=rangelist[i+1]-myrange
+                 except IndexError:
+                     #no following boundary: report the odd extent and go on
+                     print(wanted,not_wanted)
+                     print(extent,sorted(rangedict.items()),myrange)
+     return result
+
+ #the active subvolume must be the last one
+ def add_snapshots(self,snapshots):
+ #remember the subvolume order; store a copy so that the caller can keep
+ #modifying its own sequence afterwards
+ self._snapshots=snapshots.copy()
+
+ #calculate the size of ranges ontop of the previous subvolume
+ #older subvolumes must be first in subvolume list
+ def find_snapshot_size_to_previous(self):
+     """Return a Counter with, for each stored subvolume, the size of the
+     ranges it references that the subvolume just before it in the stored
+     order does not. The first subvolume is measured against nothing."""
+     results=Counter()
+     for position,snapshot in enumerate(self._snapshots):
+         excluded=[self._snapshots[position-1]] if position>0 else []
+         results[snapshot]+=self.find_snapshots_size([snapshot],excluded)
+     return results
+
+ #calculate the size of ranges ontop of the current active subvolume
+ def find_snapshot_size_to_current(self):
+     """Return a Counter with, for each stored subvolume, the size of the
+     ranges it references that the last stored subvolume does not. The last
+     subvolume itself is measured against nothing."""
+     results=Counter()
+     current=self._snapshots[-1]
+     for snapshot in self._snapshots:
+         excluded=[] if snapshot==current else [current]
+         results[snapshot]+=self.find_snapshots_size([snapshot],excluded)
+     return results
+
+#try to optimize parsing by piping, but to no avail
+
+def disk_parse_pipe(pipe,path,tree):
+    """Walk the EXTENT_DATA items of subvolume `tree` and send one
+    (key, offset, stop, objectid) tuple per non-inline extent through `pipe`,
+    followed by None as end-of-stream marker.
+
+    The pipe and the filesystem descriptor are released even when parsing
+    fails; in that case no marker is sent and the exception propagates.
+    """
+    print("Parsing subvolume:",tree)
+    fs=btrfs.FileSystem(path)
+    try:
+        min_key=btrfs.ctree.Key(0,btrfs.ctree.EXTENT_DATA_KEY,0)
+        for header, data in btrfs.ioctl.search_v2(fs.fd, tree,min_key):
+            if header.type != btrfs.ctree.EXTENT_DATA_KEY:
+                continue
+            datum=btrfs.ctree.FileExtentItem(header,data)
+            if datum.type == btrfs.ctree.FILE_EXTENT_INLINE:
+                continue
+            key=unique_number(datum.disk_bytenr,datum.disk_num_bytes)
+            stop=datum.offset+datum.num_bytes
+            pipe.send((key,datum.offset,stop,datum.key.objectid))
+        pipe.send(None)
+    finally:
+        try:
+            pipe.close()
+        finally:
+            os.close(fs.fd)
+            del fs
+
+
+
+def pipe_add(data_tree,path,tree,analyze_files):
+    """Parse subvolume `tree` in a child process and feed every tuple it
+    sends into `data_tree.add`.
+
+    The inode (fourth tuple field) is only passed on when `analyze_files` is
+    true, otherwise 0 is used.
+
+    Raises:
+        RuntimeError: the child closed the pipe before sending its None
+            end-of-stream marker.
+    """
+    parent_conn, child_conn = multiprocessing.Pipe(False)
+    p = multiprocessing.Process(target=disk_parse_pipe, args=(child_conn,path,tree,))
+    p.start()
+    #drop our copy of the sending end: while it stays open recv() can never
+    #see EOF and would block forever if the child died early
+    child_conn.close()
+    finished=False
+    try:
+        while True:
+            try:
+                res=parent_conn.recv()
+            except EOFError:
+                break
+            if res is None:
+                finished=True
+                break
+            inode=res[3] if analyze_files else 0
+            data_tree.add(tree,res[0],res[1],res[2],inode)
+    finally:
+        parent_conn.close()
+        p.join()
+    if not finished:
+        raise RuntimeError("parsing of subvolume {} ended unexpectedly".format(tree))
+
+
+#try to optimize parsing by using multiprocessing
+
+
+#return the data to add for this extent
+#unfortunately we have to open each time the filesystem and reparse partially the
+#data
+def actual_extent_parsing(item):
+    """Re-read one extent item and return the data to add for it.
+
+    `item` is a (header, path, tree) tuple as produced by generate_extents().
+    Returns (key, offset, stop, objectid) for a non-inline extent, else None.
+    The filesystem descriptor is closed even when the lookup raises.
+    """
+    header,path,tree=item
+    result=None
+    fs=btrfs.FileSystem(path)
+    try:
+        search_key=btrfs.ctree.Key(header.objectid,btrfs.ctree.EXTENT_DATA_KEY,header.offset)
+        for found_header,data in btrfs.ioctl.search_v2(fs.fd, tree,search_key,nr_items=1):
+            datum=btrfs.ctree.FileExtentItem(found_header,data)
+            if datum.type != btrfs.ctree.FILE_EXTENT_INLINE:# and datum.disk_bytenr !=0:
+                key=unique_number(datum.disk_bytenr,datum.disk_num_bytes)
+                stop=datum.offset+datum.num_bytes
+                result = (key,datum.offset,stop,datum.key.objectid)
+    finally:
+        os.close(fs.fd)
+        del fs
+    return result
+
+
+#main function to parse data from disk, generate 'interesting' extents
+def generate_extents(path,tree):
+    """Yield a (header, path, tree) tuple for every EXTENT_DATA item found in
+    subvolume `tree` of the filesystem at `path`.
+
+    The filesystem descriptor is closed when the generator is exhausted and
+    also when it is closed or garbage-collected before that.
+    """
+    fs=btrfs.FileSystem(path)
+    try:
+        min_key=btrfs.ctree.Key(0,btrfs.ctree.EXTENT_DATA_KEY,0)
+        for header, _ in btrfs.ioctl.search_v2(fs.fd, tree,min_key):
+            if header.type == btrfs.ctree.EXTENT_DATA_KEY:
+                yield header,path,tree
+    finally:
+        os.close(fs.fd)
+        del fs
+
+
+#parallelize parsing, return the data without order what is the best value for chunk size?
+def disk_parse_parallel(pool,data_tree,path,tree,analyze_files):
+    """Parse subvolume `tree` with the workers of `pool` and add every result
+    to `data_tree`; results arrive in no particular order, in chunks of 128.
+
+    The inode is only passed on when `analyze_files` is true, otherwise 0.
+    """
+    print("Parsing subvolume:",tree)
+    parsed=pool.imap_unordered(actual_extent_parsing, generate_extents(path,tree),128)
+    for res in parsed:
+        if res is None:
+            continue
+        inode=res[3] if analyze_files else 0
+        data_tree.add(tree,res[0],res[1],res[2],inode)
+
+
+
+#main function to parse data from disk and add the to the tree of extents, sequentially
+def disk_parse(data_tree,fs,tree,analyze_files):
+    """Sequentially walk the EXTENT_DATA items of subvolume `tree` on the open
+    filesystem `fs` and add every non-inline extent to `data_tree`.
+
+    The inode is only passed on when `analyze_files` is true, otherwise 0.
+    """
+    print("Parsing subvolume:",tree)
+    first_key=btrfs.ctree.Key(0,btrfs.ctree.EXTENT_DATA_KEY,0)
+    for header, data in btrfs.ioctl.search_v2(fs.fd, tree,first_key):
+        if header.type != btrfs.ctree.EXTENT_DATA_KEY:
+            continue
+        datum=btrfs.ctree.FileExtentItem(header,data)
+        if datum.type == btrfs.ctree.FILE_EXTENT_INLINE:
+            continue
+        key=unique_number(datum.disk_bytenr,datum.disk_num_bytes)
+        stop=datum.offset+datum.num_bytes
+        inode=datum.key.objectid if analyze_files else 0
+        data_tree.add(tree,key,datum.offset,stop,inode)
+
+def main():
+    """Command-line entry point: parse the arguments, scan the selected
+    subvolumes and print the per-subvolume size table."""
+    multiprocessing.set_start_method('spawn')
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-u","--unique",action='store_true',help="calculate only unique data, -r argument makes no sense if -u is active")
+    parser.add_argument("-f","--files",action='store_true',help="find filenames that exist in unique extents")
+    parser.add_argument("path", type=str,
+                        help="path of the btrfs filesystem")
+    parser.add_argument("-r", "--root", type=int,default=5,
+                        help="current active subvolume to analyze first, default is 5")
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('-i', '--ignore', action='store_true',help="Do not analyze the specified subvolumes")
+    group.add_argument('-o', '--only', action='store_true',help="Analyze only the specified subvolumes")
+    parser.add_argument('subvolume', nargs='*', type=int, help='Subvolumes to ingore or analyze')
+    args=parser.parse_args()
+
+    #find subvolumes to parse, make sure -r subvolume stays first
+    parse_trees=[5]
+    if args.root!=5:
+        parse_trees=[args.root,5]
+    fs = btrfs.FileSystem(args.path)
+    for subvol in fs.subvolumes():
+        if subvol.key.objectid != args.root:
+            parse_trees.append(subvol.key.objectid)
+
+    #these are the subvolumes specified by the user, these will be either ignored
+    #or all the other subvolumes will be ignored
+    special_subvolumes=set(args.subvolume)
+
+    #if no argument specified then assume that the user wanted to ignore the specified subvolumes
+    if not args.ignore and not args.only:
+        args.ignore=True
+
+    #remove the unneeded subvolumes
+    if args.ignore:
+        for item in special_subvolumes:
+            try:
+                parse_trees.remove(item)
+            except ValueError:
+                #the user named a subvolume that is not in the list
+                pass
+    else:
+        parse_trees=[tree for tree in parse_trees if tree in special_subvolumes]
+
+    data_tree=TreeWrapper()
+
+    #move the root subvolume in the end
+    #older subvolumes must be first
+    changed_snapshots = deque(parse_trees)
+    changed_snapshots.rotate(-1)
+    parse_trees=list(changed_snapshots)
+    data_tree.add_snapshots(parse_trees)
+
+    #parse the trees from newer to older
+    parse_trees=list(reversed(parse_trees))
+    pool = multiprocessing.Pool(processes=4)
+    print("Subvolumes to parse:",parse_trees)
+    for tree in parse_trees:
+        #disk_parse(data_tree,fs,tree,args.files)
+        disk_parse_parallel(pool,data_tree,args.path,tree,args.files)
+        #pipe_add(data_tree,args.path,tree,args.files)
+    pool.close()
+    pool.join()
+
+    data_tree.transform()
+    unique_sum=0
+    unique_data,files=data_tree.find_unique(fs,args.files)
+    #if unique analysis is only needed, do not calculate differences
+    if args.unique:
+        current_data=Counter()
+        previous_data=Counter()
+    else:
+        current_data=data_tree.find_snapshot_size_to_current()
+        previous_data=data_tree.find_snapshot_size_to_previous()
+    print("|-------------------------------|-----------|-------------------|---------------------|----------------------|")
+    print("| | |Unique File Extents| Extents added ontop|Extents added ontop of|")
+    print("| | | per subvolume|of previous subvolume|current(act) subvolume|")
+    print("|-------------------------------|-----------|-------------------|---------------------|----------------------|")
+    print("| Path| SubvolumId| Size| Size| Size|")
+    print("| | | | | |")
+    for snapshot in parse_trees:
+        subpath = args.path
+        #a None path cannot be formatted with a width, fall back to an empty one
+        path = get_subvolume_path(snapshot, subpath) or ""
+        print("| {:>30}| {:>7}| {:>10}| {:>10}| {:>10}|".format(path, snapshot, btrfs.utils.pretty_size(unique_data[snapshot]),btrfs.utils.pretty_size(previous_data[snapshot]),btrfs.utils.pretty_size(current_data[snapshot])))
+        #print(files[snapshot])
+        unique_sum+=unique_data[snapshot]
+    total_data=len(data_tree)
+    #nothing parsed (empty filesystem or everything ignored): avoid dividing by zero
+    volatility=unique_sum/total_data if total_data else 0.0
+    print("|-------------------------------|-----------|-------------------|---------------------|----------------------|")
+    print("")
+    print("Unique Data size of subvolumes:",btrfs.utils.pretty_size(unique_sum),"Total size:",btrfs.utils.pretty_size(total_data),"Volatility:","{:.2%}".format(volatility))
+    if args.files:
+        print()
+        print("Possible Unique Files:")
+        for file,myset in files.items():
+            print(file,":",myset)
+
+if __name__ == '__main__':
+ main()
diff --git a/3_check-btrfs-sub-size-diff__with-line.py b/3_check-btrfs-sub-size-diff__with-line.py
new file mode 100755
index 0000000..162f4d8
--- /dev/null
+++ b/3_check-btrfs-sub-size-diff__with-line.py
@@ -0,0 +1,664 @@
+#!/usr/bin/python3
+
+# Copyright (C) 2018 Dimitris Georgiou
+
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation, either version 3 of the License, or
+# (at your option) any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+
+import btrfs
+import argparse
+import sys
+from collections import deque
+from collections import Counter
+from collections import defaultdict
+import math, array
+from functools import lru_cache
+import multiprocessing,os
+#from sortedcontainers import SortedList
+import bisect
+#function to convert a pair of positive integers to a single integer
+#we want to decrease memory consumption, thus we need this trick
+#http://szudzik.com/ElegantPairing.pdf
+#cache the results for speed up
+
+@lru_cache(maxsize=1024)
+def unique_number(x,y):
+    """Pack two non-negative integers into a single integer.
+
+    Implements the pairing from http://szudzik.com/ElegantPairing.pdf:
+    x*x+x+y when x>=y, otherwise y*y+x. Results are memoised by lru_cache.
+    """
+    if x>=y:
+        return x*x+x+y
+    return y*y+x
+
+#undo the above function, return x,y based on a single number
+#also cache the results
+
+
+################ Obtain Subvolume Path Function ##################################
+
+import subprocess # Needed for external command(btrfs) execution
+
+def _path_from_subvolume_listing(listing, snapshot):
+    """Return the path of subvolume `snapshot` found in `btrfs subvolume list` text, or ""."""
+    wanted = str(snapshot)
+    for line in listing.splitlines():
+        fields = line.split()
+        # Expected line shape: "ID <id> gen <n> top level <id> path <path>".
+        # Comparing the whole ID field avoids matching e.g. 1256 when asked for 256.
+        if len(fields) >= 2 and fields[0] == "ID" and fields[1] == wanted:
+            # Everything after " path " is the path, so embedded spaces survive.
+            _, found, path = line.partition(" path ")
+            if found:
+                return path.strip()
+    return ""
+
+def get_subvolume_path(snapshot, subpath):
+    """
+    Retrieves the path of a subvolume based on its snapshot ID.
+
+    Args:
+        snapshot: The integer ID of the snapshot.
+        subpath: A path inside the btrfs filesystem; it is handed to
+            `sudo btrfs subvolume list`.
+
+    Returns:
+        A fixed label for the top-level subvolume (ID 5), otherwise the path
+        printed by btrfs for that ID, or an empty string when the ID is not
+        listed or the command produced no output.
+    """
+    if snapshot == 5:
+        return "///// Top-Level(ID:5) /////"
+
+    # No check=True on purpose: a failing command just yields an empty listing,
+    # so the caller can keep printing its table with a blank path.
+    listing = subprocess.run(["sudo", "btrfs", "subvolume", "list", f"{subpath}"], capture_output=True, text=True).stdout
+    return _path_from_subvolume_listing(listing, snapshot)
+
+################ Obtain Subvolume Path Function ##################################
+
+
+@lru_cache(maxsize=1024)
+def unique_to_pair(number):
+    """Undo unique_number(): return the (x, y) pair packed into `number`.
+
+    Results are memoised by lru_cache.
+    """
+    root=int(math.sqrt(number))
+    # math.sqrt works in double precision, so above 2**53 the estimate can be
+    # off by one; settle the exact integer square root with integer arithmetic.
+    while root*root>number:
+        root-=1
+    while (root+1)*(root+1)<=number:
+        root+=1
+    crit=number-root*root
+    if crit<root:
+        return crit,root
+    return root,crit-root
+
+#take a list of 'paired' numbers and return the x coordinate, which is snapshot
+#stored into the pair
+def return_snapshots(mylist):
+    """Decode every paired number in `mylist` and return the list of their
+    first (snapshot) components, in the same order."""
+    return [unique_to_pair(coded)[0] for coded in mylist]
+
+#take a list of 'paired' numbers and return the paired number that has the same
+#x coordinate, which is the snapshot stored into the pair. if shallow search only
+#the last element
+def return_coded(mylist,snapshot,shallow):
+    """Return the paired number in `mylist` whose snapshot component equals
+    `snapshot`, or None when there is no such entry.
+
+    With `shallow` true only the last element is inspected; otherwise the
+    first match in list order is returned. An empty sequence yields None.
+    """
+    if not mylist:
+        #indexing [-1] on an empty sequence would raise IndexError
+        return None
+    candidates=mylist[-1:] if shallow else mylist
+    for item in candidates:
+        if unique_to_pair(item)[0]==snapshot:
+            return item
+    return None
+
+
+#take a paired number and compare it with a snapshot
+#cache the results for speedup
+@lru_cache(maxsize=1024)
+def compare_pair_to_snapshot(item,snapshot):
+    """Tell whether the paired number `item` decodes to `snapshot` as its
+    first component. Results are memoised by lru_cache."""
+    return unique_to_pair(item)[0]==snapshot
+
+#find an item that has the same subvolume and remove it
+def remove_snapshot_from_list(mylist,snapshot,shallow):
+    """Remove, in place, the entry of `mylist` that return_coded() finds for
+    `snapshot` (if any) and return the same sequence object."""
+    coded=return_coded(mylist,snapshot,shallow)
+    #identity test: a paired number of 0 is a valid entry, only None means "absent"
+    if coded is not None:
+        mylist.remove(coded)
+    return mylist
+
+#find if an item with the same snapshot exists in the list
+def find_snapshot_in_list(mylist,snapshot,shallow):
+    """Tell whether `mylist` holds a paired number belonging to `snapshot`.
+
+    With `shallow` true only the last element is checked; otherwise every
+    element is. An empty sequence yields False.
+    """
+    if not mylist:
+        #indexing [-1] on an empty sequence would raise IndexError
+        return False
+    candidates=mylist[-1:] if shallow else mylist
+    return any(compare_pair_to_snapshot(item,snapshot) for item in candidates)
+
+#function to calculate subtraction between 2 integer intervals
+def range_sub(range1,range2):
+    """Subtract integer interval `range2` from `range1`.
+
+    Both arguments are (low, high) tuples whose end points are treated as
+    inclusive. Returns a list with zero, one or two (low, high) tuples: the
+    parts of `range1` that are not covered by `range2`.
+    """
+    result=[]
+    a,b=range1
+    x,y=range2
+    #NOTE(review): this first branch was garbled in the source and has been
+    #reconstructed; disjoint intervals leave range1 untouched
+    if x>b or y<a:
+        result.append((a,b))
+        return result
+    if y>=b:
+        #range2 covers the upper end of range1
+        if x>a:
+            b=x-1
+        else:
+            #range2 covers range1 completely
+            return result
+    else:
+        #range2 ends inside range1: keep what lies below it (if any) ...
+        if x>a:
+            result.append((a,x-1))
+        #... and what lies above it
+        a=y+1
+    result.append((a,b))
+    return result
+
+
+#Class to implement intervals, we only care about subtraction
+class Ranges:
+    """A list of (start, stop) integer intervals; only subtraction is implemented.
+
+    `list` holds the intervals in insertion order, `lower` is the start of the
+    first interval added and `upper` the stop of the last one added.
+    """
+
+    def __init__(self,start=None,stop=None):
+        """Create an empty collection, or one holding (start, stop) when `start` is given."""
+        self.list=[]
+        self.upper=None
+        self.lower=None
+        #identity test so that a start of 0 still counts as "given"
+        if start is not None:
+            self.list.append((start,stop))
+            self.upper=stop
+            self.lower=start
+
+    def is_empty(self):
+        """Return True when no interval is stored."""
+        return not self.list
+
+    def __sub__(self,other):
+        """Return a new Ranges with every interval of `other` cut out of this one."""
+        final_result=Ranges()
+        queue=deque(self.list)
+
+        while queue:
+            item=queue.popleft()
+            #this could be optimized more
+            for otheritem in other.list:
+                result=range_sub(item,otheritem)
+                if not result:
+                    #nothing of this interval survives
+                    item=None
+                    break
+                if len(result)>1:
+                    #the interval was split: handle the upper piece later
+                    queue.appendleft(result[1])
+                item=result[0]
+            if item is not None:
+                final_result.append(item)
+        return final_result
+
+    def append(self,myrange):
+        """Add interval `myrange`; it becomes the new `upper`, and `lower` if none was set."""
+        self.list.append(myrange)
+        self.upper=myrange[1]
+        if self.lower is None:
+            self.lower=myrange[0]
+
+    def __str__(self):
+        return str(self.list)
+
+#Class to hold data. It's a dictionary of dictionaries.
+#tree[key of the extent]= {range1: [list of paired (snapshot,inode)],range2: [list of paired (snapshot,inode)]}
+#inodes data are used to find which files hold data of unique extents.
+class TreeWrapper:
+ def __init__(self):
+ self._tree=dict()
+ self._snapshots=[]
+ #self._inodes=dict()
+
+
+ #check if the current tree has data for this extent/key.
+ #if it has, check if the current extent range is already parsed.
+ def delete_range(self,key,limit,tree,shallow):
+     """Remove subvolume `tree` from the entries stored at boundary `limit`
+     of extent `key`; drop the boundary itself once nothing is left there."""
+     boundaries=self._tree[key]
+     remove_snapshot_from_list(boundaries[limit],tree,shallow)
+     if not boundaries[limit]:
+         del boundaries[limit]
+
+ #use array instead of list because integers consume too much memory in python
+ def add_range(self,key,limit,mypair):
+     """Append paired number `mypair` to the array stored at boundary `limit`
+     of extent `key`, creating the extent dictionary and the unsigned 64-bit
+     array on first use."""
+     boundaries=self._tree.setdefault(key,dict())
+     if limit not in boundaries:
+         boundaries[limit]=array.array('Q')
+     boundaries[limit].append(mypair)
+
+ #unfortunately some extents reappear, maybe there are dedup or reflink?
+ #so we need to take care of that by calculating exactly the data that each
+ #subvolume uses
+ def add(self,tree,key,start,stop,inode):
+ mypair=unique_number(tree,inode)
+ if key in self._tree.keys():
+ extent=self._tree[key]
+ #find all ranges that have data for this subvolume in this extent
+ #do not search deep, since we add one subvolume at a time
+ ranges=[]
+ for myrange,snapshotinodelist in extent.items():
+ if find_snapshot_in_list(snapshotinodelist,tree,True):
+ ranges.append(myrange)
+ ranges.sort()
+
+ #start of intervals for this key and tree are in even positions
+ #ends are in odd positions
+ starts=ranges[::2]
+ stops=ranges[1::2]
+ if len(starts)!=len(stops):
+ print("problem",key,ranges)
+ sys.exit(0)
+
+ #if the data we are trying to push already exist, ignore them
+ if start in starts:
+ index=starts.index(start)
+ if stop == stops[index]:
+ #print(ranges,start,stop)
+ return
+
+ #Algorithm: we have these intervals: 0...100, 150...200
+ #and we want to add 80...170
+ #the final result must be 0...200 because this extent is used
+ #interely by this snapshot
+
+ #For each base, calculate base - target. If the base
+ #interval is modified then delete that end because new data will
+ #be added. Then target becomes target-base and continue with the next base
+
+ #try to minimize the subtractions needed
+ realstart=bisect.bisect_left(starts,start)
+ realstop=bisect.bisect_right(stops,stop)
+ if realstart > 0:
+ #print(realstart,starts,start)
+ realstart-=1
+ mystarts=starts[realstart:realstop+1]
+ mystops=stops[realstart:realstop+1]
+ if len(mystarts)>0:
+ if mystops[-1]0:
+ if mystarts[0]>stop:
+ mystarts=mystarts[1:]
+
+ #target is the interval we are trying to add
+ target=Ranges(start,stop)
+ for i, oldstart in enumerate(mystarts):
+ #base is the interval that we must analyze
+ base=Ranges(oldstart,mystops[i])
+ newbase=base-target
+ #if newbase differs in an end of base then that end must
+ #be deleted because the end of the interval will be added
+ if base.lower>target.lower and newbase.lower!=base.lower:
+ self.delete_range(key,base.lower,tree,True)
+ if base.upper=1:
+ try:
+ size=iterableview[i+1][0]-myrange
+ result+=size
+ except:
+ print(extent,sorted(rangedict.items()),mytuple)
+ return result
+
+ #find those ranges that have only one snapshot, if this snapshot is deleted
+ #this space will be freed.
+ #based on the scenario of transform it should return:
+ #result[tree1]=pos2-pos1+pos4-pos3
+ #result[tree2]=0
+ #if files are analyzed use the inode data to find them and store them in different dictionary.
+ def find_unique(self,fs,analyze_file):
+     """Sum, per subvolume, the size of the ranges that only it references.
+
+     For every boundary whose entry list holds exactly one paired number, the
+     distance to the next boundary of the same extent is added to that
+     subvolume's counter.
+
+     Returns (result, result_data): a Counter keyed by subvolume, and a
+     defaultdict mapping a looked-up file name to the set of subvolumes it
+     was found in. The second one is only filled when `analyze_file` is true.
+     """
+     result=Counter()
+     result_data=defaultdict(set)
+     for rangedict in self._tree.values():
+         iterableview=sorted(rangedict.items())
+         for i,(myrange,unique_pair_list) in enumerate(iterableview):
+             if len(unique_pair_list)!=1:
+                 continue
+             subvolume,inode=unique_to_pair(unique_pair_list[0])
+             #NOTE(review): assumes a later boundary always exists for such an
+             #entry (presumably guaranteed by transform) - confirm
+             size=iterableview[i+1][0]-myrange
+             result[subvolume]+=size
+             if analyze_file:
+                 try:
+                     file=btrfs.ioctl.ino_lookup(fs.fd,subvolume,inode)
+                     result_data[file.name_bytes.decode('utf-8')].add(subvolume)
+                 except Exception:
+                     #best effort: report and go on, but let KeyboardInterrupt
+                     #and SystemExit through
+                     print("Inode not found",inode)
+     return result,result_data
+
+ #helper function to find the size of the extent ranges that have the desired snapshots
+ def find_snapshots_size(self,wanted,not_wanted):
+     """Return the total size of the ranges that are referenced by at least
+     one subvolume in `wanted` and by none in `not_wanted`.
+
+     The size of a range is the distance from its boundary to the next
+     boundary of the same extent.
+     """
+     #build the sets once instead of once per range
+     wanted_set=set(wanted)
+     unwanted_set=set(not_wanted)
+     result=0
+     for extent,rangedict in self._tree.items():
+         rangelist=sorted(rangedict.keys())
+         for i,myrange in enumerate(rangelist):
+             snapshots=set(return_snapshots(rangedict[myrange]))
+             if (wanted_set & snapshots) and not (unwanted_set & snapshots):
+                 try:
+                     result+=rangelist[i+1]-myrange
+                 except IndexError:
+                     #no following boundary: report the odd extent and go on
+                     print(wanted,not_wanted)
+                     print(extent,sorted(rangedict.items()),myrange)
+     return result
+
+ #the active subvolume must be the last one
+ def add_snapshots(self,snapshots):
+ #remember the subvolume order; store a copy so that the caller can keep
+ #modifying its own sequence afterwards
+ self._snapshots=snapshots.copy()
+
+ #calculate the size of ranges ontop of the previous subvolume
+ #older subvolumes must be first in subvolume list
+ def find_snapshot_size_to_previous(self):
+     """Return a Counter with, for each stored subvolume, the size of the
+     ranges it references that the subvolume just before it in the stored
+     order does not. The first subvolume is measured against nothing."""
+     results=Counter()
+     for position,snapshot in enumerate(self._snapshots):
+         excluded=[self._snapshots[position-1]] if position>0 else []
+         results[snapshot]+=self.find_snapshots_size([snapshot],excluded)
+     return results
+
+ #calculate the size of ranges ontop of the current active subvolume
+ def find_snapshot_size_to_current(self):
+     """Return a Counter with, for each stored subvolume, the size of the
+     ranges it references that the last stored subvolume does not. The last
+     subvolume itself is measured against nothing."""
+     results=Counter()
+     current=self._snapshots[-1]
+     for snapshot in self._snapshots:
+         excluded=[] if snapshot==current else [current]
+         results[snapshot]+=self.find_snapshots_size([snapshot],excluded)
+     return results
+
+#try to optimize parsing by piping, but to no avail
+
+def disk_parse_pipe(pipe,path,tree):
+    """Walk the EXTENT_DATA items of subvolume `tree` and send one
+    (key, offset, stop, objectid) tuple per non-inline extent through `pipe`,
+    followed by None as end-of-stream marker.
+
+    The pipe and the filesystem descriptor are released even when parsing
+    fails; in that case no marker is sent and the exception propagates.
+    """
+    print("Parsing subvolume:",tree)
+    fs=btrfs.FileSystem(path)
+    try:
+        min_key=btrfs.ctree.Key(0,btrfs.ctree.EXTENT_DATA_KEY,0)
+        for header, data in btrfs.ioctl.search_v2(fs.fd, tree,min_key):
+            if header.type != btrfs.ctree.EXTENT_DATA_KEY:
+                continue
+            datum=btrfs.ctree.FileExtentItem(header,data)
+            if datum.type == btrfs.ctree.FILE_EXTENT_INLINE:
+                continue
+            key=unique_number(datum.disk_bytenr,datum.disk_num_bytes)
+            stop=datum.offset+datum.num_bytes
+            pipe.send((key,datum.offset,stop,datum.key.objectid))
+        pipe.send(None)
+    finally:
+        try:
+            pipe.close()
+        finally:
+            os.close(fs.fd)
+            del fs
+
+
+
+def pipe_add(data_tree,path,tree,analyze_files):
+    """Parse `tree` in a child process and add every extent it reports to `data_tree`.
+
+    The inode number is only stored when `analyze_files` is true; otherwise 0
+    is stored. Raises RuntimeError when the child closes the pipe without
+    sending the terminating None.
+    """
+    parent_conn, child_conn = multiprocessing.Pipe(False)
+    p = multiprocessing.Process(target=disk_parse_pipe, args=(child_conn,path,tree,))
+    p.start()
+    #drop the parent's copy of the write end, otherwise recv() never sees EOF if the child dies
+    child_conn.close()
+    finished=False
+    while True:
+        try:
+            res=parent_conn.recv()
+        except EOFError:
+            break
+        if res is None:
+            finished=True
+            break
+        inode=res[3] if analyze_files else 0
+        data_tree.add(tree,res[0],res[1],res[2],inode)
+    parent_conn.close()
+    p.join()
+    if not finished:
+        raise RuntimeError("Parsing of subvolume {} ended unexpectedly".format(tree))
+
+
+#try to optimize parsing by using multiprocessing
+
+
+#return the data to add for this extent
+#unfortunately we have to open each time the filesystem and reparse partially the
+#data
+def actual_extent_parsing(item):
+    """Re-read one extent item from disk and return the data to add for it.
+
+    `item` is a (header, path, tree) triple. Returns a
+    (key, start, stop, inode) tuple, or None when the extent is inline or
+    the search yields no item.
+    """
+    header,path,tree=item
+    result=None
+    fs=btrfs.FileSystem(path)
+    try:
+        search_key=btrfs.ctree.Key(header.objectid,btrfs.ctree.EXTENT_DATA_KEY,header.offset)
+        for found_header,data in btrfs.ioctl.search_v2(fs.fd, tree,search_key,nr_items=1):
+            datum=btrfs.ctree.FileExtentItem(found_header,data)
+            if datum.type != btrfs.ctree.FILE_EXTENT_INLINE:
+                extent_key=unique_number(datum.disk_bytenr,datum.disk_num_bytes)
+                stop=datum.offset+datum.num_bytes
+                result = (extent_key,datum.offset,stop,datum.key.objectid)
+    finally:
+        #the filesystem is reopened for every item, so never leak its descriptor
+        os.close(fs.fd)
+    return result
+
+
+#main function to parse data from disk, generate 'interesting' extents
+def generate_extents(path,tree):
+    """Yield (header, path, tree) for every EXTENT_DATA item found in subvolume `tree`."""
+    fs=btrfs.FileSystem(path)
+    try:
+        min_key=btrfs.ctree.Key(0,btrfs.ctree.EXTENT_DATA_KEY,0)
+        for header, _ in btrfs.ioctl.search_v2(fs.fd, tree,min_key):
+            if header.type == btrfs.ctree.EXTENT_DATA_KEY:
+                yield header,path,tree
+    finally:
+        #also runs when the consumer abandons the generator early or the search fails
+        os.close(fs.fd)
+
+
+#parallelize parsing, return the data without order; what is the best value for chunk size?
+def disk_parse_parallel(pool,data_tree,path,tree,analyze_files):
+    """Parse subvolume `tree` through the worker `pool` and add each result to `data_tree`.
+
+    Results arrive in arbitrary order, in chunks of 128 items. The inode is
+    only stored when `analyze_files` is true; otherwise 0 is stored.
+    """
+    print("Parsing subvolume:",tree)
+    extents=generate_extents(path,tree)
+    for parsed in pool.imap_unordered(actual_extent_parsing, extents,128):
+        if parsed is None:
+            continue
+        extent_key,start,stop,inode=parsed
+        if not analyze_files:
+            inode=0
+        data_tree.add(tree,extent_key,start,stop,inode)
+
+
+
+#main function to parse data from disk and add them to the tree of extents, sequentially
+def disk_parse(data_tree,fs,tree,analyze_files):
+    """Walk subvolume `tree` on the open filesystem `fs` and add its extents to `data_tree`.
+
+    Inline extents are skipped. The inode is only stored when
+    `analyze_files` is true; otherwise 0 is stored.
+    """
+    print("Parsing subvolume:",tree)
+    first_key=btrfs.ctree.Key(0,btrfs.ctree.EXTENT_DATA_KEY,0)
+    for header, data in btrfs.ioctl.search_v2(fs.fd, tree,first_key):
+        if header.type != btrfs.ctree.EXTENT_DATA_KEY:
+            continue
+        extent=btrfs.ctree.FileExtentItem(header,data)
+        if extent.type == btrfs.ctree.FILE_EXTENT_INLINE:
+            continue
+        extent_key=unique_number(extent.disk_bytenr,extent.disk_num_bytes)
+        end=extent.offset+extent.num_bytes
+        inode=extent.key.objectid if analyze_files else 0
+        data_tree.add(tree,extent_key,extent.offset,end,inode)
+
+def main():
+    """Parse the command line, build the extent tree and print the per-subvolume size report.
+
+    Exits with a message when the -i/-o filtering leaves no subvolume to
+    analyze.
+    """
+    multiprocessing.set_start_method('spawn')
+    parser = argparse.ArgumentParser()
+    parser.add_argument("-u","--unique",action='store_true',help="calculate only unique data, -r argument makes no sense if -u is active")
+    parser.add_argument("-f","--files",action='store_true',help="find filenames that exist in unique extents")
+    parser.add_argument("path", type=str,
+                        help="path of the btrfs filesystem")
+    parser.add_argument("-r", "--root", type=int,default=5,
+                        help="current active subvolume to analyze first, default is 5")
+    group = parser.add_mutually_exclusive_group()
+    group.add_argument('-i', '--ignore', action='store_true',help="Do not analyze the specified subvolumes")
+    group.add_argument('-o', '--only', action='store_true',help="Analyze only the specified subvolumes")
+    parser.add_argument('subvolume', nargs='*', type=int, help='Subvolumes to ingore or analyze')
+    args=parser.parse_args()
+
+    #find subvolumes to parse, make sure -r subvolume stays first
+    parse_trees=[5]
+    if args.root!=5:
+        parse_trees=[args.root,5]
+    fs = btrfs.FileSystem(args.path)
+    for subvol in fs.subvolumes():
+        if subvol.key.objectid != args.root:
+            parse_trees.append(subvol.key.objectid)
+
+    #these are the subvolumes specified by the user, these will be either ignored
+    #or all the other subvolumes will be ignored
+    special_subvolumes=set(args.subvolume)
+
+    #without -i or -o the listed subvolumes are ignored
+    if args.ignore or not args.only:
+        parse_trees=[tree for tree in parse_trees if tree not in special_subvolumes]
+    else:
+        parse_trees=[tree for tree in parse_trees if tree in special_subvolumes]
+
+    #nothing left to analyze: stop before the size calculations index an empty list
+    if not parse_trees:
+        sys.exit("No subvolumes left to analyze")
+
+    data_tree=TreeWrapper()
+
+    #move the root subvolume in the end
+    #older subvolumes must be first
+    changed_snapshots = deque(parse_trees)
+    changed_snapshots.rotate(-1)
+    parse_trees=list(changed_snapshots)
+    data_tree.add_snapshots(parse_trees)
+
+    #parse the trees from newer to older
+    parse_trees=list(reversed(parse_trees))
+    print("Subvolumes to parse:",parse_trees)
+    #the context manager terminates the workers if parsing raises
+    with multiprocessing.Pool(processes=4) as pool:
+        for tree in parse_trees:
+            disk_parse_parallel(pool,data_tree,args.path,tree,args.files)
+        pool.close()
+        pool.join()
+
+    data_tree.transform()
+    unique_sum=0
+    unique_data,files=data_tree.find_unique(fs,args.files)
+    #if unique analysis is only needed, do not calculate differences
+    if args.unique:
+        current_data=Counter()
+        previous_data=Counter()
+    else:
+        current_data=data_tree.find_snapshot_size_to_current()
+        previous_data=data_tree.find_snapshot_size_to_previous()
+    separator="|-------------------------------|-----------|-------------------|---------------------|----------------------|"
+    row_format="| {:>30}| {:>7}| {:>10}| {:>10}| {:>10}|\n"+separator
+    print(separator)
+    print("| | |Unique File Extents| Extents added ontop|Extents added ontop of|")
+    print("| | | per subvolume|of previous subvolume|current(act) subvolume|")
+    print(separator)
+    print("| Path| SubvolumId| Size| Size| Size|")
+    print(separator)
+    for snapshot in parse_trees:
+        path = get_subvolume_path(snapshot, args.path)
+        print(row_format.format(path, snapshot, btrfs.utils.pretty_size(unique_data[snapshot]),btrfs.utils.pretty_size(previous_data[snapshot]),btrfs.utils.pretty_size(current_data[snapshot])))
+        unique_sum+=unique_data[snapshot]
+    total_data=len(data_tree)
+    #an empty extent tree would otherwise divide by zero
+    volatility=unique_sum/total_data if total_data else 0
+    print("")
+    print("Unique Data size of subvolumes:",btrfs.utils.pretty_size(unique_sum),"Total size:",btrfs.utils.pretty_size(total_data),"Volatility:","{:.2%}".format(volatility))
+    if args.files:
+        print()
+        print("Possible Unique Files:")
+        for file,myset in files.items():
+            print(file,":",myset)
+
+#run the analysis only when executed as a script; with the 'spawn' start method
+#set in main() the worker processes import this module again and must not re-run it
+if __name__ == '__main__':
+ main()
diff --git a/Previews/.directory b/Previews/.directory
new file mode 100644
index 0000000..d2b3c2a
--- /dev/null
+++ b/Previews/.directory
@@ -0,0 +1,9 @@
+[Dolphin]
+HeaderColumnWidths=599,72,133,72
+Timestamp=2024,9,13,18,33,18.816
+Version=4
+ViewMode=1
+VisibleRoles=Details_text,Details_size,Details_modificationtime,Details_type,CustomizedDetails
+
+[Settings]
+HiddenFilesShown=true
diff --git a/Previews/Commits.jpg b/Previews/Commits.jpg
new file mode 100644
index 0000000..039e813
Binary files /dev/null and b/Previews/Commits.jpg differ
diff --git a/Previews/Previews_3-check.mp4 b/Previews/Previews_3-check.mp4
new file mode 100755
index 0000000..c6e9f27
Binary files /dev/null and b/Previews/Previews_3-check.mp4 differ
diff --git a/Previews/Previews_compare-0_1.jpg b/Previews/Previews_compare-0_1.jpg
new file mode 100755
index 0000000..973f88e
Binary files /dev/null and b/Previews/Previews_compare-0_1.jpg differ
diff --git a/Previews/Previews_compare-0_2.jpg b/Previews/Previews_compare-0_2.jpg
new file mode 100755
index 0000000..6379d75
Binary files /dev/null and b/Previews/Previews_compare-0_2.jpg differ
diff --git a/Previews/Previews_compare-0_3.jpg b/Previews/Previews_compare-0_3.jpg
new file mode 100755
index 0000000..4d78c48
Binary files /dev/null and b/Previews/Previews_compare-0_3.jpg differ
diff --git a/Previews/Previews_improves-3.jpg b/Previews/Previews_improves-3.jpg
new file mode 100755
index 0000000..02aaf72
Binary files /dev/null and b/Previews/Previews_improves-3.jpg differ
diff --git a/Previews/btrfs-snapshot-diff-v2.webp b/Previews/btrfs-snapshot-diff-v2.webp
new file mode 100644
index 0000000..b9a247c
Binary files /dev/null and b/Previews/btrfs-snapshot-diff-v2.webp differ
diff --git a/README.md b/README.md
index f9edd80..2f5828a 100644
--- a/README.md
+++ b/README.md
@@ -1,14 +1,52 @@
-# btrfs-snapshot-diff
-Find the differences between btrfs snapshots, no quota activation in btrfs needed!
+# btrfs-subvolume-size-diff-forked
+
+This project is a fork of [`dim-geo`](https://github.com/dim-geo/)'s tool [`btrfs-snapshot-diff`](https://github.com/dim-geo/btrfs-snapshot-diff/), which finds the differences between btrfs snapshots, no quota activation in btrfs needed!
+
+The primary enhancement introduced in this fork is the ability to display subvolume paths alongside their IDs. This makes it significantly easier to identify and manage Btrfs subvolumes, especially when dealing with complex snapshot structures.
+
+[**!! For Cantonese(HK) README, Please Click here!!** This is the ENG version](README_HK.md)
+
+
+
+ Video
+
+https://github.com/user-attachments/assets/3c45d540-4ad1-4976-924c-41aefade0534
+
+
+
+
+## Overview
+
+- [1. Why do you need this tool?](#1-why-do-you-need-this-tool)
+- [2. Original implemented functionality](#2-original-implemented-functionality)
+- [3. The improvements over the original project](#3-the-improvements-over-the-original-project)
+- [4. How it works](#4-how-it-works)
+- [5. Installation](#5-installation)
+- [6. Usage](#6-usage)\
+ - [6.1 Example](#61-example)
+- [7. Versions](#7-versions)
+- [8. License](#8-license)
+- [9. Contributing](#9-contributing)\
+ - [9.1 Possible expansions](#91-possible-expansions)
+
+## 1. Why do you need this tool?
+
+Btrfs, as a CoW filesystem, has a problem identifying the size of a snapshot(/subvolume) and the differences between snapshots(/subvolume).
-Btrfs, as a CoW filesystem, has a problem identifying the size of a snapshot and the differences between snapshots.
By using [python-btrfs](https://github.com/knorrie/python-btrfs), it is possible to parse the metadata of a btrfs filesystem and find the differences between subvolumes/snapshots.
-## Currently implemented functionality:
+## 2. Original implemented functionality:
+
+This tool can approximately identify how much space will be freed when a snapshot(/subvolume) is deleted.
+
+## 3. The improvements over [the original project](https://github.com/dim-geo/btrfs-snapshot-diff/)
-This tool can approximately identify how much space will be freed when a snapshot is deleted.
+- **Subvolume Path Integration**: The main improvement of this fork is that the output now includes the subvolume path, making the information more readable and useful for users managing multiple snapshots.
+- **Line Separation**: Additional line spacing for better readability and maintenance.
-## How it works:
+
+
+## 4. How it works:
This tool identifies all subvolumes of a btrfs filesystem. For each subvolume all file extents which are not inline are parsed and kept. All other items are ignored.
@@ -23,13 +61,62 @@ Thus, we create a tree of extents & ranges together with the snapshots that use
2. range2: [...]
Now, the actual disk size of Snapshot 1 can be extracted from each file extent
-## Usage:
-[python-btrfs](https://github.com/knorrie/python-btrfs) must be installed.
+---
+
+## 5. Installation
+
+### Advanced User (Hey! My python is set up)
+#### Prerequisites
+- BTRFS partition with subvolumes(/snapshots)
+- [python-btrfs](https://github.com/knorrie/python-btrfs) must be installed.
+- Just jump to [Usage](#6-usage)
+----
+### Download this project
+Download this repository to your favourite location. Mine is at `$HOME/scripts`.
+
+1. Create a `scripts` directory in your home directory and change into it
+> `mkdir -p $HOME/scripts && cd $HOME/scripts`
+
+2. Clone this repo to `$HOME/scripts`
+> `git clone https://github.com/Ramen-LadyHKG/btrfs-subvolume-size-diff-forked.git`
+
+### Make sure python is configured
+
+3. Set up python
+I cannot provide detailed Python setup instructions here because they differ between distros.
+I recommend consulting one of the following resources instead.
+> https://docs.python-guide.org/starting/install3/linux/
+> https://docs.aws.amazon.com/zh_tw/elasticbeanstalk/latest/dg/eb-cli3-install-linux.html
+
+4. I recommend using virtual environment for python
+> For example\
+>> `mkdir $HOME/.python`\
+>> `python -m venv $HOME/.python`\
+>> `source $HOME/.python/bin/activate`\
+>> `python -m pip install --upgrade pip`
+
+5. Install `btrfs` python library
+>> `python -m pip install btrfs`
+
+**OPTIONAL.** Add a `cbsd` (check-btrfs-size-diff) alias of the python script to your Shell env file.
+>> `echo -e "\nalias cbsd='sudo $HOME/scripts/btrfs-subvolume-size-diff-forked/3_check-btrfs-sub-size-diff__with-line.py'" >> $HOME/.zshrc`
+
+DONE
+
+## 6. Usage
Program is single threaded, it could use a lot of memory and it puts a lot of read stress in disks. It could take many minutes. ionice it & monitor its memory usage. Memory usage & execution time depend on the dataset. The program does not perform any write operations. Do not modify subvolume/snapshot during execution. Try not to write any data to any subvolume or execute dedup programs in parallel.
-`subvolume.py [-u] [-f] [-r ] /path/to/btrfs/ [ -i | -o ] [ ]`
+1. Change your shell to the directory of this repo
+>> `cd $HOME/scripts/btrfs-subvolume-size-diff-forked`
+
+2. Run the script w/wo options
+>> `3_check-btrfs-sub-size-diff__with-line.py [-u] [-f] [-r ] /path/to/btrfs/ [ -i | -o ] [ ]`
+
+ **OPTIONAL**
+ if you've DONE the alias
+>> `cbsd /path/to/btrfs/mount/`
`-u` calculates the unique data occupied by each snapshot. Thus, `-r` makes no sense. Specifying subvolumes to ignore can mess with `-u` results because the specified subvolume data will not be parsed!
`-f` finds the files that might contribute to the unique extents.
@@ -39,118 +126,175 @@ Program is single threaded, it could use a lot of memory and it puts a lot of re
You can find subvolume ids by using:
`btrfs subvolume list /path/to/btrfs`
-## Example:
-
-`btrfs subvolume list /path/to/btrfs`:
+### 6.1. Example:
+`sudo btrfs subvolume list --sort=-rootid /`:
```
-ID 258 gen 15649 top level 5 path mydata
-ID 259 gen 15651 top level 5 path subvol_snapshots
-ID 1949 gen 3785 top level 259 path subvol_snapshots/283/snapshot
-ID 2133 gen 5080 top level 259 path subvol_snapshots/435/snapshot
-ID 2395 gen 6616 top level 259 path subvol_snapshots/660/snapshot
-ID 2694 gen 8781 top level 259 path subvol_snapshots/888/snapshot
-ID 3661 gen 10830 top level 259 path subvol_snapshots/1126/snapshot
-ID 3818 gen 11948 top level 259 path subvol_snapshots/1228/snapshot
-ID 3887 gen 12351 top level 259 path subvol_snapshots/1285/snapshot
-ID 3942 gen 12628 top level 259 path subvol_snapshots/1333/snapshot
-ID 4040 gen 13778 top level 259 path subvol_snapshots/1412/snapshot
-ID 4072 gen 13778 top level 259 path subvol_snapshots/1438/snapshot
-ID 4091 gen 13778 top level 259 path subvol_snapshots/1452/snapshot
-ID 4130 gen 13853 top level 259 path subvol_snapshots/1477/snapshot
-ID 4166 gen 14537 top level 259 path subvol_snapshots/1509/snapshot
-ID 4182 gen 14537 top level 259 path subvol_snapshots/1523/snapshot
-ID 4196 gen 14537 top level 259 path subvol_snapshots/1535/snapshot
-ID 4211 gen 14753 top level 259 path subvol_snapshots/1545/snapshot
-ID 4258 gen 15274 top level 259 path subvol_snapshots/1582/snapshot
-ID 4337 gen 15274 top level 259 path subvol_snapshots/1652/snapshot
-ID 4372 gen 15274 top level 259 path subvol_snapshots/1680/snapshot
-ID 4392 gen 15341 top level 259 path subvol_snapshots/1691/snapshot
-ID 4414 gen 15434 top level 259 path subvol_snapshots/1712/snapshot
-ID 4444 gen 15538 top level 259 path subvol_snapshots/1740/snapshot
-ID 4451 gen 15566 top level 259 path subvol_snapshots/1747/snapshot
-ID 4452 gen 15570 top level 259 path subvol_snapshots/1748/snapshot
-ID 4454 gen 15581 top level 259 path subvol_snapshots/1749/snapshot
-ID 4455 gen 15584 top level 259 path subvol_snapshots/1750/snapshot
-ID 4456 gen 15589 top level 259 path subvol_snapshots/1751/snapshot
-ID 4457 gen 15592 top level 259 path subvol_snapshots/1752/snapshot
-ID 4458 gen 15596 top level 259 path subvol_snapshots/1753/snapshot
-ID 4459 gen 15598 top level 259 path subvol_snapshots/1754/snapshot
-ID 4460 gen 15611 top level 259 path subvol_snapshots/1755/snapshot
-ID 4461 gen 15612 top level 259 path subvol_snapshots/1756/snapshot
-ID 4462 gen 15620 top level 259 path subvol_snapshots/1757/snapshot
-ID 4463 gen 15639 top level 259 path subvol_snapshots/1758/snapshot
-ID 4464 gen 15643 top level 259 path subvol_snapshots/1759/snapshot
-ID 4465 gen 15646 top level 259 path subvol_snapshots/1760/snapshot
-ID 4466 gen 15649 top level 259 path subvol_snapshots/1761/snapshot
+ID 18085 gen 526073 top level 267 path @home/.snapshots/2/snapshot
+ID 18084 gen 526071 top level 263 path @snapshots/1827/snapshot
+ID 18083 gen 526069 top level 263 path @snapshots/1826/snapshot
+ID 18082 gen 526069 top level 267 path @home/.snapshots/1/snapshot
+ID 18065 gen 525568 top level 263 path @snapshots/1825/snapshot
+ID 17994 gen 523504 top level 263 path @snapshots/1803/snapshot
+ID 17992 gen 523427 top level 263 path @snapshots/1801/snapshot
+ID 14424 gen 513650 top level 14421 path flatpak-installs
+ID 14423 gen 525575 top level 14421 path var/tmp
+ID 14422 gen 514035 top level 14421 path usr/share/waydroid-extra
+ID 14421 gen 526112 top level 5 path @
+ID 2722 gen 523512 top level 5 path @opt
+ID 2711 gen 526112 top level 257 path @home/curie/.cache
+ID 2563 gen 513947 top level 5 path @var_lib_libvirt_images
+ID 267 gen 526073 top level 257 path @home/.snapshots
+ID 264 gen 513650 top level 5 path @swap
+ID 263 gen 526072 top level 5 path @snapshots
+ID 262 gen 513650 top level 5 path @var_tmp
+ID 261 gen 526112 top level 5 path @var_log
+ID 260 gen 526094 top level 5 path @var_cache
+ID 259 gen 513650 top level 5 path @srv
+ID 258 gen 525924 top level 5 path @rootf
+ID 257 gen 526112 top level 5 path @home
+
```
-`subvolume.py -r 258 /path/to/btrfs/ 259`:
+### Example Usage
+Choose `14421` as active root partition(/subvolume/snapshot) to compare
+
+The `-r` option is explained in the help output:
+> ` -r ROOT, --root ROOT current active subvolume to analyze first, default is 5`
+```bash
+$ sudo ./3_check-btrfs-sub-size-diff__with-line.py -r 14421 /
+
+Subvolumes to parse: [14421, 18085, 18084, 18083, 18082, 18065, 17994, 17992, 14424, 14423, 14422, 2722, 2711, 2563, 267, 264, 263, 262, 261, 260, 259, 258, 257, 5]
+Parsing subvolume: 14421
+Parsing subvolume: 18085
+Parsing subvolume: 18084
+Parsing subvolume: 18083
+Parsing subvolume: 18082
+Parsing subvolume: 18065
+Parsing subvolume: 17994
+Parsing subvolume: 17992
+Parsing subvolume: 14424
+Parsing subvolume: 14423
+Parsing subvolume: 14422
+Parsing subvolume: 2722
+Parsing subvolume: 2711
+Parsing subvolume: 2563
+Parsing subvolume: 267
+Parsing subvolume: 264
+Parsing subvolume: 263
+Parsing subvolume: 262
+Parsing subvolume: 261
+Parsing subvolume: 260
+Parsing subvolume: 259
+Parsing subvolume: 258
+Parsing subvolume: 257
+Parsing subvolume: 5
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| | |Unique File Extents| Extents added ontop|Extents added ontop of|
+| | | per subvolume|of previous subvolume|current(act) subvolume|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| Path| SubvolumId| Size| Size| Size|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @| 14421| 68.00KiB| 24.34GiB| 24.37GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots/2/snapshot| 18085| 116.00KiB| 32.49GiB| 32.49GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1827/snapshot| 18084| 0.00B| 0.00B| 68.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1826/snapshot| 18083| 0.00B| 24.34GiB| 68.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots/1/snapshot| 18082| 216.00KiB| 32.49GiB| 32.49GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1825/snapshot| 18065| 2.75MiB| 144.95MiB| 2.79MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1803/snapshot| 17994| 3.04MiB| 45.61MiB| 58.10MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1801/snapshot| 17992| 43.60MiB| 24.28GiB| 98.66MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| flatpak-installs| 14424| 8.00KiB| 8.00KiB| 8.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| var/tmp| 14423| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| usr/share/waydroid-extra| 14422| 2.29GiB| 2.29GiB| 2.29GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @opt| 2722| 10.65GiB| 10.65GiB| 10.65GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/curie/.cache| 2711| 11.14GiB| 11.14GiB| 11.14GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_lib_libvirt_images| 2563| 691.54MiB| 691.54MiB| 691.54MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots| 267| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @swap| 264| 8.00GiB| 8.00GiB| 8.00GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots| 263| 48.00KiB| 48.00KiB| 48.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_tmp| 262| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_log| 261| 4.25GiB| 4.25GiB| 4.25GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_cache| 260| 52.64MiB| 52.64MiB| 52.64MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @srv| 259| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @rootf| 258| 192.73MiB| 192.73MiB| 192.73MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home| 257| 14.77MiB| 32.52GiB| 32.50GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| ///// Top-Level(ID:5) /////| 5| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+
+Unique Data size of subvolumes: 37.31GiB Total size: 94.22GiB Volatility: 39.60%
```
- Unique File Extents Extents added ontop Extents added ontop of
- per subvolume of previous subvolume current(act) subvolume
----------------------|---------------------|----------------------
-SubvolumId Size Size Size
- 258 0.00B 0.00B 1.46TiB
- 4466 0.00B 0.00B 0.00B
- 4465 0.00B 0.00B 0.00B
- 4464 0.00B 0.00B 0.00B
- 4463 0.00B 2.58MiB 0.00B
- 4462 0.00B 0.00B 648.00KiB
- 4461 0.00B 0.00B 648.00KiB
- 4460 0.00B 1.18MiB 648.00KiB
- 4459 0.00B 0.00B 996.00KiB
- 4458 0.00B 0.00B 996.00KiB
- 4457 0.00B 0.00B 996.00KiB
- 4456 0.00B 0.00B 996.00KiB
- 4455 0.00B 0.00B 996.00KiB
- 4454 0.00B 0.00B 996.00KiB
- 4452 0.00B 0.00B 996.00KiB
- 4451 0.00B 0.00B 996.00KiB
- 4444 0.00B 1.23MiB 996.00KiB
- 4414 120.00KiB 12.38MiB 1.07MiB
- 4392 184.00KiB 6.20GiB 1.19MiB
- 4372 164.00KiB 3.64MiB 4.41MiB
- 4337 176.00KiB 6.47MiB 4.48MiB
- 4258 0.00B 1010.53MiB 4.91MiB
- 4211 0.00B 1.97GiB 4.91MiB
- 4196 36.00KiB 36.00KiB 5.64MiB
- 4182 36.00KiB 3.66MiB 5.64MiB
- 4166 140.00KiB 590.95MiB 5.80MiB
- 4130 192.00KiB 6.04GiB 5.83MiB
- 4091 1.75MiB 34.36MiB 7.49MiB
- 4072 296.00KiB 9.12MiB 8.09MiB
- 4040 8.96MiB 11.01GiB 16.72MiB
- 3942 2.31MiB 4.16GiB 8.67MiB
- 3887 1.59MiB 4.15GiB 27.33MiB
- 3818 1.22MiB 15.20GiB 27.41MiB
- 3661 2.43MiB 13.61GiB 27.43MiB
- 2694 3.19MiB 40.44GiB 27.42MiB
- 2395 6.55MiB 13.25GiB 62.80MiB
- 2133 5.99MiB 17.44GiB 119.27MiB
- 1949 42.48MiB 1.33TiB 166.50MiB
-Size/Cost of snapshots: 77.78MiB Volatility: 0.01%
-```
-Snapshot 2133 introduced 17GiB, where most of them still reside on the system (used by newer snapshot, 2395)
-Thus, deleting snapshot 2133, will only free 6MiB. Snapshot 2133 has 119MiB changed compared to current/ active (258) subvolume.
+
+#### Explanation
+Snapshot `1801` introduced `24.28GiB` of data, most of which still resides on the system (it is used by the newer snapshot `1803`).
+Thus, deleting snapshot `1801` will only free `43.60MiB`. Snapshot `1801` differs from the current/active subvolume (`14421`) by `98.66MiB`.
+
When using `-u` argument only the first column has values.
-Files result example:
-```
-Possible Unique Files:
-beeshash.dat/ : {4652}
-beescrawl.dat/ : {4652}
-beesstats.txt/ : {4652}
-2708/filelist-2700.txt/ : {259}
-2744/filelist-2741.txt/ : {259}
-2752/filelist-2744.txt/ : {259}
-2795/filelist-2789.txt/ : {259}
-```
+## 7. Versions
+
+1. **0_check-btrfs-sub-size-diff__original.py** - Original version.
+2. **1_check-btrfs-sub-size-diff__prototype.sh** - Prototype version.
+3. **2_check-btrfs-sub-size-diff__no-line.py** - Version without line spacing.
+4. **3_check-btrfs-sub-size-diff__with-line.py** - Version with line spacing.
+
+### History / Differences
+**Version 1**
+ > Not much, simply cloned the original project and added a bash script to run `sudo btrfs subvolume list $1` before and after the python script.\
+ >> -Requires the [the original python script](0_check-btrfs-sub-size-diff__original.py) to function.\
+ >> -Cannot use the advanced features of [the original python script](0_check-btrfs-sub-size-diff__original.py).
+ >>> `./1_check-btrfs-sub-size-diff__prototype.sh /path/to/btrfs/mount/point` is the only usage.\
+ > \
+> 
+
+**Version 2**
+ > Fully implemented `sudo btrfs subvolume list $1` printing into the python script. It shows the path of the subvolume alongside the subvolume ID.\
+ >> +All features from [the original python script](0_check-btrfs-sub-size-diff__original.py) are intact.
+ >>> 
+
+**Version 3**
+ > Enhanced the output format for better readability, use line spacing to ensure consistent column alignment.\
+ >> +All features from [the original python script](0_check-btrfs-sub-size-diff__original.py) are intact.
+ >>> 
+---
-## Possible expansions:
+## 8. License
+
+This project is licensed under the GNU General Public License (GPL) Version 3, 29 June 2007. See the [LICENSE](LICENSE) file for more details.
+
+## 9. Contributing
+
+Feel free to open issues or submit pull requests if you encounter any bugs or have suggestions for further improvements!
+
+### 9.1 Possible expansions:
Calculate the size of metadata block differences.
Take into consideration inline file extents.
Why do we recieve the same extent with the same range many times?
+
+Since there are only minor changes, this fork might be merged back into [the original project](https://github.com/dim-geo/btrfs-snapshot-diff/).
diff --git a/README_0__original.md b/README_0__original.md
new file mode 100644
index 0000000..f9edd80
--- /dev/null
+++ b/README_0__original.md
@@ -0,0 +1,156 @@
+# btrfs-snapshot-diff
+Find the differences between btrfs snapshots, no quota activation in btrfs needed!
+
+Btrfs, as a CoW filesystem, has a problem identifying the size of a snapshot and the differences between snapshots.
+By using [python-btrfs](https://github.com/knorrie/python-btrfs), it is possible to parse the metadata of a btrfs filesystem and find the differences between subvolumes/snapshots.
+
+## Currently implemented functionality:
+
+This tool can approximately identify how much space will be freed when a snapshot is deleted.
+
+## How it works:
+
+This tool identifies all subvolumes of a btrfs filesystem. For each subvolume all file extents which are not inline are parsed and kept. All other items are ignored.
+
+Thus, we create a tree of extents & ranges together with the snapshots that use them:
+
+1. file extent 1
+ 1. range1: [Snapshot 1, Snapshot 2]
+ 2. range2: [Snapshot 3]
+ 3. ...
+2. file extent 2
+ 1. range1: [...]
+ 2. range2: [...]
+
+Now, the actual disk size of Snapshot 1 can be extracted from each file extent
+## Usage:
+
+[python-btrfs](https://github.com/knorrie/python-btrfs) must be installed.
+
+Program is single threaded, it could use a lot of memory and it puts a lot of read stress in disks. It could take many minutes. ionice it & monitor its memory usage. Memory usage & execution time depend on the dataset. The program does not perform any write operations. Do not modify subvolume/snapshot during execution. Try not to write any data to any subvolume or execute dedup programs in parallel.
+
+`subvolume.py [-u] [-f] [-r ] /path/to/btrfs/ [ -i | -o ] [ ]`
+
+`-u` calculates the unique data occupied by each snapshot. Thus, `-r` makes no sense. Specifying subvolumes to ignore can mess with `-u` results because the specified subvolume data will not be parsed!
+`-f` finds the files that might contribute to the unique extents.
+`-i` makes the program to ignore the specified subvolumes, which is also the default behaviour if no `-i` or `-o` argument is specified but subvolumes are given.
+`-o` makes the program to analyze only the specified subvolumes.
+
+You can find subvolume ids by using:
+`btrfs subvolume list /path/to/btrfs`
+
+## Example:
+
+`btrfs subvolume list /path/to/btrfs`:
+
+```
+ID 258 gen 15649 top level 5 path mydata
+ID 259 gen 15651 top level 5 path subvol_snapshots
+ID 1949 gen 3785 top level 259 path subvol_snapshots/283/snapshot
+ID 2133 gen 5080 top level 259 path subvol_snapshots/435/snapshot
+ID 2395 gen 6616 top level 259 path subvol_snapshots/660/snapshot
+ID 2694 gen 8781 top level 259 path subvol_snapshots/888/snapshot
+ID 3661 gen 10830 top level 259 path subvol_snapshots/1126/snapshot
+ID 3818 gen 11948 top level 259 path subvol_snapshots/1228/snapshot
+ID 3887 gen 12351 top level 259 path subvol_snapshots/1285/snapshot
+ID 3942 gen 12628 top level 259 path subvol_snapshots/1333/snapshot
+ID 4040 gen 13778 top level 259 path subvol_snapshots/1412/snapshot
+ID 4072 gen 13778 top level 259 path subvol_snapshots/1438/snapshot
+ID 4091 gen 13778 top level 259 path subvol_snapshots/1452/snapshot
+ID 4130 gen 13853 top level 259 path subvol_snapshots/1477/snapshot
+ID 4166 gen 14537 top level 259 path subvol_snapshots/1509/snapshot
+ID 4182 gen 14537 top level 259 path subvol_snapshots/1523/snapshot
+ID 4196 gen 14537 top level 259 path subvol_snapshots/1535/snapshot
+ID 4211 gen 14753 top level 259 path subvol_snapshots/1545/snapshot
+ID 4258 gen 15274 top level 259 path subvol_snapshots/1582/snapshot
+ID 4337 gen 15274 top level 259 path subvol_snapshots/1652/snapshot
+ID 4372 gen 15274 top level 259 path subvol_snapshots/1680/snapshot
+ID 4392 gen 15341 top level 259 path subvol_snapshots/1691/snapshot
+ID 4414 gen 15434 top level 259 path subvol_snapshots/1712/snapshot
+ID 4444 gen 15538 top level 259 path subvol_snapshots/1740/snapshot
+ID 4451 gen 15566 top level 259 path subvol_snapshots/1747/snapshot
+ID 4452 gen 15570 top level 259 path subvol_snapshots/1748/snapshot
+ID 4454 gen 15581 top level 259 path subvol_snapshots/1749/snapshot
+ID 4455 gen 15584 top level 259 path subvol_snapshots/1750/snapshot
+ID 4456 gen 15589 top level 259 path subvol_snapshots/1751/snapshot
+ID 4457 gen 15592 top level 259 path subvol_snapshots/1752/snapshot
+ID 4458 gen 15596 top level 259 path subvol_snapshots/1753/snapshot
+ID 4459 gen 15598 top level 259 path subvol_snapshots/1754/snapshot
+ID 4460 gen 15611 top level 259 path subvol_snapshots/1755/snapshot
+ID 4461 gen 15612 top level 259 path subvol_snapshots/1756/snapshot
+ID 4462 gen 15620 top level 259 path subvol_snapshots/1757/snapshot
+ID 4463 gen 15639 top level 259 path subvol_snapshots/1758/snapshot
+ID 4464 gen 15643 top level 259 path subvol_snapshots/1759/snapshot
+ID 4465 gen 15646 top level 259 path subvol_snapshots/1760/snapshot
+ID 4466 gen 15649 top level 259 path subvol_snapshots/1761/snapshot
+```
+
+`subvolume.py -r 258 /path/to/btrfs/ 259`:
+
+```
+ Unique File Extents Extents added ontop Extents added ontop of
+ per subvolume of previous subvolume current(act) subvolume
+---------------------|---------------------|----------------------
+SubvolumId Size Size Size
+ 258 0.00B 0.00B 1.46TiB
+ 4466 0.00B 0.00B 0.00B
+ 4465 0.00B 0.00B 0.00B
+ 4464 0.00B 0.00B 0.00B
+ 4463 0.00B 2.58MiB 0.00B
+ 4462 0.00B 0.00B 648.00KiB
+ 4461 0.00B 0.00B 648.00KiB
+ 4460 0.00B 1.18MiB 648.00KiB
+ 4459 0.00B 0.00B 996.00KiB
+ 4458 0.00B 0.00B 996.00KiB
+ 4457 0.00B 0.00B 996.00KiB
+ 4456 0.00B 0.00B 996.00KiB
+ 4455 0.00B 0.00B 996.00KiB
+ 4454 0.00B 0.00B 996.00KiB
+ 4452 0.00B 0.00B 996.00KiB
+ 4451 0.00B 0.00B 996.00KiB
+ 4444 0.00B 1.23MiB 996.00KiB
+ 4414 120.00KiB 12.38MiB 1.07MiB
+ 4392 184.00KiB 6.20GiB 1.19MiB
+ 4372 164.00KiB 3.64MiB 4.41MiB
+ 4337 176.00KiB 6.47MiB 4.48MiB
+ 4258 0.00B 1010.53MiB 4.91MiB
+ 4211 0.00B 1.97GiB 4.91MiB
+ 4196 36.00KiB 36.00KiB 5.64MiB
+ 4182 36.00KiB 3.66MiB 5.64MiB
+ 4166 140.00KiB 590.95MiB 5.80MiB
+ 4130 192.00KiB 6.04GiB 5.83MiB
+ 4091 1.75MiB 34.36MiB 7.49MiB
+ 4072 296.00KiB 9.12MiB 8.09MiB
+ 4040 8.96MiB 11.01GiB 16.72MiB
+ 3942 2.31MiB 4.16GiB 8.67MiB
+ 3887 1.59MiB 4.15GiB 27.33MiB
+ 3818 1.22MiB 15.20GiB 27.41MiB
+ 3661 2.43MiB 13.61GiB 27.43MiB
+ 2694 3.19MiB 40.44GiB 27.42MiB
+ 2395 6.55MiB 13.25GiB 62.80MiB
+ 2133 5.99MiB 17.44GiB 119.27MiB
+ 1949 42.48MiB 1.33TiB 166.50MiB
+Size/Cost of snapshots: 77.78MiB Volatility: 0.01%
+```
+Snapshot 2133 introduced 17GiB, where most of them still reside on the system (used by newer snapshot, 2395)
+Thus, deleting snapshot 2133, will only free 6MiB. Snapshot 2133 has 119MiB changed compared to current/ active (258) subvolume.
+When using `-u` argument only the first column has values.
+
+Files result example:
+```
+Possible Unique Files:
+beeshash.dat/ : {4652}
+beescrawl.dat/ : {4652}
+beesstats.txt/ : {4652}
+2708/filelist-2700.txt/ : {259}
+2744/filelist-2741.txt/ : {259}
+2752/filelist-2744.txt/ : {259}
+2795/filelist-2789.txt/ : {259}
+```
+
+
+## Possible expansions:
+
+Calculate the size of metadata block differences.
+Take into consideration inline file extents.
+Why do we recieve the same extent with the same range many times?
diff --git a/README_ENG.md b/README_ENG.md
new file mode 100644
index 0000000..2f5828a
--- /dev/null
+++ b/README_ENG.md
@@ -0,0 +1,300 @@
+# btrfs-subvolume-size-diff-forked
+
+This project is a fork of [`dim-geo`](https://github.com/dim-geo/)'s tool [`btrfs-snapshot-diff`](https://github.com/dim-geo/btrfs-snapshot-diff/), which finds the differences between btrfs snapshots, no quota activation in btrfs needed!
+
+The primary enhancement introduced in this fork, is the ability to display subvolume paths alongside their IDs. This makes it significantly easier to identify and manage Btrfs subvolumes, especially when dealing with complex snapshot structures.
+
+[**!! For Cantonese(HK) README, Please Click here!!** This is the ENG version](README_HK.md)
+
+
+
+ Video
+
+https://github.com/user-attachments/assets/3c45d540-4ad1-4976-924c-41aefade0534
+
+
+
+
+## Overview
+
+- [1. Why do you need this tool?](#1-why-do-you-need-this-tool)
+- [2. Original implemented functionality](#2-original-implemented-functionality)
+- [3. The improvements over the original project](#3-the-improvements-over-the-original-project)
+- [4. How it works](#4-how-it-works)
+- [5. Installation](#5-installation)
+- [6. Usage](#6-usage)
+ - [6.1 Example](#61-example)
+- [7. Versions](#7-versions)
+- [8. License](#8-license)
+- [9. Contributing](#9-contributing)
+ - [9.1 Possible expansions](#91-possible-expansions)
+
+## 1. Why do you need this tool?
+
+Btrfs, as a CoW filesystem, has a problem identifying the size of a snapshot(/subvolume) and the differences between snapshots(/subvolume).
+
+By using [python-btrfs](https://github.com/knorrie/python-btrfs), it is possible to parse the metadata of a btrfs filesystem and find the differences between subvolumes/snapshots.
+
+## 2. Original implemented functionality:
+
+This tool can approximately identify how much space will be freed when a snapshot(/subvolume) is deleted.
+
+## 3. The improvements over [the original project](https://github.com/dim-geo/btrfs-snapshot-diff/)
+
+- **Subvolume Path Integration**: The main improvement of this fork is that the output now includes the subvolume path, making the information more readable and useful for users managing multiple snapshots.
+- **Line Separation**: Additional line spacing for better readability and maintenance.
+
+
+
+## 4. How it works:
+
+This tool identifies all subvolumes of a btrfs filesystem. For each subvolume all file extents which are not inline are parsed and kept. All other items are ignored.
+
+Thus, we create a tree of extents & ranges together with the snapshots that use them:
+
+1. file extent 1
+ 1. range1: [Snapshot 1, Snapshot 2]
+ 2. range2: [Snapshot 3]
+ 3. ...
+2. file extent 2
+ 1. range1: [...]
+ 2. range2: [...]
+
+Now, the actual disk size of Snapshot 1 can be extracted from each file extent
+
+---
+
+## 5. Installation
+
+### Advanced User (Hey! My python is set up)
+#### Prerequisites
+- BTRFS partition with subvolumes(/snapshots)
+- [python-btrfs](https://github.com/knorrie/python-btrfs) must be installed.
+- Just jump to [Usage](#6-usage)
+----
+### Download this project
+Download this repository to your favourite location. Mine is at `$HOME/scripts`.
+
+1. Create a `scripts` directory in your home directory and change into it
+> `mkdir -p $HOME/scripts && cd $HOME/scripts`
+
+2. Clone this repo to `$HOME/scripts`
+> `git clone https://github.com/Ramen-LadyHKG/btrfs-subvolume-size-diff-forked.git`
+
+### Make sure python is configured
+
+3. Set up python
+I cannot provide detailed Python setup instructions here due to differences between distros.
+I recommend consulting a guide for your distribution, for example:
+> https://docs.python-guide.org/starting/install3/linux/
+> https://docs.aws.amazon.com/zh_tw/elasticbeanstalk/latest/dg/eb-cli3-install-linux.html
+
+4. I recommend using virtual environment for python
+> For example\
+>> `mkdir $HOME/.python`\
+>> `python -m venv $HOME/.python`\
+>> `source $HOME/.python/bin/activate`\
+>> `python -m pip install --upgrade pip`
+
+5. Install `btrfs` python library
+>> `python -m pip install btrfs`
+
+**OPTIONAL.** Add a `cbsd` (check-btrfs-size-diff) alias of the python script to your Shell env file.
+>> `echo -e "\nalias cbsd='sudo $HOME/scripts/btrfs-subvolume-size-diff-forked/3_check-btrfs-sub-size-diff__with-line.py'" >> $HOME/.zshrc`
+
+DONE
+
+## 6. Usage
+
+Program is single threaded, it could use a lot of memory and it puts a lot of read stress in disks. It could take many minutes. ionice it & monitor its memory usage. Memory usage & execution time depend on the dataset. The program does not perform any write operations. Do not modify subvolume/snapshot during execution. Try not to write any data to any subvolume or execute dedup programs in parallel.
+
+1. Change your shell to the directory of this repo
+>> `cd $HOME/scripts/btrfs-subvolume-size-diff-forked`
+
+2. Run the script w/wo options
+>> `3_check-btrfs-sub-size-diff__with-line.py [-u] [-f] [-r <root subvolume id, default 5>] /path/to/btrfs/ [ -i | -o ] [<subvolume id1> <subvolume id2> ...]`
+
+ **OPTIONAL**
+ if you've DONE the alias
+>> `cbsd /path/to/btrfs/mount/`
+
+`-u` calculates the unique data occupied by each snapshot, so combining it with `-r` makes no sense. Specifying subvolumes to ignore can distort the `-u` results, because the data of the ignored subvolumes will not be parsed!\
+`-f` finds the files that might contribute to the unique extents.\
+`-i` makes the program ignore the specified subvolumes; this is also the default behaviour when subvolumes are given without `-i` or `-o`.\
+`-o` makes the program analyze only the specified subvolumes.
+
+You can find subvolume ids by using:
+`btrfs subvolume list /path/to/btrfs`
+
+### 6.1. Example:
+
+`sudo btrfs subvolume list --sort=-rootid /`:
+```
+ID 18085 gen 526073 top level 267 path @home/.snapshots/2/snapshot
+ID 18084 gen 526071 top level 263 path @snapshots/1827/snapshot
+ID 18083 gen 526069 top level 263 path @snapshots/1826/snapshot
+ID 18082 gen 526069 top level 267 path @home/.snapshots/1/snapshot
+ID 18065 gen 525568 top level 263 path @snapshots/1825/snapshot
+ID 17994 gen 523504 top level 263 path @snapshots/1803/snapshot
+ID 17992 gen 523427 top level 263 path @snapshots/1801/snapshot
+ID 14424 gen 513650 top level 14421 path flatpak-installs
+ID 14423 gen 525575 top level 14421 path var/tmp
+ID 14422 gen 514035 top level 14421 path usr/share/waydroid-extra
+ID 14421 gen 526112 top level 5 path @
+ID 2722 gen 523512 top level 5 path @opt
+ID 2711 gen 526112 top level 257 path @home/curie/.cache
+ID 2563 gen 513947 top level 5 path @var_lib_libvirt_images
+ID 267 gen 526073 top level 257 path @home/.snapshots
+ID 264 gen 513650 top level 5 path @swap
+ID 263 gen 526072 top level 5 path @snapshots
+ID 262 gen 513650 top level 5 path @var_tmp
+ID 261 gen 526112 top level 5 path @var_log
+ID 260 gen 526094 top level 5 path @var_cache
+ID 259 gen 513650 top level 5 path @srv
+ID 258 gen 525924 top level 5 path @rootf
+ID 257 gen 526112 top level 5 path @home
+
+```
+
+### Example Usage
+
+Choose `14421` as active root partition(/subvolume/snapshot) to compare
+
+The `-r` option is explained in the help output:
+> ` -r ROOT, --root ROOT current active subvolume to analyze first, default is 5`
+```bash
+$ sudo ./3_check-btrfs-sub-size-diff__with-line.py -r 14421 /
+
+Subvolumes to parse: [14421, 18085, 18084, 18083, 18082, 18065, 17994, 17992, 14424, 14423, 14422, 2722, 2711, 2563, 267, 264, 263, 262, 261, 260, 259, 258, 257, 5]
+Parsing subvolume: 14421
+Parsing subvolume: 18085
+Parsing subvolume: 18084
+Parsing subvolume: 18083
+Parsing subvolume: 18082
+Parsing subvolume: 18065
+Parsing subvolume: 17994
+Parsing subvolume: 17992
+Parsing subvolume: 14424
+Parsing subvolume: 14423
+Parsing subvolume: 14422
+Parsing subvolume: 2722
+Parsing subvolume: 2711
+Parsing subvolume: 2563
+Parsing subvolume: 267
+Parsing subvolume: 264
+Parsing subvolume: 263
+Parsing subvolume: 262
+Parsing subvolume: 261
+Parsing subvolume: 260
+Parsing subvolume: 259
+Parsing subvolume: 258
+Parsing subvolume: 257
+Parsing subvolume: 5
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| | |Unique File Extents| Extents added ontop|Extents added ontop of|
+| | | per subvolume|of previous subvolume|current(act) subvolume|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| Path| SubvolumId| Size| Size| Size|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @| 14421| 68.00KiB| 24.34GiB| 24.37GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots/2/snapshot| 18085| 116.00KiB| 32.49GiB| 32.49GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1827/snapshot| 18084| 0.00B| 0.00B| 68.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1826/snapshot| 18083| 0.00B| 24.34GiB| 68.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots/1/snapshot| 18082| 216.00KiB| 32.49GiB| 32.49GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1825/snapshot| 18065| 2.75MiB| 144.95MiB| 2.79MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1803/snapshot| 17994| 3.04MiB| 45.61MiB| 58.10MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1801/snapshot| 17992| 43.60MiB| 24.28GiB| 98.66MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| flatpak-installs| 14424| 8.00KiB| 8.00KiB| 8.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| var/tmp| 14423| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| usr/share/waydroid-extra| 14422| 2.29GiB| 2.29GiB| 2.29GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @opt| 2722| 10.65GiB| 10.65GiB| 10.65GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/curie/.cache| 2711| 11.14GiB| 11.14GiB| 11.14GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_lib_libvirt_images| 2563| 691.54MiB| 691.54MiB| 691.54MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots| 267| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @swap| 264| 8.00GiB| 8.00GiB| 8.00GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots| 263| 48.00KiB| 48.00KiB| 48.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_tmp| 262| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_log| 261| 4.25GiB| 4.25GiB| 4.25GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_cache| 260| 52.64MiB| 52.64MiB| 52.64MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @srv| 259| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @rootf| 258| 192.73MiB| 192.73MiB| 192.73MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home| 257| 14.77MiB| 32.52GiB| 32.50GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| ///// Top-Level(ID:5) /////| 5| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+
+Unique Data size of subvolumes: 37.31GiB Total size: 94.22GiB Volatility: 39.60%
+```
+
+#### Explanation
+Snapshot `1801` introduced `24.28GiB` of data, most of which still resides on the system (it is used by the newer snapshot `1803`).
+Thus, deleting snapshot `1801` will only free `43.60MiB`. Snapshot `1801` differs by `98.66MiB` from the current/active (`14421`) subvolume.
+
+When using `-u` argument only the first column has values.
+
+## 7. Versions
+
+1. **0_check-btrfs-sub-size-diff__original.py** - Original version.
+2. **1_check-btrfs-sub-size-diff__prototype.sh** - Prototype version.
+3. **2_check-btrfs-sub-size-diff__no-line.py** - Version without line spacing.
+4. **3_check-btrfs-sub-size-diff__with-line.py** - Version with line spacing.
+
+### History / Differences
+**Version 1**
+ > Not much, simply cloned the original project and added a bash script to run `sudo btrfs subvolume list $1` before and after the python script.\
+ >> -Requires [the original python script](0_check-btrfs-sub-size-diff__original.py) to function.\
+ >> -Cannot use the advanced features of [the original python script](0_check-btrfs-sub-size-diff__original.py).
+ >>> `./1_check-btrfs-sub-size-diff__prototype.sh /path/to/btrfs/mount/point` is the only usage.\
+ > \
+> 
+
+**Version 2**
+ > Fully implemented `sudo btrfs subvolume list $1` printing into the python script. It shows the path of the subvolume alongside the subvolume ID.\
+ >> +All features from [the original python script](0_check-btrfs-sub-size-diff__original.py) are intact.
+ >>> 
+
+**Version 3**
+ > Enhanced the output format for better readability, use line spacing to ensure consistent column alignment.\
+ >> +All features from [the original python script](0_check-btrfs-sub-size-diff__original.py) are intact.
+ >>> 
+
+---
+
+## 8. License
+
+This project is licensed under the GNU General Public License (GPL) Version 3, 29 June 2007. See the [LICENSE](LICENSE) file for more details.
+
+## 9. Contributing
+
+Feel free to open issues or submit pull requests if you encounter any bugs or have suggestions for further improvements!
+
+### 9.1 Possible expansions:
+
+Calculate the size of metadata block differences.
+Take into consideration inline file extents.
+Why do we receive the same extent with the same range many times?
+
+Since there are only minor changes, this fork might eventually be merged into [the original project](https://github.com/dim-geo/btrfs-snapshot-diff/).
diff --git a/README_HK.md b/README_HK.md
new file mode 100644
index 0000000..f9417e1
--- /dev/null
+++ b/README_HK.md
@@ -0,0 +1,297 @@
+# btrfs-subvolume-size-diff-forked
+
+依個項目係 [`dim-geo`](https://github.com/dim-geo/)開發嘅工具 [`btrfs-snapshot-diff`](https://github.com/dim-geo/btrfs-snapshot-diff/) 嘅一個fork。佢可以比較 btrfs snapshot 之間嘅差異,唔需要啟用 btrfs 嘅quota功能!
+
+依個fork主要改進咗可以顯示 subvolume 嘅路徑同佢哋嘅 ID。咁樣可以更加容易分析同管理 Btrfs subvolumes,特別係處理複雜嘅snapshot結構時。
+
+[**!! For English README, Please Click here!!** This is the Cantonese(Hong Kong) version](README_ENG.md)
+
+
+ 影片
+
+https://github.com/user-attachments/assets/3c45d540-4ad1-4976-924c-41aefade0534
+
+
+
+
+## 概覽
+
+- [1. 點解你會需要依個工具?](#1-點解你會需要依個工具)
+- [2. 原版本嘅功能](#2-原版本嘅功能)
+- [3. 相比原版本嘅改進](#3-相比原版本嘅改進)
+- [4. 點樣運作](#4-點樣運作)
+- [5. 如何安裝](#5-如何安裝)
+- [6. 使用方式](#6-使用方式)
+ - [6.1 範例](#61-範例)
+- [7. 版本](#7-版本)
+- [8. 授權](#8-授權)
+- [9. 貢獻](#9-貢獻)
+ - [9.1 可能嘅擴展](#91-可能嘅擴展)
+
+## 1. 點解你會需要依個工具?
+
+Btrfs 係一個 CoW (Copy on Write)檔案系統,對於分析snapshot(/subvolume)嘅大小同snapshot之間嘅差異有問題。
+
+通過使用 [python-btrfs](https://github.com/knorrie/python-btrfs),可以解析 btrfs 檔案系統嘅中繼數據,同時搵出 subvolumes/snapshots 之間嘅差異。
+
+## 2. 原版本嘅功能:
+
+依個工具可以大概分析當一個 snapshot(/subvolume) 被刪除時,會釋放幾多空間。
+
+## 3. 相比 [原版本](https://github.com/dim-geo/btrfs-snapshot-diff/) 嘅改進
+
+- **Subvolume 路徑集成**: 依個fork嘅主要改進係輸出包括 subvolume 路徑,令資訊更加易讀,對於管理多個snapshot嘅用戶更加有用。
+- **行間分隔**: 增加行間距,提高可讀性同維護性。
+
+ >>> 
+
+## 4. 點樣運作:
+
+依個工具分析 btrfs 檔案系統中嘅所有 subvolumes。對於每個 subvolume,解析並保存所有唔係 inline 嘅檔案範圍。所有其他項目會被忽略。
+
+之後,我哋建立一個snapshot延伸或範圍內使用嘅樹狀架構:
+
+1. 檔案延伸 1
+ 1. 範圍 1: [Snapshot 1, Snapshot 2]
+ 2. 範圍 2: [Snapshot 3]
+ 3. ...
+2. 檔案延伸 2
+ 1. 範圍 1: [...]
+ 2. 範圍 2: [...]
+
+咁樣,就可以從每個檔案範圍中提取 Snapshot 1 嘅實際磁碟大小。
+
+---
+
+## 5. 如何安裝
+
+### 進階用戶 (Hey! 我嘅 python 已經設置好)
+#### 前置條件
+- BTRFS 分區有 subvolumes(/snapshots)
+- 需要安裝 [python-btrfs](https://github.com/knorrie/python-btrfs)。
+- 直接跳到 [使用方式](#6-使用方式)
+----
+### 下載依個項目
+將依個倉庫下載到你嘅喜愛位置。我放喺 `$HOME/scripts`
+
+1. 喺你嘅 home 資料夾下面建立 `scripts` 資料夾,然後進入去
+> `mkdir -p $HOME/scripts && cd $HOME/scripts`
+
+2. 克隆依個 repo 到 `$HOME/scripts`
+> `git clone https://github.com/Ramen-LadyHKG/btrfs-subvolume-size-diff-forked.git`
+
+### 確保 python 配置好
+
+3. 設置 python
+呢度我無法提供詳細嘅 python 設置因為唔同發行版之間有差異。
+我建議你搵啲更好嘅資源。
+> https://docs.python-guide.org/starting/install3/linux/
+> https://docs.aws.amazon.com/zh_tw/elasticbeanstalk/latest/dg/eb-cli3-install-linux.html
+
+4. 我建議使用虛擬環境來設置 python
+> 例如\
+>> `mkdir $HOME/.python`\
+>> `python -m venv $HOME/.python`\
+>> `source $HOME/.python/bin/activate`\
+>> `python -m pip install --upgrade pip`
+
+5. 安裝 `btrfs` python library
+>> `python -m pip install btrfs`
+
+**可選。** 將 `cbsd` (check-btrfs-size-diff) 別名添加到你嘅 Shell 環境檔案。
+>> `echo -e "\nalias cbsd='sudo $HOME/scripts/btrfs-subvolume-size-diff-forked/3_check-btrfs-sub-size-diff__with-line.py'" >> $HOME/.zshrc`
+
+完成
+
+## 6. 使用方式
+
+程序係單線程,可能會用好多內存,對磁碟有較大嘅讀取壓力。可能需要幾分鐘時間。使用 ionice 同監控佢嘅內存使用情況。內存使用同執行時間取決於數據集。程序唔進行任何寫入操作。執行期間唔好修改 subvolume/snapshot。避免寫數據到任何 subvolume 或平行執行 dedup 程序。
+
+1. 將 shell 轉到依個 repo 嘅資料夾
+>> `cd $HOME/scripts/btrfs-subvolume-size-diff-forked`
+
+2. 運行腳本時有或無選項
+>> `3_check-btrfs-sub-size-diff__with-line.py [-u] [-f] [-r <root subvolume id, default 5>] /path/to/btrfs/ [ -i | -o ] [<subvolume id1> <subvolume id2> ...]`
+
+ **可選**
+ 如果你已經完成咗別名設置
+>> `cbsd /path/to/btrfs/mount/`
+
+`-u` 計算每個 snapshot 所佔用嘅唯一數據。因此,`-r` 就無意義。指定要忽略嘅 subvolumes 可能會影響 `-u` 嘅結果,因為指定嘅 subvolume 數據唔會被解析!\
+`-f` 尋找可能貢獻到唯一範圍嘅檔案。\
+`-i` 令程序忽略指定嘅 subvolumes,依個係默認行為,如果唔指定 `-i` 或 `-o` 參數但有提供 subvolumes 嘅情況下。\
+`-o` 令程序只分析指定嘅 subvolumes。
+
+你可以通過以下命令找到 subvolume ID:
+`btrfs subvolume list /path/to/btrfs`
+
+### 6.1. 範例:
+
+`sudo btrfs subvolume list --sort=-rootid /`:
+```
+ID 18085 gen 526073 top level 267 path @home/.snapshots/2/snapshot
+ID 18084 gen 526071 top level 263 path @snapshots/1827/snapshot
+ID 18083 gen 526069 top level 263 path @snapshots/1826/snapshot
+ID 18082 gen 526069 top level 267 path @home/.snapshots/1/snapshot
+ID 18065 gen 525568 top level 263 path @snapshots/1825/snapshot
+ID 17994 gen 523504 top level 263 path @snapshots/1803/snapshot
+ID 17992 gen 523427 top level 263 path @snapshots/1801/snapshot
+ID 14424 gen 513650 top level 14421 path flatpak-installs
+ID 14423 gen 525575 top level 14421 path var/tmp
+ID 14422 gen 514035 top level 14421 path usr/share/waydroid-extra
+ID 14421 gen 526112 top level 5 path @
+ID 2722 gen 523512 top level 5 path @opt
+ID 2711 gen 526112 top level 257 path @home/curie/.cache
+ID 2563 gen 513947 top level 5 path @var_lib_libvirt_images
+ID 267 gen 526073 top level 257 path @home/.snapshots
+ID 264 gen 513650 top level 5 path @swap
+ID 263 gen 526072 top level 5 path @snapshots
+ID 262 gen 513650 top level 5 path @var_tmp
+ID 261 gen 526112 top level 5 path @var_log
+ID 260 gen 526094 top level 5 path @var_cache
+ID 259 gen 513650 top level 5 path @srv
+ID 258 gen 525924 top level 5 path @rootf
+ID 257 gen 526112 top level 5 path @home
+
+```
+
+### 使用範例
+
+選擇 `14421` 作為(使用中)嘅 root partition(/subvolume/snapshot) 去比較
+
+選項 `-r` 嘅作用同使用方式喺help有說明
+> ` -r ROOT, --root ROOT current active subvolume to analyze first, default is 5`
+```bash
+$ sudo ./3_check-btrfs-sub-size-diff__with-line.py -r 14421 /
+
+Subvolumes to parse: [14421, 18085, 18084, 18083, 18082, 18065, 17994, 17992, 14424, 14423, 14422, 2722, 2711, 2563, 267, 264, 263, 262, 261, 260, 259, 258, 257, 5]
+Parsing subvolume: 14421
+Parsing subvolume: 18085
+Parsing subvolume: 18084
+Parsing subvolume: 18083
+Parsing subvolume: 18082
+Parsing subvolume: 18065
+Parsing subvolume: 17994
+Parsing subvolume: 17992
+Parsing subvolume: 14424
+Parsing subvolume: 14423
+Parsing subvolume: 14422
+Parsing subvolume: 2722
+Parsing subvolume: 2711
+Parsing subvolume: 2563
+Parsing subvolume: 267
+Parsing subvolume: 264
+Parsing subvolume: 263
+Parsing subvolume: 262
+Parsing subvolume: 261
+Parsing subvolume: 260
+Parsing subvolume: 259
+Parsing subvolume: 258
+Parsing subvolume: 257
+Parsing subvolume: 5
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| | |Unique File Extents| Extents added ontop|Extents added ontop of|
+| | | per subvolume|of previous subvolume|current(act) subvolume|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| Path| SubvolumId| Size| Size| Size|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @| 14421| 68.00KiB| 24.34GiB| 24.37GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots/2/snapshot| 18085| 116.00KiB| 32.49GiB| 32.49GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1827/snapshot| 18084| 0.00B| 0.00B| 68.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1826/snapshot| 18083| 0.00B| 24.34GiB| 68.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots/1/snapshot| 18082| 216.00KiB| 32.49GiB| 32.49GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1825/snapshot| 18065| 2.75MiB| 144.95MiB| 2.79MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1803/snapshot| 17994| 3.04MiB| 45.61MiB| 58.10MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots/1801/snapshot| 17992| 43.60MiB| 24.28GiB| 98.66MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| flatpak-installs| 14424| 8.00KiB| 8.00KiB| 8.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| var/tmp| 14423| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| usr/share/waydroid-extra| 14422| 2.29GiB| 2.29GiB| 2.29GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @opt| 2722| 10.65GiB| 10.65GiB| 10.65GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/curie/.cache| 2711| 11.14GiB| 11.14GiB| 11.14GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_lib_libvirt_images| 2563| 691.54MiB| 691.54MiB| 691.54MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home/.snapshots| 267| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @swap| 264| 8.00GiB| 8.00GiB| 8.00GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @snapshots| 263| 48.00KiB| 48.00KiB| 48.00KiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_tmp| 262| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_log| 261| 4.25GiB| 4.25GiB| 4.25GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @var_cache| 260| 52.64MiB| 52.64MiB| 52.64MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @srv| 259| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @rootf| 258| 192.73MiB| 192.73MiB| 192.73MiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| @home| 257| 14.77MiB| 32.52GiB| 32.50GiB|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+| ///// Top-Level(ID:5) /////| 5| 0.00B| 0.00B| 0.00B|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+|-------------------------------|-----------|-------------------|---------------------|----------------------|
+
+Unique Data size of subvolumes: 37.31GiB Total size: 94.22GiB Volatility: 39.60%
+```
+
+#### Explain
+Snapshot `1801` 增加咗 `24.28GiB`,然而大部份嘅佔用仲保留喺檔案系統入面(可能佔用係嚟自新嘅 snapshot `1803`),
+因此刪除 snapshot `1801` 只會釋放出 `43.6MiB` 嘅空間。對比目前/使用中嘅 subvolume(`14421`),snapshot `1801` 有 `98.66MiB` 嘅改變(不能共用嘅檔案資源)。
+
+當使用 `-u` 引數時,只有第一列會有數值。
+
+## 7. 版本
+
+1. **0_check-btrfs-sub-size-diff__original.py** - 原始版本。
+2. **1_check-btrfs-sub-size-diff__prototype.sh** - 原型版本。
+3. **2_check-btrfs-sub-size-diff__no-line.py** - 無行間距版本。
+4. **3_check-btrfs-sub-size-diff__with-line.py** - 有行間距版本。
+
+### 歷史 / 差異
+**版本 1**
+ > 唔多改動,單純克隆咗原版本,並加咗一個 bash 腳本喺 python 腳本之前同之後執行 `sudo btrfs subvolume list $1`。\
+ >> -需要 [原版本 python 腳本](0_check-btrfs-sub-size-diff__original.py) 才能夠運作。\
+ >> -唔能夠使用 [原版本 python 腳本](0_check-btrfs-sub-size-diff__original.py) 嘅進階功能。
+ >>> `./1_check-btrfs-sub-size-diff__prototype.sh /path/to/btrfs/mount/` 係唯一使用方法。\
+ > \
+> 
+
+**版本 2**
+ > 將 `sudo btrfs subvolume list $1` 嘅輸出完全整合到 python 腳本入面。佢會顯示 subvolume 嘅路徑同 subvolume ID。\
+ >> + [原始 python 腳本](0_check-btrfs-sub-size-diff__original.py) 嘅所有功能都完好保留。
+ >>> 
+
+**版本 3**
+ > 改進咗輸出格式,提升可讀性,用行距保持列嘅對齊一致。\
+ >> + [原始 python 腳本](0_check-btrfs-sub-size-diff__original.py) 嘅所有功能都完好保留。
+ >>> 
+
+## 8. 授權
+
+呢個項目係根據 GNU 通用公共許可證(GPL)第 3 版(2007 年 6 月 29 日發佈)授權。詳細內容見 [LICENSE](LICENSE) 檔案。
+
+## 9. 貢獻
+
+如果你遇到任何漏洞或有進一步改進嘅建議,歡迎提出問題或提交 pull requests!
+
+### 9.1 可能嘅擴展:
+
+- 計算中繼數據區塊嘅差異大小。
+- 考慮到 inline 檔案區間。
+- 點解我哋會多次收到相同範圍嘅相同區段?
+
+由於只係有細微改動,呢個 fork 項目可能會合併返入 [原始項目](https://github.com/dim-geo/btrfs-snapshot-diff/)。