Documentation
¶
Overview ¶
Package db is responsible for creating and managing the underlying data storage required by ajfs as a single file.
Index ¶
- Constants
- Variables
- func FixDatabase(out io.Writer, dbPath string, dryRun bool, bakPath string) error
- func RestoreDatabaseHeader(dbPath string, bakPath string) error
- type DatabaseFile
- func (dbf *DatabaseFile) BuildHashStrToIndexMap() (HashStrToIndexMap, error)
- func (dbf *DatabaseFile) BuildIdToHashMap() (IdToHashMap, error)
- func (dbf *DatabaseFile) BuildIdToInfoMap() (IdToInfoMap, error)
- func (dbf *DatabaseFile) CalculateHashTableStats() (HashTableStats, error)
- func (dbf *DatabaseFile) CalculateStats() (Stats, error)
- func (dbf *DatabaseFile) Close() error
- func (dbf *DatabaseFile) EntriesCount() int
- func (dbf *DatabaseFile) EntriesNeedHashing(fn NeedHashingFn) error
- func (dbf *DatabaseFile) Features() FeatureFlags
- func (dbf *DatabaseFile) FileEntriesCount() int
- func (dbf *DatabaseFile) FindDuplicateHashes() (DuplicateHashes, error)
- func (dbf *DatabaseFile) FindDuplicates(fn FindDuplicatesFn) error
- func (dbf *DatabaseFile) FindEntryIndexAndOffset(id path.Id) (EntryIndexAndOffset, error)
- func (dbf *DatabaseFile) FinishEntries() error
- func (dbf *DatabaseFile) FinishHashTable() error
- func (dbf *DatabaseFile) Flush() error
- func (dbf *DatabaseFile) HashTableAlgo() (ajhash.Algo, error)
- func (dbf *DatabaseFile) Interrupted() error
- func (dbf *DatabaseFile) Meta() MetaEntry
- func (dbf *DatabaseFile) Path() string
- func (dbf *DatabaseFile) ReadAllEntries(fn ReadAllEntriesFn) error
- func (dbf *DatabaseFile) ReadAllEntriesWithHashes(fn ReadAllEntriesWithHashesFn) error
- func (dbf *DatabaseFile) ReadEntryAtIndex(idx int) (path.Info, error)
- func (dbf *DatabaseFile) ReadEntryWithId(id path.Id) (path.Info, error)
- func (dbf *DatabaseFile) ReadHashTable() (HashTable, error)
- func (dbf *DatabaseFile) ReadHashTableEntries(fn ReadHashTableEntryFn) error
- func (dbf *DatabaseFile) RootPath() string
- func (dbf *DatabaseFile) StartHashTable(algo ajhash.Algo) error
- func (dbf *DatabaseFile) VerifyChecksums() error
- func (dbf *DatabaseFile) Version() int
- func (dbf *DatabaseFile) WriteEntry(pi *path.Info) error
- func (dbf *DatabaseFile) WriteHashEntry(idx int, hash []byte) error
- type DuplicateHashes
- type EntryIndexAndOffset
- type FeatureFlags
- type FindDuplicatesFn
- type HashStrToIndexMap
- type HashTable
- type HashTableStats
- type IdToHashMap
- type IdToInfoMap
- type MetaEntry
- type NeedHashingFn
- type ReadAllEntriesFn
- type ReadAllEntriesWithHashesFn
- type ReadHashTableEntryFn
- type Stats
Constants ¶
const ( FeatureJustEntries = 0 // Contains no extra features. Only path info entries. FeatureHashTable = 1 << iota // Contains the calculated file hash signatures for the path objects. )
Variables ¶
var ErrInvalidChecksum = errors.New("ajfs database file does not match the stored checksum")
var ErrNotFound = errors.New("path entry not found")
ErrNotFound is returned when a path entry could not be found in the database.
var (
SkipAll = fs.SkipAll
)
Functions ¶
func FixDatabase ¶
Attempts to repair a damaged database. out is used to display information to the user (normally routed to STDOUT). Things to be fixed will be prefixed with >>. dbPath is the file path to an existing database file. dryRun when set to true will only output issues to the output writer and not make any changes. bakPath is the path where the backup file will be created. NOTE: only the headers are saved.
func RestoreDatabaseHeader ¶
Restore the headers from a backup file.
Types ¶
type DatabaseFile ¶
type DatabaseFile struct {
// contains filtered or unexported fields
}
DatabaseFile is the underlying data storage used by ajfs as a single file.
NOTE: The order of operations during the creation process is very important: - CreateDatabase - n * Write - [features] - Finish - Close.
func CreateDatabase ¶
func CreateDatabase(path string, root string, features FeatureFlags) (*DatabaseFile, error)
Create a new file. If the file already exists then an error will be returned. path is the file path at which the database file will be created. root is the file path that the database will represent and that will be used to scan the file hierarchy. features indicates the expected features that will be present in the database.
func OpenDatabase ¶
func OpenDatabase(path string) (*DatabaseFile, error)
Open an existing database file (as read-only) and check the signature is valid and the version is supported.
func ResumeDatabase ¶
func ResumeDatabase(path string) (*DatabaseFile, error)
Open an existing database file (read-write) to resume processing of extra features.
func (*DatabaseFile) BuildHashStrToIndexMap ¶
func (dbf *DatabaseFile) BuildHashStrToIndexMap() (HashStrToIndexMap, error)
Build a map from a hash encoded string to the path entry index.
func (*DatabaseFile) BuildIdToHashMap ¶
func (dbf *DatabaseFile) BuildIdToHashMap() (IdToHashMap, error)
Build a map from a path's identifier to the file signature hash.
func (*DatabaseFile) BuildIdToInfoMap ¶
func (dbf *DatabaseFile) BuildIdToInfoMap() (IdToInfoMap, error)
Build a map from a path's identifier to the path info entry.
func (*DatabaseFile) CalculateHashTableStats ¶
func (dbf *DatabaseFile) CalculateHashTableStats() (HashTableStats, error)
Calculate statistics for the hash table.
func (*DatabaseFile) CalculateStats ¶
func (dbf *DatabaseFile) CalculateStats() (Stats, error)
Calculate statistics on the database.
func (*DatabaseFile) Close ¶
func (dbf *DatabaseFile) Close() error
Sync pending writes and close the file.
func (*DatabaseFile) EntriesCount ¶
func (dbf *DatabaseFile) EntriesCount() int
The number of path info entries.
func (*DatabaseFile) EntriesNeedHashing ¶
func (dbf *DatabaseFile) EntriesNeedHashing(fn NeedHashingFn) error
Look at the hash table and call the passed function for each entry that still needs its file signature hash to be calculated.
func (*DatabaseFile) Features ¶
func (dbf *DatabaseFile) Features() FeatureFlags
Features present in the database.
func (*DatabaseFile) FileEntriesCount ¶
func (dbf *DatabaseFile) FileEntriesCount() int
The number of path info entries that are files.
func (*DatabaseFile) FindDuplicateHashes ¶
func (dbf *DatabaseFile) FindDuplicateHashes() (DuplicateHashes, error)
Find all the hashes that are duplicates with the indices to those path info entries.
func (*DatabaseFile) FindDuplicates ¶
func (dbf *DatabaseFile) FindDuplicates(fn FindDuplicatesFn) error
Find duplicate file entries that share the same file signature hash.
func (*DatabaseFile) FindEntryIndexAndOffset ¶
func (dbf *DatabaseFile) FindEntryIndexAndOffset(id path.Id) (EntryIndexAndOffset, error)
Look up the index and offset for a path entry with the specified identifier. Returns ErrNotFound if the entry does not exist.
func (*DatabaseFile) FinishEntries ¶
func (dbf *DatabaseFile) FinishEntries() error
Write the entries offset table after all path info objects have been written.
func (*DatabaseFile) FinishHashTable ¶
func (dbf *DatabaseFile) FinishHashTable() error
Finish writing the hash table.
func (*DatabaseFile) Flush ¶
func (dbf *DatabaseFile) Flush() error
Ensure unwritten data is written to the file on disk.
func (*DatabaseFile) HashTableAlgo ¶
func (dbf *DatabaseFile) HashTableAlgo() (ajhash.Algo, error)
Read the hash table header and return the hashing algorithm used.
func (*DatabaseFile) Interrupted ¶
func (dbf *DatabaseFile) Interrupted() error
Called when the app has to shut down before the database could be created. This will remove the database file.
func (*DatabaseFile) ReadAllEntries ¶
func (dbf *DatabaseFile) ReadAllEntries(fn ReadAllEntriesFn) error
Read all the path info objects from the database and call the callback function. If the callback function returns SkipAll then the reading process will be stopped and nil will be returned as the error.
func (*DatabaseFile) ReadAllEntriesWithHashes ¶
func (dbf *DatabaseFile) ReadAllEntriesWithHashes(fn ReadAllEntriesWithHashesFn) error
Read all the path info objects along with their file signature hash from the database and call the callback function. If the callback function returns SkipAll then the reading process will be stopped and nil will be returned as the error.
func (*DatabaseFile) ReadEntryAtIndex ¶
func (dbf *DatabaseFile) ReadEntryAtIndex(idx int) (path.Info, error)
Read the path info object with the specified index.
func (*DatabaseFile) ReadEntryWithId ¶
Read the path info object with the specified identifier. Returns ErrNotFound if the entry does not exist.
func (*DatabaseFile) ReadHashTable ¶
func (dbf *DatabaseFile) ReadHashTable() (HashTable, error)
Read the hash table. Will only contain the entries for which a file signature hash was calculated.
func (*DatabaseFile) ReadHashTableEntries ¶
func (dbf *DatabaseFile) ReadHashTableEntries(fn ReadHashTableEntryFn) error
Read all hash table entries from the database and call the callback function. If the callback function returns SkipAll then the reading process will be stopped and nil will be returned as the error.
func (*DatabaseFile) RootPath ¶
func (dbf *DatabaseFile) RootPath() string
The file path that the database represents and that was used to scan the file hierarchy.
func (*DatabaseFile) StartHashTable ¶
func (dbf *DatabaseFile) StartHashTable(algo ajhash.Algo) error
Start writing the initial hash table.
func (*DatabaseFile) VerifyChecksums ¶
func (dbf *DatabaseFile) VerifyChecksums() error
Check the database file integrity and return ErrInvalidChecksum if the checksum does not match.
func (*DatabaseFile) WriteEntry ¶
func (dbf *DatabaseFile) WriteEntry(pi *path.Info) error
Write the path info to the database.
func (*DatabaseFile) WriteHashEntry ¶
func (dbf *DatabaseFile) WriteHashEntry(idx int, hash []byte) error
Write the file hash signature for the path info object with the specified index in the database. idx Index of the path info object. hash The file hash signature.
type DuplicateHashes ¶
Duplicate hashes is a map from the hash (as hex encoded string) to all the indices of path info entries that share the same file signature hash.
type EntryIndexAndOffset ¶
type FeatureFlags ¶
type FeatureFlags uint16
func (FeatureFlags) HasHashTable ¶
func (f FeatureFlags) HasHashTable() bool
type FindDuplicatesFn ¶
FindDuplicatesFn will be called by FindDuplicates for each duplicate file that was found. group Each of the same duplicates will belong to the same group. idx Is the index of the entry. pi Is the path info object. hash Is the file signature hash (as a hex encoded string). Return SkipAll to stop reading all the entries.
type HashStrToIndexMap ¶
Map from a hash encoded string to the path entry index.
type HashTableStats ¶
type HashTableStats struct {
HashedCount uint64 // number of entries that have a calculated hash
PendingCount uint64 // number of entries that still need to be calculated
DupesCount uint64 // number of duplicate files found
TotalDupeSize uint64 // total bytes of space used by found duplicates
SaveDupeSize uint64 // total bytes that could be saved after removing duplicates
}
Stats used to calculate statistics on the hash table.
type IdToHashMap ¶
Map from a path's identifier to the file signature hash.
type IdToInfoMap ¶
Map from a path's identifier to the path info entry.
type MetaEntry ¶
type MetaEntry struct {
// The following fields will be written as the size of the data varint followed by the encoded form of the data
Tool string `json:"tool"` // The tool used to create or update the database (e.g. ajfs: v1.0.42)
OS string `json:"os"` // The operating system (e.g. darwin, linux, windows etc.)
Arch string `json:"arch"` // The architecture (e.g. arm64 etc.)
CreatedAt time.Time `json:"createdAt"` // Time of database creation (this is captured instead of relying on the file system time)
}
Meta info about how the database was created.
type NeedHashingFn ¶
Called by EntriesNeedHashing. idx Is the index of the path info entry that needs its file signature hash to be calculated. pi The path info entry in the database. Call WriteHashEntry with the calculated hash. Return SkipAll to stop processing.
type ReadAllEntriesFn ¶
ReadAllEntriesFn will be called by ReadAllEntries for each entry that was read from the database. idx Is the index of the entry. pi Is the path info object. Return SkipAll to stop reading all the entries.
type ReadAllEntriesWithHashesFn ¶
ReadAllEntriesWithHashesFn will be called by ReadAllEntriesWithHashes for each entry that was read from the database. idx Is the index of the entry. pi Is the path info object. hash Is the file signature hash. Return SkipAll to stop reading all the entries.
type ReadHashTableEntryFn ¶
ReadHashTableEntryFn will be called by ReadHashTableEntries for each hash table entry that was read from the database. idx Is the index of the hash table entry which also maps 1:1 to the path entry index. hash Is the file hash signature. Return SkipAll to stop reading further entries.
type Stats ¶
type Stats struct {
DirCount uint64 // total number of directories
FileCount uint64 // total number of files
TotalFileSize uint64 // total size of files all summed together
AvgFileSize uint64 // totalFileSize / fileCount
MaxFileSize uint64 // the biggest single file size
}
Stats is used to calculate statistics on the database.