1 // Written in the D programming language.
2 /**
3 Types that handle the core logic of archive file formats.
4 
5 Copyright: Copyright Richard W Laughlin Jr. 2014—2016
6 
7 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
8 
9 Authors: Richard W Laughlin Jr.
10 
11 Source: http://github.com/rcythr/archive
12 
13 */
14 
15 module archive.core;
16 
private import std.container;
private import std.string;
19 
/**
 * The common template for all archives. Each archive format is implemented as a Policy class (T) which supplies the
 * types and static methods specific to that format; Filter supplies static compress/decompress hooks applied to the
 * serialized bytes. Reference this class to find the methods available to all archives, but use the docs for your
 * specific format to find methods/members available for that format.
 */
public class Archive(T, Filter = NullArchiveFilter)
{
    /**
     * Alias to allow for easy referencing the proper archive File member type for Policy.
     */
    alias File = T.FileImpl;

    /**
     * Alias to allow for easy referencing the proper archive Directory member type for Policy.
     */
    alias Directory = T.DirectoryImpl;

    static if(T.hasProperties)
    {
        /**
         * (Optional) Alias to allow for easy referencing the proper archive Properties type for Policy.
         *       e.g. Tar archives do not have any archive-wide properties, while zip files have an archive comment.
         */
        alias Properties = T.Properties;
    }

    /**
     * Constructor for archives which initializes the archive with the contents of the serialized archive
     * stored in data. The data is passed through Filter.decompress before being handed to T.deserialize.
     * Params:
     *     data = serialized data of an archive in the proper format for the Policy.
     */
    public this(void[] data)
    {
        // Cannot call this() because it may not be defined (it only exists for non read-only policies).
        root = new Directory();
        static if(T.hasProperties)
            properties = new Properties();
        T.deserialize(Filter.decompress(data), this);
    }

    static if(!T.isReadOnly)
    {
        /**
         * Constructor for read/write archives which does not require serialized data to create.
         */
        public this()
        {
            root = new Directory();
            static if(T.hasProperties)
                properties = new Properties();
        }
    }

    /**
     * Provides access to all files in the archive via a delegate method. This allows use in foreach loops.
     * Example:
     * ---
     *    foreach(file; archive.files)
     *    {
     *        // Use properties of file
     *    }
     * ---
     */
    @property public int delegate(int delegate(ref File)) files() { return &root.filesOpApply; }

    /**
     * Provides access to all directories in the archive via a delegate method. This allows use in foreach loops.
     * Example:
     * ---
     *    foreach(dir; archive.directories)
     *    {
     *        // Use properties of dir
     *    }
     * ---
     */
    @property public int delegate(int delegate(ref Directory)) directories() { return &root.directoriesOpApply; }

    /**
     * Provides access to all files and directories in the archive via a delegate method. This allows use in foreach loops.
     * Example:
     * ---
     *    foreach(member; archive.members)
     *    {
     *        if(member.isFile())
     *        {
     *            auto file = cast(archive.File)member;
     *            // Use properties of file
     *        }
     *        else
     *        {
     *            auto dir = cast(archive.Directory)member;
     *            // Use properties of dir
     *        }
     *    }
     * ---
     */
    @property public int delegate(int delegate(ref ArchiveMember)) members() { return &root.membersOpApply; }

    /**
     * Returns: The file associated with the given path variable, or null if no such file exists in the archive.
     *          An empty path never names a file, so it yields null.
     */
    public File getFile(string path)
    {
        // Guard here so the directory walk is never handed an empty list of path parts.
        if(path.length == 0)
            return null;

        return root.getFile(split(path, "/"));
    }

    /**
     * Returns: The directory associated with the given path variable, the root for "", or null if no such directory exists.
     *          A single trailing "/" on the path is ignored.
     */
    public Directory getDirectory(string path)
    {
        if(path.length == 0) // Handle ""
            return root;

        return root.getDirectory(directoryParts(path));
    }

    /**
     * Returns: the number of files in the archive which are up to n levels deep (inclusive).
     */
    public size_t numFiles(size_t n=size_t.max) { return root.numFiles(n); }

    /**
     * Returns: The number of directories in the archive which are up to n levels deep (inclusive).
     */
    public size_t numDirectories(size_t n=size_t.max) { return root.numDirectories(n); }

    /**
     * Returns: The number of directories and files in the archive which are up to n levels deep (inclusive).
     */
    public size_t numMembers(size_t n=size_t.max) { return root.numMembers(n); }

    static if(!T.isReadOnly)
    {
        /**
         * Serializes the archive via T.serialize and passes the result through Filter.compress.
         * Returns: the archive in a void[] array which can be saved to a file, sent over a network connection, etc.
         */
        public void[] serialize()
        {
            return Filter.compress(T.serialize(this));
        }
    }

    /**
     * Adds a file to the archive. If the path to the file contains directories that are not in the archive, they are added.
     * Throws: IllegalPathException when an element in the given path is already used for a file/directory or the path is otherwise invalid.
     * Example:
     * ---
     * // inserts apple.txt into the archive.
     * archive.addFile(new archive.File("apple.txt"));
     *
     * // inserts directory animals (if not exists) and dogs.txt into the archive.
     * archive.addFile(new archive.File("animals/dogs.txt"));
     * ---
     */
    public void addFile(File member)
    {
        if(member.path == null || member.path == "")
            throw new IllegalPathException("Files which are inserted into the archive must have a valid name");

        root.addFile(split(member.path, "/"), member);
    }

    /**
     * Adds a directory to the archive. If the path to the directory contains directories that are not in the archive, they are added.
     * If the directory already exists it is not replaced with an empty directory.
     *
     * Returns: the final Directory in the path. (e.g. "dlang" for "languages/dlang/"), or the root for "".
     * Throws: IllegalPathException when an element in the given path is already used for a file or the path is otherwise invalid.
     * Example:
     * ---
     * // inserts directory animals into the archive.
     * archive.addDirectory("animals/");
     *
     * // inserts directory languages (if not exists) and dlang into the archive.
     * archive.addDirectory("languages/dlang/");
     * ---
     */
    public Directory addDirectory(string path)
    {
        if(path.length == 0)
            return root;

        return root.addDirectory(directoryParts(path));
    }

    /**
     * Removes a file from the archive.
     * Returns: true if the file was removed, false if it did not exist (an empty path never names a file).
     */
    public bool removeFile(string path)
    {
        // Guard here so the directory walk is never handed an empty list of path parts.
        if(path.length == 0)
            return false;

        return root.removeFile(split(path, "/"));
    }

    /**
     * Removes a directory (and all contained files and directories) from the archive.
     * The root itself (path "") cannot be removed.
     * Returns: true if the directory was removed, false if it did not exist.
     */
    public bool removeDirectory(string path)
    {
        if(path.length == 0)
            return false;

        return root.removeDirectory(directoryParts(path));
    }

    /**
     * Removes all directories in the archive with no direct files or files in subdirectories.
     */
    public void removeEmptyDirectories() { root.removeEmptyDirectories(); }

    /*
     * Splits a non-empty directory path on "/" and drops the final part when the path ends with "/",
     * so that "a/b/" and "a/b" produce the same parts.
     */
    private static string[] directoryParts(string path)
    {
        string[] parts = split(path, "/");
        return (path[$-1] == '/') ? parts[0 .. $-1] : parts;
    }

    static if(T.hasProperties)
    {
        /**
         * (Optional) Archive-wide properties for the format associated with Policy.
         *      e.g. Tar archives do not have any archive-wide properties, while zip files have an archive comment.
         */
        public Properties properties;
    }

    /**
     * The root directory of the archive. Public here to allow for manual recursive algorithms.
     */
    public Directory root;
}
269 
/**
 * Thrown when a supplied path is invalid.
 *
 * The message is always prefixed with "IllegalPathException: ".
 */
public class IllegalPathException : Exception
{
    /**
     * Params:
     *     msg  = description of what is wrong with the path.
     *     file = source file reported by the exception; defaults to the file of the throw site.
     *     line = source line reported by the exception; defaults to the line of the throw site.
     *     next = optional exception this one is chained to.
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        // Forward file/line explicitly; otherwise they would default to this constructor's own location.
        super("IllegalPathException: " ~ msg, file, line, next);
    }
}
277 
/**
 * Pass-through filter used when an archive is not wrapped in any compression layer.
 * Both hooks hand back exactly the slice they were given.
 */
public class NullArchiveFilter
{
    /// Returns: data, unchanged.
    public static void[] compress(void[] data)
    {
        return data;
    }

    /// Returns: data, unchanged.
    public static void[] decompress(void[] data)
    {
        return data;
    }
}
286 
/**
 * Common base class for all Archive members (Files and Directories).
 * Provides common name management functionality and ability to iterate over both Files and Directories at once.
 * A member's path is stored as a single string whose elements are separated by "/".
 */
public class ArchiveMember
{
    private bool _isDirectory;
    protected string _path;

    /*
     * Creates a member with an empty path.
     */
    protected this(bool isDirectory)
    {
        _isDirectory = isDirectory;
        _path = "";
    }

    /*
     * Creates a member with the given "/"-separated path.
     */
    protected this(bool isDirectory, string mypath)
    {
        _isDirectory = isDirectory;
        _path = mypath;
    }

    /*
     * Creates a member whose path is the given parts joined with "/".
     */
    protected this(bool isDirectory, string[] pathParts)
    {
        _isDirectory = isDirectory;
        _path = join(pathParts, "/");
    }

    /**
     * Returns: true if this member is a directory, false otherwise.
     */
    @property bool isDirectory() { return _isDirectory; }

    /**
     * Returns: true if this member is a file, false otherwise.
     */
    @property bool isFile() { return !_isDirectory; }

    /**
     * Gets the final element in the path of this member.
     *      e.g. for the path "a/b/c/e/fg.txt" the result is "fg.txt"
     * Returns: the final element in the path of this member, or "" when the path has no elements.
     */
    @property public string name()
    {
        string[] parts = split(_path, '/');
        // A path with no elements (e.g. a default constructed member) has no final element to index.
        return parts.length ? parts[$-1] : "";
    }

    /**
     * Sets the final element in the path of this member.
     *      e.g. for the path "a/b/c/e/fg.txt" the changed path part will be "fg.txt"
     * When the path has no elements, newname becomes the whole path.
     * Warning: Do not use this property while this member is currently part of an archive.
     */
    @property public void name(string newname)
    {
        string[] parts = split(_path, '/');
        if(parts.length == 0)
        {
            // Nothing to replace; the new name is the only element.
            _path = newname;
            return;
        }

        parts[$-1] = newname;
        _path = join(parts, "/");
    }

    /**
     * Gets the path of this member.
     * Returns: the path of this member.
     */
    @property string path()
    {
        return _path;
    }

    /**
     * Sets the path of this member.
     * Warning: Do not use this property while this member is currently part of an archive.
     */
    @property void path(string newpath)
    {
        _path = newpath;
    }
}
365 
/**
 * Base class for archive directories. Provides common subdirectory and file management.
 *
 * The path-walking methods take the full list of path parts plus an index i of the part handled by this
 * directory; they recurse into subdirectories with i+1. Callers normally leave i at its default of 0.
 */
public class ArchiveDirectory(Policy) : ArchiveMember
{

    /**
     * Alias for referencing the correct File class in the Policy.
     */
    public alias File = Policy.FileImpl;

    /**
     * Alias for referencing the correct Directory class in the Policy.
     */
    public alias Directory = Policy.DirectoryImpl;

    /**
     * Default constructor for ArchiveDirectories. Used to create the root archive.
     * Note: Do not use without a subsequent call to *at least* .path = "path".
     */
    public this() { super(true, ""); }

    /**
     * Constructs a new ArchiveDirectory with the given path name.
     */
    public this(string mypath) { super(true, mypath); }

    /** ditto */
    public this(string[] parts) { super(true, parts); }

    /*
     * Adds a member to the archive, creating subdirectories as necessary.
     * An existing file stored under the same name in the target directory is replaced.
     * Throws: IllegalPathException if there is no path part at index i, if the final part names an existing
     *         directory, or if an intermediate part names an existing file.
     */
    public void addFile(string[] pathParts, File file, uint i=0)
    {
        // Without this guard pathParts.length-1 wraps around for an empty list and pathParts[i] is out of range.
        if(i >= pathParts.length)
            throw new IllegalPathException("Cannot add file with an empty path");

        if(i == pathParts.length-1)
        {
            // Check that a directory of the same name does not exist
            if(pathParts[i] in directories)
            {
                throw new IllegalPathException("Cannot add file due to existing directory by the same name: " ~ join(pathParts, "/"));
            }

            // Add the member to this node.
            files[pathParts[i]] = file;
        }
        else
        {
            findOrCreateDirectory(pathParts, i).addFile(pathParts, file, i+1);
        }
    }

    /*
     * Adds a chain of subdirectories, creating them as necessary.
     * Returns: the directory for the final path part.
     * Throws: IllegalPathException if there is no path part at index i or a part names an existing file.
     */
    public Directory addDirectory(string[] pathParts, uint i=0)
    {
        // Without this guard pathParts[i] is out of range for an empty list.
        if(i >= pathParts.length)
            throw new IllegalPathException("Cannot add directory with an empty path");

        // An empty string part handles the case where the root directory is added.
        // Some tar archivers will place it into the archive to store permissions/ownership
        Directory dir = findOrCreateDirectory(pathParts, i);

        if(i == pathParts.length-1)
        {
            return dir;
        }
        else
        {
            return dir.addDirectory(pathParts, i+1);
        }
    }

    /*
     * Looks up the subdirectory named pathParts[i], creating and registering it (with path
     * pathParts[0 .. i+1]) when it does not exist yet.
     * Throws: IllegalPathException if a file of that name already exists in this directory.
     */
    private Directory findOrCreateDirectory(string[] pathParts, uint i)
    {
        if(Directory* existing = pathParts[i] in directories)
            return *existing;

        // Check that a file of the same name does not exist.
        if(pathParts[i] in files)
        {
            throw new IllegalPathException("Cannot add directory due to existing file by the same name: " ~ join(pathParts[0 .. i+1], "/"));
        }

        Directory created = new Directory(pathParts[0 .. i+1]);
        directories[pathParts[i]] = created;
        return created;
    }

    /*
     * Attempts to remove a member from the archive.
     * Returns: true if the file existed and was removed, false otherwise.
     */
    public bool removeFile(string[] pathParts, uint i=0)
    {
        if(i >= pathParts.length) // Nothing is named by an empty path.
            return false;

        if(i == pathParts.length-1)
        {
            return files.remove(pathParts[i]);
        }

        Directory* dir = pathParts[i] in directories;
        return dir ? dir.removeFile(pathParts, i+1) : false;
    }

    /*
     * Attempts to remove a directory from the archive.
     * Returns: true if the directory existed and was removed, false otherwise.
     */
    public bool removeDirectory(string[] pathParts, uint i=0)
    {
        if(i >= pathParts.length) // Nothing is named by an empty path.
            return false;

        if(i == pathParts.length-1)
        {
            return directories.remove(pathParts[i]);
        }

        Directory* dir = pathParts[i] in directories;
        return dir ? dir.removeDirectory(pathParts, i+1) : false;
    }

    /*
     * Removes all empty directories from the archive.
     * Returns: the number of files remaining in this directory and all of its subdirectories.
     */
    public uint removeEmptyDirectories()
    {
        uint count = 0;

        // Removal is deferred: keys are collected first and removed after the iteration has finished.
        string[] toRemove;

        foreach(string key; directories.byKey)
        {
            uint subdirCount = directories[key].removeEmptyDirectories();
            if(subdirCount == 0)
            {
                // No files anywhere below this subdirectory.
                toRemove ~= key;
            }
            else
            {
                count += subdirCount;
            }
        }

        foreach(string key; toRemove)
        {
            directories.remove(key);
        }

        count += files.length;
        return count;
    }

    /*
     * Returns a file from the directory if it exists, otherwise null.
     */
    public File getFile(string[] pathParts, uint i=0)
    {
        if(i >= pathParts.length) // Nothing is named by an empty path.
            return null;

        if(i == pathParts.length-1)
        {
            File* file = pathParts[i] in files;
            return (file) ? *file : null;
        }
        else
        {
            Directory* dir = pathParts[i] in directories;
            if(!dir)
                return null;
            return dir.getFile(pathParts, i+1);
        }
    }

    /*
     * Returns the number of files up to n levels deep. Current directory is level 0.
     */
    public size_t numFiles(size_t n, size_t cur=0)
    {
        if(n == cur)
        {
            return files.length;
        }
        else
        {
            size_t result = files.length; // All files in this directory.
            foreach(dir; directories.byValue)
            {
                result += dir.numFiles(n, cur+1);
            }
            return result;
        }
    }

    /*
     * Returns the number of directories up to n levels deep. Current directory is level 0.
     * This directory itself is not counted.
     */
    public size_t numDirectories(size_t n, size_t cur=0)
    {
        if(n == cur)
        {
            return directories.length; // At the depth limit only the immediate subdirectories are counted.
        }
        else
        {
            size_t result = 0;
            foreach(dir; directories.byValue)
            {
                result += 1 + dir.numDirectories(n, cur+1); // The subdirectory plus the directories inside it.
            }
            return result;
        }
    }

    /*
     * Returns the number of files and directories up to n levels deep. Current directory is level 0.
     */
    public size_t numMembers(size_t n, size_t cur=0)
    {
        if(n == cur)
        {
            return files.length + directories.length; // All files/directories in this directory.
        }
        else
        {
            size_t result = files.length; // All the files in this directory.
            foreach(dir; directories.byValue)
            {
                result += 1 + dir.numMembers(n, cur+1); // A subdirectory and the files/directories inside it.
            }
            return result;
        }
    }

    /*
     * Returns a directory from this directory if it exists, otherwise null.
     */
    public Directory getDirectory(string[] pathParts, uint i=0)
    {
        if(i >= pathParts.length) // Nothing is named by an empty path.
            return null;

        Directory* dir = pathParts[i] in directories;
        if(!dir)
            return null;

        if(i == pathParts.length-1)
        {
            return *dir;
        }
        else
        {
            return dir.getDirectory(pathParts, i+1);
        }
    }

    /**
     * opApply method used for file iteration.
     * Visits the files of every subdirectory (recursively) before the files of this directory.
     * Iteration stops as soon as dg returns a non-zero value, which is then returned.
     */
    public int filesOpApply(int delegate(ref File) dg)
    {
        int result = 0;
        foreach(Directory ad; directories)
        {
            result = ad.filesOpApply(dg);
            if(result)
                return result;
        }

        foreach(File am; files)
        {
            result = dg(am);
            if(result)
                return result;
        }
        return result;
    }

    /**
     * opApply method used for directory iteration.
     * Each subdirectory is visited before its own subdirectories.
     * Iteration stops as soon as dg returns a non-zero value, which is then returned.
     */
    public int directoriesOpApply(int delegate(ref Directory) dg)
    {
        int result = 0;
        foreach(Directory ad; directories)
        {
            result = dg(ad);
            if(result)
                return result;

            result = ad.directoriesOpApply(dg);
            if(result)
                return result;
        }

        return result;
    }

    /**
     * opApply method for member iteration.
     * Visits each subdirectory followed by its contents, then the files of this directory.
     * Iteration stops as soon as dg returns a non-zero value, which is then returned.
     */
    public int membersOpApply(int delegate(ref ArchiveMember) dg)
    {
        int result = 0;
        foreach(Directory ad; directories)
        {
            // dg takes its argument by ref, so an lvalue of the base type is needed.
            ArchiveMember entry = ad;

            result = dg(entry);
            if(result)
                return result;

            result = ad.membersOpApply(dg);
            if(result)
                return result;
        }

        foreach(File am; files)
        {
            ArchiveMember entry = am;

            result = dg(entry);
            if(result)
                return result;
        }
        return result;
    }

    /**
     * Subdirectories in this directory. Allows access to directories during manual recursion of the Directory structure.
     */
    public Directory[string] directories;

    /**
     * Files in this directory. Allows access to files during manual recursion of the Directory structure.
     */
    public File[string] files;
}
714 
version(unittest)
{
    // Read/write policy without archive-wide properties. (De)serialization is stubbed out.
    private class MockPolicy
    {
        public static immutable(bool) isReadOnly = false;
        public static immutable(bool) hasProperties = false;

        public static class FileImpl : ArchiveMember
        {
            public this()
            {
                super(false);
            }

            public this(string path)
            {
                super(false, path);
            }

            public this(string[] path)
            {
                super(false, path);
            }
        }

        public static class DirectoryImpl : ArchiveDirectory!(MockPolicy)
        {
            public this()
            {
                super();
            }

            public this(string path)
            {
                super(path);
            }

            public this(string[] path)
            {
                super(path);
            }
        }

        // Intentionally ignores its input and leaves the archive untouched.
        public static void deserialize(Filter)(void[] data, Archive!(MockPolicy, Filter) archive)
        {
        }

        // Produces a fixed four byte payload regardless of the archive contents.
        public static void[] serialize(Filter)(Archive!(MockPolicy, Filter) archive)
        {
            ubyte[] payload = new ubyte[4];
            return cast(void[]) payload;
        }
    }

    // Filter whose hooks return the data they were given.
    private class MockFilter
    {
        public static void[] compress(void[] data) { return data; }

        public static void[] decompress(void[] data) { return data; }
    }

    // Read-only policy with archive-wide properties. deserialize verifies the filter ran first.
    private class MockROPolicy
    {
        public static immutable(bool) isReadOnly = true;
        public static immutable(bool) hasProperties = true;

        public static class FileImpl : ArchiveMember
        {
            public this()
            {
                super(false);
            }

            public this(string path)
            {
                super(false, path);
            }

            public this(string[] path)
            {
                super(false, path);
            }
        }

        public static class DirectoryImpl : ArchiveDirectory!MockROPolicy
        {
            public this()
            {
                super();
            }

            public this(string path)
            {
                super(path);
            }

            public this(string[] path)
            {
                super(path);
            }
        }

        public static class Properties { }

        // Asserts that every byte handed over is the character 'a'.
        public static void deserialize(Filter)(void[] data, Archive!(MockROPolicy, Filter) archive)
        {
            static import std.algorithm;

            char[] text = cast(char[]) data;
            assert(std.algorithm.all!"a == 'a'"(text));
        }
    }

    // Decompress-only filter that overwrites its input in place.
    private class MockROFilter
    {
        public static void[] decompress(void[] data)
        {
            // Fill it all with a's. We'll test in the deserialize that this is held.
            char[] text = cast(char[]) data;
            text[] = 'a';
            return data;
        }
    }
}
800 
// Exercises a read/write archive end to end: insertion with implicit directories, lookup,
// counting at several depths, rejected paths, removal and pruning of empty directories.
unittest
{
    import std.exception : assertThrown;

    alias ArchType = Archive!(MockPolicy, MockFilter);
    alias File = MockPolicy.FileImpl;
    alias Directory = MockPolicy.DirectoryImpl;

    // Archive tests
    ArchType arch = new ArchType();

    // Add top-level member
    arch.addFile(new File("apples.txt"));

    // Add member adding in implicit directory.
    arch.addFile(new File("apples/oranges.txt"));

    // Add member, adding in implicit directory while using one previously defined.
    arch.addFile(new File("apples/oranges/bananas.txt"));

    // Add directory, adding in implicit directory
    arch.addDirectory("animals/dog/");

    // Add directory, using previously defined directory without trailing "/"
    arch.addDirectory("animals/cat");

    // Add directory, adding in implicit directory
    arch.addDirectory("animals/bird/eagle/");

    assert(arch.getFile("apples.txt") !is null);
    assert(arch.getDirectory("apples") !is null);
    assert(arch.getFile("apples/oranges.txt") !is null);
    assert(arch.getDirectory("apples/oranges/") !is null);
    assert(arch.getFile("apples/oranges/bananas.txt") !is null);

    assert(arch.getDirectory("animals") !is null);
    assert(arch.getDirectory("animals/dog/") !is null);
    assert(arch.getDirectory("animals/cat/") !is null);
    assert(arch.getDirectory("animals/bird/") !is null);
    assert(arch.getDirectory("animals/bird/eagle/") !is null);

    // The empty path refers to the root directory.
    assert(arch.getDirectory("") is arch.root);

    // Lookups of members that were never added yield null.
    assert(arch.getFile("missing.txt") is null);
    assert(arch.getFile("apples/missing.txt") is null);
    assert(arch.getDirectory("missing") is null);

    // Members report the path and name they were stored under.
    assert(arch.getFile("apples/oranges.txt").name == "oranges.txt");
    Directory dog = arch.getDirectory("animals/dog/");
    assert(dog.isDirectory);
    assert(dog.path == "animals/dog");
    assert(dog.name == "dog");

    // Check the num* is correct
    assert(arch.numFiles() == 3);
    assert(arch.numDirectories() == 7);
    assert(arch.numMembers() == 10);

    // Check num* at top level is correct
    assert(arch.numFiles(0) == 1);
    assert(arch.numDirectories(0) == 2);
    assert(arch.numMembers(0) == 3);

    // Check num* at level = 1 is correct
    assert(arch.numFiles(1) == 2);
    assert(arch.numDirectories(1) == 6);
    assert(arch.numMembers(1) == 8);

    // Paths that clash with an existing member of the other kind, or that are empty, are rejected.
    assertThrown!IllegalPathException(arch.addFile(new File("")));
    assertThrown!IllegalPathException(arch.addFile(new File("apples")));
    assertThrown!IllegalPathException(arch.addFile(new File("apples.txt/x.txt")));
    assertThrown!IllegalPathException(arch.addDirectory("apples.txt"));

    // Rejected insertions must not have changed the archive.
    assert(arch.numFiles() == 3);
    assert(arch.numDirectories() == 7);

    // The mock policy serializes to a fixed four byte payload and the mock filter passes it through.
    assert(arch.serialize().length == 4);

    // Remove top level member
    assert(arch.removeFile("apples.txt"));
    assert(!arch.removeFile("apples.txt"));
    assert(arch.getFile("apples.txt") is null);

    // Remove not-top level member
    assert(arch.removeFile("apples/oranges.txt"));
    assert(arch.getFile("apples/oranges.txt") is null);

    // Removing something below a directory that does not exist reports failure.
    assert(!arch.removeFile("missing/x.txt"));

    // Remove top level directory
    assert(arch.removeDirectory("apples/"));
    assert(arch.getDirectory("apples") is null);
    assert(arch.getFile("apples/oranges/bananas.txt") is null);

    // Remove non top-level directory
    assert(arch.removeDirectory("animals/dog/"));
    assert(!arch.removeDirectory("animals/dog/"));

    // The root itself cannot be removed.
    assert(!arch.removeDirectory(""));

    // Remove All empty directories.
    arch.removeEmptyDirectories();

    assert(arch.root.directories.length == 0);
    assert(arch.root.files.length == 0);
}
873 
// Read only archive instantiation test: constructing the archive runs the filter and then
// the policy's deserialize, which carries its own assertion.
unittest
{
    alias ReadOnlyArchive = Archive!(MockROPolicy, MockROFilter);

    char[] payload = "012345678".dup;
    auto archive = new ReadOnlyArchive(payload);
}