1 /**
2 Functions and Types that implement the Zip Policy used with the Archive template.
3 
4 Copyright: Copyright Richard W Laughlin Jr. 2014—2016
5 
6 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
7 
8 Authors: Refactored into Policy by Richard W Laughlin Jr.
9          Original zip code by $(WEB digitalmars.com, Walter Bright)
10 
11 Source: http://github.com/rcythr/archive 
12 
13 Policy for the Archive template which provides reading and writing of Zip files.
14 
15 Reading Usage:
16 ---
17 import archive.zip;
18 import std.stdio;
19 
auto archive = new ZipArchive(std.file.read("my.zip"));
21 
22 foreach(file; archive.files)
23 {
24     writeln("Filename: ", file.path);
25     writeln("Data: ", file.data);
26 }
27 
28 ---
29 
30 Writing Usage:
31 ---
32 import archive.zip;
33 
34 auto archive = new ZipArchive();
35 
36 auto file = new ZipArchive.File("languages/awesome.txt");
37 file.data = "D\n"; // can also set to immutable(ubyte)[]
38 archive.addFile(file);
39 
40 std.file.write("lang.zip", cast(ubyte[])archive.serialize());
41 
42 ---
43 
44 */
45 
46 module archive.zip;
47 import archive.core;
48 
private import std.algorithm;
private import std.array;
private import std.bitmanip : littleEndianToNative, nativeToLittleEndian;
private import core.bitop;
private import std.container;
private import std.conv;
private import std.datetime;
private import std.exception;
private import std.string;
private import std.zlib;
59 
/**
 * Thrown when a zip file is not readable or contains errors.
 *
 * The message is stored with a "ZipException: " prefix. The file and line
 * default to the location of the `new ZipException(...)` expression, so the
 * reported origin is the throw site rather than this constructor.
 */
public class ZipException : Exception
{
    /**
     * Params:
     *   msg  = description of the problem (stored with a "ZipException: " prefix).
     *   file = source file reported as the origin; defaults to the caller's file.
     *   line = source line reported as the origin; defaults to the caller's line.
     *   next = optional exception to chain behind this one.
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        super("ZipException: " ~ msg, file, line, next);
    }
}
70 
/**
 * Compression scheme applied to the payload of a single zip entry.
 *
 * The numeric values are the ones written to, and read back from, the
 * compression method field of the zip headers.
 */
public enum CompressionMethod : ushort
{
    /// The payload is kept as-is, without any compression.
    none    = 0,

    /// The payload is compressed with deflate.
    deflate = 8,
}
79 
80 /**
81  * Policy class for reading and writing zip archives.
82  *
83  * Currently lacks support for:
84  *      + Multiple disk zip files
85  *      + Compression algorithms other than deflate
86  *      + Zip64 
87  *      + Encryption
88  */
89 public class ZipPolicy
90 {
91     static immutable(bool) isReadOnly = false;
92     static immutable(bool) hasProperties = true;
93     
94     private static immutable(ubyte[]) DIRECTORY_MAGIC_NUM = cast(immutable(ubyte[]))"PK\x01\x02";
95     private static immutable(ubyte[]) RECORD_MAGIC_NUM = cast(immutable(ubyte[]))"PK\x03\x04";
96     private static immutable(ubyte[]) END_DIRECTORY_MAGIC_NUM = cast(immutable(ubyte[]))"PK\x05\x06";
97     
98     /**
99      * Directory implementation for Zip archives. Provides any additional functionality required by ZipArchives.
100      */
101     public static class DirectoryImpl : ArchiveDirectory!(ZipPolicy) 
102     {
103         public this() { }
104         public this(string path) { super(path); }
105         public this(string[] path) { super(path); }
106     }
107     
108     /**
109      * File implementation for Zip archives. Provides any additional functionality required of files by ZipArchives.
110      */
111     public static class FileImpl : ArchiveMember
112     {  
113         public this() { super(false); }
114         public this(string path) { super(false, path); }
115         public this(string[] path) { super(false, path); }
116 
117         /*
118          * Compresses the uncompressed data in this file (if needed).
119          */
120         private void decompress() 
121         {
122             if(_decompressedData == null)
123             {
124                 switch (_compressionMethod)
125                 {
126                     case CompressionMethod.none:
127                         _decompressedData = _compressedData;
128                         break;
129                     case CompressionMethod.deflate:
130                         // -15 is a magic value used to decompress zip files.
131                         // It has the effect of not requiring the 2 byte header
132                         // and 4 byte trailer.
133                         _decompressedData = assumeUnique!(ubyte)(cast(ubyte[])std.zlib.uncompress(cast(void[])_compressedData, _decompressedSize, -15));
134                         break;
135                     default:
136                         throw new ZipException("unsupported compression method");
137                 }
138             }
139         }
140         
141         /*
142          * Decompresses the compressed data in this file (if needed).
143          */
144         private void compress() 
145         {
146             if(_compressedData == null)
147             {
148                 switch (_compressionMethod)
149                 {
150                     case CompressionMethod.none:
151                         _decompressedData = _compressedData;
152                         break;
153                     case CompressionMethod.deflate:
154                         // -15 is a magic value used to decompress zip files.
155                         // It has the effect of not requiring the 2 byte header
156                         // and 4 byte trailer.
157                         _compressedData = assumeUnique!(ubyte)(cast(ubyte[])std.zlib.compress(cast(void[])_decompressedData));
158                         _compressedData = _compressedData[2 .. _compressedData.length - 4];
159                         break;
160                     default:
161                         throw new ZipException("unsupported compression method");
162                 }
163             }
164         }
165         
166         /**
167          * Returns: the decompressed data.
168          */
169         @property public immutable(ubyte)[] data()
170         {
171             decompress();
172             return _decompressedData;
173         }
174         
175         /**
176          * Sets the decompressed data.
177          */
178         @property public void data(immutable(ubyte)[] data)
179         {
180             _decompressedData = data;
181             _decompressedSize = cast(uint)data.length;
182             _compressedData = null;
183             
184             // Recalculate CRC
185             _crc32 = std.zlib.crc32(0, cast(void[])_decompressedData);
186         }
187         
188         /** ditto */
189         @property public void data(string newdata)
190         {
191             data(cast(immutable(ubyte)[])newdata);
192         }
193         
194         /**
195          * Returns: the compressed data
196          */
197         @property public immutable(ubyte)[] compressed()
198         {
199             compress();
200             return _compressedData;
201         }
202         
203         /**
204          * Returns: the compression method.
205          */
206         @property public CompressionMethod compressionMethod() { return _compressionMethod; }
207         
208         /**
209          * Sets the compression method that will be used.
210          */
211         @property public void compressionMethod(CompressionMethod method)
212         {
213             if(method != _compressionMethod)
214             {
215                 // First make sure the data is already extracted (if needed)
216                 decompress();
217                 
218                 // Clean out stale compressed data
219                 _compressionMethod = method;
220                 _compressedData = null;
221             }
222         }
223         
224         /**
225          * Additional data stored within the zip archive for this file.
226          */
227         public ubyte[] extra;
228         
229         /**
230          * The comment for the member of the archive.
231          */
232         public string comment = "";
233         
234         /**
235          * The time when the file was last modified.
236          */
237         public DosFileTime modificationTime;
238         
239         /**
240          * Zip related flags specific for this member, as specified by the Zip format documentation.
241          */
242         public ushort flags;
243         
244         /**
245          * The internal attributes specific to this member, as specified by the Zip format documentation.
246          */
247         public ushort internalAttributes;
248         
249         /**
250          * The internal attributes specific to this member, as specified by the Zip format documentation.
251          */
252         public uint externalAttributes;
253         
254         private immutable(ubyte)[] _compressedData = null;
255         private immutable(ubyte)[] _decompressedData = null;
256         private uint _compressedSize;
257         private uint _decompressedSize;
258         private uint _crc32;
259         private uint _offset;
260         private CompressionMethod _compressionMethod = CompressionMethod.deflate;
261     }
262     
263     /**
264      * Archive-wide properties for ZipArchives.
265      */
266     public static class Properties
267     {
268         /**
269          * Archive-wide File comment stored in the archive.
270          */
271         public string comment;
272     }
273     
274     /*
275      * Fetches the local header data for a file in the archive - most importantly the stored data
276      */
277     private static void expandMember(void[] data, FileImpl file, int offset)
278     {
279         ushort getUShort()
280         {
281             ubyte[2] result = cast(ubyte[])data[offset .. offset+2];
282             offset += 2;
283             return littleEndianToNative!ushort(result);
284         }
285         
286         uint getUInt()
287         {
288             ubyte[4] result = cast(ubyte[])data[offset .. offset+4];
289             offset += 4;
290             return littleEndianToNative!uint(result);
291         }
292         
293         if(data[offset .. offset + 4] != RECORD_MAGIC_NUM)
294             throw new ZipException("Invalid directory entry 4");
295         offset += 4;
296         
297         ushort minExtractVersion = getUShort();
298         file.flags = getUShort();
299         file._compressionMethod = cast(CompressionMethod)getUShort();
300         file.modificationTime = cast(DosFileTime)getUInt();
301         file._crc32 = getUInt();
302         uint compressedSize = max(file._compressedSize, getUInt());
303         file._decompressedSize = max(file._decompressedSize, getUInt());
304         ushort namelen = getUShort();
305         ushort extralen = getUShort();
306         
307         int dataOffset = offset + namelen + extralen;
308         file._compressedData = assumeUnique!(ubyte)(cast(ubyte[])data[dataOffset .. dataOffset + compressedSize]);
309     }
310     
311     /**
312      * Deserialize method which loads data from a zip archive and stores it in archive.
313      */
314     public static void deserialize(Filter)(void[] data, Archive!(ZipPolicy, Filter) archive)
315     {
316         int iend, i;
317         int endrecoffset;
318     
319         // Helper functions
320         ushort getUShort()
321         {
322             ubyte[2] result = cast(ubyte[])data[i .. i+2];
323             i += 2;
324             return littleEndianToNative!ushort(result);
325         }
326         
327         uint getUInt()
328         {
329             ubyte[4] result = cast(ubyte[])data[i .. i+4];
330             i += 4;
331             return littleEndianToNative!uint(result);
332         }
333         
334         // Calculate the ending record
335         iend = to!uint(data.length) - 66000;
336         if(iend < 0)
337             iend = 0;
338         
339         for(i = to!uint(data.length) - 22; 1; --i)
340         {
341             if( i < iend )
342                 throw new ZipException("No end record.");
343                
344             if(data[i .. i+4] == END_DIRECTORY_MAGIC_NUM)
345             {
346                 i += 20;
347                 ushort endcommentlength = getUShort();
348                 if (i + endcommentlength > data.length)
349                 {
350                     i -= 22;
351                     continue;
352                 }
353                 
354                 archive.properties.comment = cast(string)(data[i .. i + endcommentlength]);
355                 endrecoffset = i - 22;
356                 break;
357             }
358         }
359         i -= 18;
360         
361         ushort diskNumber = getUShort();
362         ushort diskStartDir = getUShort();
363         ushort numEntries = getUShort();
364         ushort totalEntries = getUShort();
365         
366         if(numEntries != totalEntries)
367             throw new ZipException("Multiple disk zips not supported");
368             
369         uint directorySize = getUInt();
370         uint directoryOffset = getUInt();
371         
372         if(directoryOffset + directorySize > endrecoffset)
373             throw new ZipException("Corrupted Directory");
374         
375         i = directoryOffset;
376         for(int n = 0; n < numEntries; ++n)
377         {
378             if(data[i .. i + 4] != DIRECTORY_MAGIC_NUM)
379                 throw new ZipException("Invalid directory entry 1");
380             
381             i += 4;
382             
383             FileImpl file = new FileImpl();
384             ushort madeVersion = getUShort();
385             ushort minExtractVersion = getUShort();
386             file.flags = getUShort();
387             file._compressionMethod = cast(CompressionMethod)getUShort();
388             file.modificationTime = cast(DosFileTime)getUInt();
389             file._crc32 = getUInt();
390             file._compressedSize = getUInt();
391             file._decompressedSize = getUInt();
392             ushort nameLen = getUShort();
393             ushort extraLen = getUShort();
394             ushort commentLen = getUShort();
395             ushort memberDiskNumber = getUShort();
396             file.internalAttributes = getUShort();
397             file.externalAttributes = getUInt();
398             uint offset = getUInt();
399             
400             if(i + nameLen + extraLen + commentLen > directoryOffset + directorySize)
401                 throw new ZipException("Invalid Directory Entry 2");
402                 
403             file.path = cast(string)(data[i .. i + nameLen]);
404             i += nameLen;
405             
406             file.extra = cast(ubyte[])data[i .. i + extraLen];
407             i += extraLen;
408             
409             file.comment = cast(string)(data[i .. i + commentLen]);
410             i += commentLen;
411             
412             // Expand the actual file to get the compressed data now.
413             expandMember(data, file, offset);
414             
415             // Add the Member to the Listing
416             if(file.path.endsWith("/"))
417             {
418                 archive.addDirectory(file.path);
419             }
420             else
421             {
422                 archive.addFile(file);
423             }
424         }
425         if( i != directoryOffset + directorySize)
426             throw new ZipException("Invalid directory entry 3");
427     }
428     
429     /**
430      * Serialize method which writes data stored in the archive to an array and returns it.
431      */
432     public static void[] serialize(Filter)(Archive!(ZipPolicy, Filter) archive)
433     {
434         if(archive.properties.comment.length > 0xFFFF)
435             throw new ZipException("Archive comment longer than 655535");
436          
437         // Ensure each file is compressed; compute size
438         uint archiveSize = 0;
439         uint directorySize = 0;
440         foreach(file; archive.files)
441         {
442             file.compress();
443             archiveSize += 30 + file.path.length + file.extra.length + file._compressedData.length;
444             directorySize += 46 + file.path.length + file.extra.length + file.comment.length;
445         }
446         
447         ubyte[] data = new ubyte[archiveSize + directorySize + 22 + archive.properties.comment.length];
448         
449         // Helper Functions
450         uint i = 0;
451         void putUShort(ushort us)
452         {
453             data[i .. i + 2] = nativeToLittleEndian(us);
454             i += 2;
455         }
456         
457         void putUInt(uint ui)
458         {
459             data[i .. i + 4] = nativeToLittleEndian(ui);
460             i += 4;
461         }
462         
463         // Store Records
464         foreach(file ; archive.files)
465         {
466             file._offset = i;
467             data[i .. i + 4] = RECORD_MAGIC_NUM;
468             i += 4;
469             
470             putUShort(20); // Member Minimum Extract Version
471             putUShort(file.flags);
472             putUShort(file.compressionMethod);
473             putUInt(cast(uint)file.modificationTime);
474             putUInt(file._crc32);
475             putUInt(cast(uint)file._compressedData.length);
476             putUInt(cast(uint)file._decompressedData.length);
477             putUShort(cast(ushort)file.path.length);
478             putUShort(cast(ushort)file.extra.length);
479             
480             data[i .. i + file.path.length] = (cast(ubyte[])file.path)[];
481             i += file.path.length;
482             
483             data[i .. i + file.extra.length] = (cast(ubyte[])file.extra)[];
484             i += file.extra.length;
485             
486             data[i .. i + file._compressedData.length] = file.compressed[];
487             i += file._compressedData.length;
488         }
489         
490         // Store Directory Entries
491         uint directoryOffset = i;
492         ushort numEntries = 0;
493         foreach(file ; archive.files)
494         {
495             data[i .. i+4] = DIRECTORY_MAGIC_NUM;
496             i += 4;
497             
498             putUShort(20); // Made Version
499             putUShort(20); // Min Extract Version
500             putUShort(file.flags);
501             putUShort(cast(ushort)file.compressionMethod);
502             putUInt(cast(uint)file.modificationTime);
503             putUInt(file._crc32);
504             putUInt(cast(uint)file._compressedData.length);
505             putUInt(cast(uint)file._decompressedSize);
506             putUShort(cast(ushort)file.path.length);
507             putUShort(cast(ushort)file.extra.length);
508             putUShort(cast(ushort)file.comment.length);
509             putUShort(0); // Disk Number
510             putUShort(file.internalAttributes);
511             putUInt(file.externalAttributes);
512             putUInt(file._offset);
513             
514             data[i .. i + file.path.length] = (cast(ubyte[])file.path)[];
515             i += file.path.length;
516             
517             data[i .. i + file.extra.length] = (cast(ubyte[])file.extra)[];
518             i += file.extra.length;
519             
520             data[i .. i + file.comment.length] = (cast(ubyte[])file.comment)[];
521             i += file.comment.length;
522             
523             ++numEntries;
524         }
525         
526         // Write End Directory Entry
527         data[i .. i+4] = END_DIRECTORY_MAGIC_NUM;
528         i += 4;
529         
530         putUShort(0); // Disk Number
531         putUShort(0); // Disk Start Dir
532         putUShort(numEntries); // Number of Entries
533         putUShort(numEntries); // Total Number of Entries
534         putUInt(directorySize);
535         putUInt(directoryOffset);
536         putUShort(cast(ushort)archive.properties.comment.length);
537         
538         data[i .. data.length] = (cast(ubyte[])archive.properties.comment)[];
539         
540         // Return result
541         return cast(void[])data;
542     }
543 };
544 
/**
 * Shorthand for the Archive template instantiated with ZipPolicy, so that
 * users can simply write ZipArchive.
 */
alias ZipArchive = Archive!(ZipPolicy);
549 
// Round trip: build an archive, serialize it, read it back and verify both
// the structure (member counts) and the contents of every file.
unittest
{
    string data1 = "HELLO\nI AM A FILE WITH SOME DATA\n1234567890\nABCDEFGHIJKLMOP";
    immutable(ubyte)[] data2 = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

    ZipArchive output = new ZipArchive();

    // Add file into the top level directory.
    ZipArchive.File file1 = new ZipArchive.File();
    file1.path = "apple.txt";
    file1.data = data1;
    output.addFile(file1);

    // Add a file into a non top level directory.
    ZipArchive.File file2 = new ZipArchive.File("directory/directory/directory/apple.txt");
    file2.data = data2;
    output.addFile(file2);

    // Add a directory that already exists.
    output.addDirectory("directory/");

    // Add a directory that does not exist.
    output.addDirectory("newdirectory/");

    // Remove unused directories
    output.removeEmptyDirectories();

    // Ensure the only unused directory was removed.
    assert(output.getDirectory("newdirectory") is null);

    // Re-add a directory that does not exist so we can test its output later.
    output.addDirectory("newdirectory/");

    // Serialize the zip archive and construct a new zip with it
    ZipArchive input = new ZipArchive(output.serialize());

    // Make sure that there is a file named apple.txt and a file named directory/directory/directory/apple.txt
    assert(input.getFile("apple.txt") !is null);
    assert(input.getFile("directory/directory/directory/apple.txt") !is null);

    // Make sure there are no extra directories or files
    assert(input.numFiles() == 2);
    assert(input.numDirectories() == 3);
    assert(input.numMembers() == 5);

    // Make sure the contents survived compression and decompression.
    size_t verified = 0;
    foreach(file; input.files)
    {
        if(file.path == "apple.txt")
        {
            assert(file.data == cast(immutable(ubyte)[])data1);
            ++verified;
        }
        else if(file.path == "directory/directory/directory/apple.txt")
        {
            assert(file.data == data2);
            ++verified;
        }
    }
    assert(verified == 2);
}
595