1 // Written in the D programming language.
2 /**
3 Functions and types that implement the TarPolicy used with the Archive template.
4 
5 Copyright: Copyright Richard W Laughlin Jr. 2014—2016
6 
7 License: $(WEB boost.org/LICENSE_1_0.txt, Boost License 1.0).
8 
9 Authors: Richard W Laughlin Jr.
10 
11 Source: http://github.com/rcythr/archive 
12 
13 Policy for the Archive template which provides reading and writing of Tar files.
14 
15 Reading Usage:
16 ---
17 import archive.tar;
18 import std.stdio;
19 
auto archive = new TarArchive(std.file.read("my.tar"));
21 
22 foreach(file; archive.files)
23 {
24     writeln("Filename: ", file.path);
25     writeln("Data: ", file.data);
26 }
27 
28 ---
29 
30 Writing Usage:
31 ---
32 import archive.tar;
33 
34 auto archive = new TarArchive();
35 
36 auto file = new TarArchive.File("languages/awesome.txt");
37 file.data = "D\n"; // can also set to immutable(ubyte)[]
38 archive.addFile(file);
39 
40 std.file.write("lang.tar", cast(ubyte[])archive.serialize());
41 
42 ---
43 
44 */
45 
46 module archive.tar;
47 import archive.core;
48 
49 private import std.algorithm;
50 private import std.array;
51 private import std.container;
52 private import std.conv;
53 private import std.exception;
54 private import std.format;
55 private import std..string;
56 
/**
 * Thrown when a tar file is not readable or contains errors.
 */
public class TarException : Exception
{
    /**
     * Params:
     *   msg  = description of the problem; it is prefixed with "TarException: ".
     *   file = source file reported by the exception (defaults to the caller's file).
     *   line = source line reported by the exception (defaults to the caller's line).
     *   next = optional exception to chain to this one.
     */
    this(string msg, string file = __FILE__, size_t line = __LINE__, Throwable next = null)
    {
        // Forward file/line so the exception points at the throw site rather
        // than at this constructor.
        super("TarException: " ~ msg, file, line, next);
    }
}
67 
/**
 * Namespace of unix permission bit masks, expressed in octal.
 * The masks can be OR-ed together to build a full mode value.
 */
public struct TarPermissions
{
    static immutable
    {
        // Entry kind bits.
        uint DIRECTORY = octal!40000;
        uint FILE = octal!100000;

        // Special execution bits.
        uint EXEC_SET_UID = octal!4000;
        uint EXEC_SET_GID = octal!2000;
        uint SAVE_TEXT = octal!1000;

        // Owner read / write / execute.
        uint R_OWNER = octal!400;
        uint W_OWNER = octal!200;
        uint X_OWNER = octal!100;

        // Group read / write / execute.
        uint R_GROUP = octal!40;
        uint W_GROUP = octal!20;
        uint X_GROUP = octal!10;

        // Other read / write / execute.
        uint R_OTHER = octal!4;
        uint W_OTHER = octal!2;
        uint X_OTHER = octal!1;

        // Every read / write / execute bit for owner, group and other.
        uint ALL = octal!777;
    }
}
94 
/**
 * Entry types supported by tar files. Each member's value is the character
 * stored in the type field of a tar header.
 *   Directory is given special treatment, all others
 *   have any content placed in the data field.
 */
public enum TarTypeFlag : char
{
    /// Stored as the character '0'.
    file = '0',
    /// Stored as a NUL byte.
    altFile = '\0',
    /// Stored as the character '1'.
    hardLink = '1',
    /// Stored as the character '2'.
    symbolicLink = '2',
    /// Stored as the character '3'.
    characterSpecial = '3',
    /// Stored as the character '4'.
    blockSpecial = '4',
    /// Stored as the character '5'.
    directory = '5',
    /// Stored as the character '6'.
    fifo = '6',
    /// Stored as the character '7'.
    contiguousFile = '7',
}
112 
113 /**
114  * Policy class for reading and writing tar archives.
115  * Features:
116  *      + Handles files and directories of arbitrary size
117  *      + Files and directories may have permissions
118  *      + Files and directories may optionally set an owner and group name/id.
119  * Limitations:
120  *      + File paths may not exceed 255 characters - this is due to the format specification.
121  */
122 public class TarPolicy
123 {   
124     static immutable(bool) isReadOnly = false;
125     static immutable(bool) hasProperties = false;
126     
127     private static string trunc(string input)
128     {
129         for(size_t i=0; i < input.length; ++i)
130         {
131             if(input[i] == '\0')
132             {
133                 return input[0 .. i];
134             }
135         }
136         return input;
137     }
138 
139     private static string intToOctalStr(uint value)
140     {
141         auto writer = appender!(string)();
142         formattedWrite(writer, "%o ", value);
143         return writer.data;
144     }
145     
146     private static string longToOctalStr(ulong value)
147     {
148         auto writer = appender!(string)();
149         formattedWrite(writer, "%o ", value);
150         return writer.data;
151     }
152     
153     private static uint octalStrToInt(char[] octal)
154     {
155         string s = cast(string)(std..string.strip(octal));
156         int result = 0;
157         formattedRead(s, "%o ", &result);
158         return result;
159     }
160     
161     private static ulong octalStrToLong(char[] octal)
162     {
163         string s = cast(string)(std..string.strip(octal));
164         int result = 0;
165         formattedRead(s, "%o ", &result);
166         return result;
167     }
168     
169     private static char[] strToBytes(string str, uint length)
170     {
171         char[] result = new char[length];
172         result[0 .. min(str.length, length)] = str;
173         result[str.length .. $] = 0;
174         return result;
175     }
176     
177     private static T[] nullArray(T)(uint length)
178     {
179         T[] result = new T[length];
180         result[0 .. $] = 0;
181         return result;
182     }
183     
184     private struct TarHeader
185     {
186         private static uint unsignedSum(char[] values)
187         {
188             uint result = 0;
189             foreach(char c ; values)
190             {
191                 result += c;
192             }
193             return result;
194         }
195         
196         private static uint signedSum(char[] values)
197         {
198             uint result = 0;
199             foreach(byte b ; cast(byte[])values)
200             {
201                 result += b;
202             }
203             return result;
204         }
205         
206         char[100] filename;
207         char[8] mode;
208         char[8] ownerId;
209         char[8] groupId;
210         char[12] size;
211         char[12] modificationTime;
212         char[8] checksum;
213         char linkId;
214         char[100] linkedFilename;
215         
216         char[6] magic;
217         char[2] tarVersion;
218         char[32] owner;
219         char[32] group;
220         char[8] deviceMajorNumber;
221         char[8] deviceMinorNumber;
222         char[155] prefix;
223         char[12] padding;
224         
225         bool confirmChecksum()
226         {
227             uint apparentChecksum = octalStrToInt(checksum);
228             uint currentSum = calculateUnsignedChecksum();
229             
230             if(apparentChecksum != currentSum)
231             {
232                 // Handle old tars which use a broken implementation that calculated the
233                 // checksum incorrectly (using signed chars instead of unsigned).
234                 currentSum = calculateSignedChecksum();
235                 if(apparentChecksum != currentSum)
236                 {
237                     return false;
238                 }
239             }
240             return true;
241         }
242         
243         void nullify()
244         {
245             filename = 0;
246             mode = 0;
247             ownerId = 0;
248             groupId = 0;
249             size = 0;
250             modificationTime = 0;
251             checksum = 0;
252             linkId = 0;
253             magic = 0;
254             tarVersion = 0;
255             owner = 0;
256             group = 0;
257             deviceMajorNumber = 0;
258             deviceMinorNumber = 0;
259             prefix = 0;
260             padding = 0;
261         }
262         
263         uint calculateUnsignedChecksum()
264         {
265             uint sum = 0;
266             sum += unsignedSum(filename);
267             sum += unsignedSum(mode);
268             sum += unsignedSum(ownerId);
269             sum += unsignedSum(groupId);
270             sum += unsignedSum(size);
271             sum += unsignedSum(modificationTime);
272             sum += 32 * 8; // checksum is treated as all blanks
273             sum += linkId;
274             sum += unsignedSum(linkedFilename);
275             sum += unsignedSum(magic);
276             sum += unsignedSum(tarVersion); 
277             sum += unsignedSum(owner);
278             sum += unsignedSum(group);
279             sum += unsignedSum(deviceMajorNumber);
280             sum += unsignedSum(deviceMinorNumber);
281             sum += unsignedSum(prefix);
282             return sum;
283         }
284         
285         uint calculateSignedChecksum()
286         {
287             uint sum = 0;
288             sum += signedSum(filename);
289             sum += signedSum(mode);
290             sum += signedSum(ownerId);
291             sum += signedSum(groupId);
292             sum += signedSum(size);
293             sum += signedSum(modificationTime);
294             sum += 32 * 8; // checksum is treated as all blanks
295             sum += linkId;
296             sum += signedSum(linkedFilename);
297             sum += signedSum(magic);
298             sum += signedSum(tarVersion); 
299             sum += signedSum(owner);
300             sum += signedSum(group);
301             sum += signedSum(deviceMajorNumber);
302             sum += signedSum(deviceMinorNumber);
303             sum += signedSum(prefix);
304             return sum;
305         }
306     }
307     
308     private static ubyte[] POSIX_MAGIC_NUM = cast(ubyte[])"ustar\0";
309     
310     /**
311      * Class for directories
312      */
313     public static class DirectoryImpl : ArchiveDirectory!(TarPolicy)
314     {
315         this() { super(""); }
316         this(string path) { super(path); }
317         this(string[] path) { super(path); }
318         
319         public uint permissions = TarPermissions.DIRECTORY | TarPermissions.ALL;
320         public ulong modificationTime;
321         
322         // Posix Extended Fields
323         public string owner = "";
324         public string group = "";
325     }
326     
327     /**
328      * Class for files
329      */
330     public static class FileImpl : ArchiveMember
331     {
332         public this() { super(false, ""); }
333         public this(string path) { super(false, path); }
334         public this(string[] path) { super(false, path); }
335 
336         public uint permissions = TarPermissions.FILE | TarPermissions.ALL;
337         public ulong modificationTime;
338         public TarTypeFlag typeFlag = TarTypeFlag.file;
339         public string linkName;
340         
341         @property immutable(ubyte)[] data()
342         {
343             return _data;
344         }
345         
346         @property void data(immutable(ubyte)[] newdata)
347         {
348             _data = newdata;
349         }
350         
351         @property void data(string newdata)
352         {
353             _data = cast(immutable(ubyte)[])newdata;
354         }
355         
356         // Posix Extended Fields
357         public string owner;
358         public string group;
359         
360         private immutable(ubyte)[] _data = null;
361     }
362     
363     /**
364      * Deserialize method which loads data from a tar archive.
365      */
366     public static void deserialize(Filter)(void[] data, Archive!(TarPolicy,Filter) archive)
367     {
368         char numNullHeaders = 0;
369         
370         uint i = 0;
371         
372         // Loop through all headers
373         while(numNullHeaders < 2 && i + 512 < data.length)
374         {
375             // Determine if null
376             bool isNull = true;
377             for(int j=0; j < 512; ++j)
378             {
379                 if((cast(char[])data)[i + j] != '\0')
380                 {
381                     isNull = false;
382                     break;
383                 }
384             }
385             
386             if(!isNull)
387             {
388                 TarHeader* header = cast(TarHeader*)(&data[i]);
389                 i += 512;
390                 
391                 // Check the checksum
392                 if(!header.confirmChecksum())
393                     throw new TarException("Invalid checksum");
394 
395                 // Make sure we've dropped off any trailing nuls (strip doens't work because strip doesn't check for nuls!)
396                 string filename = trunc(cast(string)header.filename);
397                 string owner = "";
398                 string group = "";
399                 
400                 if(header.magic == "ustar\0")
401                 {
402                     filename = trunc(cast(string)header.prefix) ~ filename;
403                     owner = trunc(cast(string)header.owner);
404                     group = trunc(cast(string)header.group);
405                 }
406 
407                 // Insert the file into the file list
408                 if(cast(TarTypeFlag)(header.linkId) == TarTypeFlag.directory)
409                 {
410                     DirectoryImpl dir = archive.addDirectory(filename);
411                     
412                     // Add additional ustar properties (or "" if not present)
413                     dir.owner = owner;
414                     dir.group = group;
415                 }
416                 else
417                 {
418                     FileImpl file = new FileImpl();
419                     file.path = filename;
420                     file.permissions = octalStrToInt(header.mode);
421                     uint size = octalStrToInt(header.size);
422                     file.modificationTime = octalStrToLong(header.modificationTime);
423                     file.typeFlag = cast(TarTypeFlag)(header.linkId);
424                     
425                     archive.addFile(file);
426 
427                     // Add additional ustar properties (or "" if not present)
428                     file.owner = owner;
429                     file.group = group;
430                     
431                     if(file.typeFlag == TarTypeFlag.hardLink || file.typeFlag == TarTypeFlag.symbolicLink)
432                     {
433                         file.linkName = cast(string)(header.linkedFilename);
434                     }
435                     
436                     file._data = assumeUnique!(ubyte)(cast(ubyte[])data[i .. i + size]);
437                     i += size;
438                     if(size % 512 != 0)
439                         i += (512 - (size % 512)); // Skip padding bytes in this chunk (if any)
440                 }
441             }
442             else
443             {
444                 ++numNullHeaders;
445                 i += 512;
446             }
447         }
448     }
449     
450     /**
451      * Serialize method which writes data to a tar archive
452      */
453     public static void[] serialize(Filter)(Archive!(TarPolicy,Filter) archive)
454     {
455         ubyte[] serializeDirectory(DirectoryImpl dir, bool isRoot = false)
456         {
457             auto result = appender!(ubyte[])();
458             TarHeader header;
459             
460             // Write out all files in the directory
461             foreach(file; dir.files)
462             {
463                 header.nullify();
464                 // Determine if we need the ustar extension
465                 string filename = file.path;
466                 string prefix = "";
467                 bool needUstar = false;
468 
469                 // Compute the proper filename and prefix, if needed.
470                 // Throw an exception if a filepath exceeds 255 characters.
471                 if(file.path.length > 100)
472                 {
473                     prefix = file.path[0 .. $-100];
474                     filename = file.path[$-100 .. $];
475 
476                     // Check if we exceed the maximum filepath length for tar archives.
477                     if(prefix.length > 155)
478                     {
479                         throw new TarException("Pths cannot exceed 255 characters in tar archives.");
480                     }
481 
482                     header.prefix = strToBytes(prefix, 155);
483 
484                     needUstar = true;
485                 }
486 
487 
488                 // Write out file header
489                 header.filename = strToBytes(filename, 100);
490                 header.mode = (rightJustify(intToOctalStr(file.permissions), 7) ~ "\0");
491                 header.ownerId = rightJustify(intToOctalStr(0), 7) ~ "\0";
492                 header.groupId = rightJustify(intToOctalStr(0), 7) ~ "\0";
493                 header.size = rightJustify(intToOctalStr(cast(uint)file._data.length), 11) ~ " ";
494                 header.modificationTime = rightJustify(longToOctalStr(file.modificationTime), 11) ~ " ";
495                 header.linkId = cast(char)(file.typeFlag);
496                 header.linkedFilename = strToBytes(file.linkName, 100);
497                 
498                 // Set owner name if needed.
499                 if(file.owner !is null && file.owner != "")
500                 {
501                     header.owner = strToBytes(file.owner, 32); 
502                     needUstar = true;
503                 }
504                 
505                 // Set group name if needed
506                 if(file.group !is null && file.group != "")
507                 {
508                     header.group = strToBytes(file.group, 32);
509                     needUstar = true;
510                 }
511                 
512                 // Only set the ustar extensions if needed.
513                 if(needUstar)
514                 {
515                     header.magic = strToBytes("ustar", 6);
516                 }
517 
518                 // Compute checksum last.
519                 header.checksum = rightJustify(intToOctalStr(header.calculateUnsignedChecksum()), 7) ~ "\0"; 
520                 
521                 // Write out the header
522                 result.put((cast(ubyte*)(&header))[0 .. 512]);
523                 
524                 // Write out file data
525                 result.put(file._data[0 .. $]);
526                 
527                 // Write out padding
528                 if(file._data.length % 512 != 0)
529                     result.put(nullArray!ubyte(512 - (file._data.length % 512)));
530             }
531             
532             // Write out all directories in the directory
533             foreach(directory; dir.directories)
534             {
535                 header.nullify();
536 
537                 string dirname = directory.path;
538                 bool needUstar = false;
539                 
540                 // Compute the proper filename and prefix, if needed.
541                 // Throw an exception if a filepath exceeds 255 characters.
542                 if(directory.path.length > 100)
543                 {
544                     string prefix = directory.path[0 .. $-100];
545                     dirname = directory.path[$-100 .. $];
546 
547                     // Check if we exceed the maximum filepath length for tar archives.
548                     if(prefix.length > 155)
549                     {
550                         throw new TarException("Paths cannot exceed 255 characters in tar archives.");
551                     }
552                     
553                     header.prefix = strToBytes(prefix, 155);
554 
555                     needUstar = true;
556                 }
557 
558                 header.filename = strToBytes(dirname, 100);
559                 header.mode = rightJustify(intToOctalStr(directory.permissions), 7) ~ "\0";
560                 header.ownerId = rightJustify(intToOctalStr(0), 7) ~ "\0";
561                 header.groupId = rightJustify(intToOctalStr(0), 7) ~ "\0";
562                 header.size = rightJustify(intToOctalStr(0), 11) ~ " ";
563                 header.modificationTime = rightJustify(longToOctalStr(directory.modificationTime), 11) ~ " ";
564                 header.linkId = cast(char)(TarTypeFlag.directory);
565                 
566                 // Set owner name if needed.
567                 if(directory.owner !is null && directory.owner != "")
568                 {
569                     header.owner = strToBytes(directory.owner, 32); 
570                     needUstar = true;
571                 }
572                 
573                 // Set group name if needed
574                 if(directory.group !is null && directory.group != "")
575                 {
576                     header.group = strToBytes(directory.group, 32);
577                     needUstar = true;
578                 }
579                 
580                 // Only set the ustar extensions if needed.
581                 if(needUstar)
582                 {
583                     header.magic = strToBytes("ustar", 6);
584                 }
585 
586                 // Compute checksum last.
587                 header.checksum = rightJustify(intToOctalStr(header.calculateUnsignedChecksum()), 7) ~ "\0"; 
588                 
589                 // Write out the header
590                 result.put((cast(ubyte*)(&header))[0 .. 512]);
591                 
592                 // Recurse into this directory and write out sub-directories and sub-files.
593                 result.put(serializeDirectory(directory));
594             }
595             
596             return result.data;
597         }
598         
599         auto finalResult = appender!(ubyte[])();
600         finalResult.put(serializeDirectory(archive.root, true));
601         finalResult.put(nullArray!ubyte(1024));
602         
603         return finalResult.data;
604     }
605 };
606 
607 /**
608  * Convenience alias that simplifies the interface for users
609  */
610 alias TarArchive = Archive!(TarPolicy);
611 
// Round trip test: build an archive in memory, serialize it, read it back
// and check the member counts.
unittest
{
    string textPayload = "HELLO\nI AM A FILE WITH SOME DATA\n1234567890\nABCDEFGHIJKLMOP";
    immutable(ubyte)[] binaryPayload = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

    TarArchive written = new TarArchive();

    // A file that lives directly in the top level directory.
    TarArchive.File topLevelFile = new TarArchive.File();
    topLevelFile.path = "apple.txt";
    topLevelFile.data = textPayload;
    written.addFile(topLevelFile);

    // A file that lives three directories deep.
    TarArchive.File nestedFile = new TarArchive.File("directory/directory/directory/apple.txt");
    nestedFile.data = binaryPayload;
    written.addFile(nestedFile);

    // Adding a directory that already exists.
    written.addDirectory("directory/");

    // Adding a directory that does not exist yet.
    written.addDirectory("newdirectory/");

    // Drop the directories that contain nothing.
    written.removeEmptyDirectories();

    // The only empty directory must be gone now.
    assert(written.getDirectory("newdirectory") is null);

    // Add the empty directory again so that it is part of the serialized output.
    written.addDirectory("newdirectory/");

    // Serialize the tar archive and construct a new tar archive from the result.
    TarArchive readBack = new TarArchive(written.serialize());

    // Both files must be present after the round trip.
    assert(readBack.getFile("apple.txt") !is null);
    assert(readBack.getFile("directory/directory/directory/apple.txt") !is null);

    // And there must be no extra directories or files.
    assert(readBack.numFiles() == 2);
    assert(readBack.numDirectories() == 4);
    assert(readBack.numMembers() == 6);
}