xz/DOS: Add experimental 8.3 filename support.
authorLasse Collin <lasse.collin@tukaani.org>
Sun, 10 Apr 2011 10:09:42 +0000 (13:09 +0300)
committerLasse Collin <lasse.collin@tukaani.org>
Sun, 10 Apr 2011 10:09:42 +0000 (13:09 +0300)
This is incompatible with the 8.3 support patch made by
Juan Manuel Guerrero. I think this one is nicer, but
I need to get feedback from DOS users before saying
that this is the final version of 8.3 filename support.

src/xz/suffix.c

index c89f67f..e515626 100644 (file)
 
 #include "private.h"
 
+#ifdef __DJGPP__
+#      include <fcntl.h>
+#endif
+
 // For case-insensitive filename suffix on case-insensitive systems
 #if defined(TUKLIB_DOSLIKE) || defined(__VMS)
 #      define strcmp strcasecmp
@@ -45,6 +49,31 @@ has_dir_sep(const char *str)
 }
 
 
+#ifdef __DJGPP__
+/// \brief      Test for special suffix used for 8.3 short filenames (SFN)
+///
+/// \return     If str matches *.?- or *.??-, true is returned. Otherwise
+///             false is returned.
+static bool
+has_sfn_suffix(const char *str, size_t len)
+{
+       if (len >= 4 && str[len - 1] == '-' && str[len - 2] != '.'
+                       && !is_dir_sep(str[len - 2])) {
+               // *.?-
+               if (str[len - 3] == '.')
+                       return !is_dir_sep(str[len - 4]);
+
+               // *.??-
+               if (len >= 5 && !is_dir_sep(str[len - 3])
+                               && str[len - 4] == '.')
+                       return !is_dir_sep(str[len - 5]);
+       }
+
+       return false;
+}
+#endif
+
+
 /// \brief      Checks if src_name has given compressed_suffix
 ///
 /// \param      suffix      Filename suffix to look for
@@ -87,6 +116,9 @@ uncompressed_name(const char *src_name, const size_t src_len)
                { ".xz",    "" },
                { ".txz",   ".tar" }, // .txz abbreviation for .txt.gz is rare.
                { ".lzma",  "" },
+#ifdef __DJGPP__
+               { ".lzm",   "" },
+#endif
                { ".tlz",   ".tar" },
                // { ".gz",    "" },
                // { ".tgz",   ".tar" },
@@ -112,6 +144,17 @@ uncompressed_name(const char *src_name, const size_t src_len)
                                break;
                        }
                }
+
+#ifdef __DJGPP__
+               // Support also *.?- -> *.? and *.??- -> *.?? on DOS.
+               // This is done also when long filenames are available
+               // to keep it easy to decompress files created when
+               // long filename support wasn't available.
+               if (new_len == 0 && has_sfn_suffix(src_name, src_len)) {
+                       new_suffix = "";
+                       new_len = src_len - 1;
+               }
+#endif
        }
 
        if (new_len == 0 && custom_suffix != NULL)
@@ -134,21 +177,35 @@ uncompressed_name(const char *src_name, const size_t src_len)
 }
 
 
+/// This message is needed in multiple places in compressed_name(),
+/// so the message has been put into its own function.
+static void
+msg_suffix(const char *src_name, const char *suffix)
+{
+       message_warning(_("%s: File already has `%s' suffix, skipping"),
+                       src_name, suffix);
+       return;
+}
+
+
 /// \brief      Appends suffix to src_name
 ///
 /// In contrast to uncompressed_name(), we check only suffixes that are valid
 /// for the specified file format.
 static char *
-compressed_name(const char *src_name, const size_t src_len)
+compressed_name(const char *src_name, size_t src_len)
 {
        // The order of these must match the order in args.h.
-       static const char *const all_suffixes[][3] = {
+       static const char *const all_suffixes[][4] = {
                {
                        ".xz",
                        ".txz",
                        NULL
                }, {
                        ".lzma",
+#ifdef __DJGPP__
+                       ".lzm",
+#endif
                        ".tlz",
                        NULL
 /*
@@ -170,20 +227,27 @@ compressed_name(const char *src_name, const size_t src_len)
        const size_t format = opt_format - 1;
        const char *const *suffixes = all_suffixes[format];
 
+       // Look for known filename suffixes and refuse to compress them.
        for (size_t i = 0; suffixes[i] != NULL; ++i) {
                if (test_suffix(suffixes[i], src_name, src_len) != 0) {
-                       message_warning(_("%s: File already has `%s' "
-                                       "suffix, skipping"), src_name,
-                                       suffixes[i]);
+                       msg_suffix(src_name, suffixes[i]);
                        return NULL;
                }
        }
 
+#ifdef __DJGPP__
+       // Recognize also the special suffix that is used when long
+       // filename (LFN) support isn't available. This suffix is
+       // recognized on LFN systems too.
+       if (opt_format == FORMAT_XZ && has_sfn_suffix(src_name, src_len)) {
+               msg_suffix(src_name, "-");
+               return NULL;
+       }
+#endif
+
        if (custom_suffix != NULL) {
                if (test_suffix(custom_suffix, src_name, src_len) != 0) {
-                       message_warning(_("%s: File already has `%s' "
-                                       "suffix, skipping"), src_name,
-                                       custom_suffix);
+                       msg_suffix(src_name, custom_suffix);
                        return NULL;
                }
        }
@@ -199,7 +263,101 @@ compressed_name(const char *src_name, const size_t src_len)
 
        const char *suffix = custom_suffix != NULL
                        ? custom_suffix : suffixes[0];
-       const size_t suffix_len = strlen(suffix);
+       size_t suffix_len = strlen(suffix);
+
+#ifdef __DJGPP__
+       if (!_use_lfn(src_name)) {
+               // Long filename (LFN) support isn't available and we are
+               // limited to 8.3 short filenames (SFN).
+               //
+               // Look for suffix separator from the filename, and make sure
+               // that it is in the filename, not in a directory name.
+               const char *sufsep = strrchr(src_name, '.');
+               if (sufsep == NULL || sufsep[1] == '\0'
+                               || has_dir_sep(sufsep)) {
+                       // src_name has no filename extension.
+                       //
+                       // Examples:
+                       // xz foo         -> foo.xz
+                       // xz -F lzma foo -> foo.lzm
+                       // xz -S x foo    -> foox
+                       // xz -S x foo.   -> foo.x
+                       // xz -S x.y foo  -> foox.y
+                       // xz -S .x foo   -> foo.x
+                       // xz -S .x foo.  -> foo.x
+                       //
+                       // Avoid double dots:
+                       if (sufsep != NULL && sufsep[1] == '\0'
+                                       && suffix[0] == '.')
+                               --src_len;
+
+               } else if (custom_suffix == NULL
+                               && strcasecmp(sufsep, ".tar") == 0) {
+                       // ".tar" is handled specially.
+                       //
+                       // Examples:
+                       // xz foo.tar          -> foo.txz
+                       // xz -F lzma foo.tar  -> foo.tlz
+                       static const char *const tar_suffixes[] = {
+                               ".txz",
+                               ".tlz",
+                               // ".tgz",
+                       };
+                       suffix = tar_suffixes[format];
+                       suffix_len = 4;
+                       src_len -= 4;
+
+               } else {
+                       if (custom_suffix == NULL && opt_format == FORMAT_XZ) {
+                               // Instead of the .xz suffix, use a single
+                               // character at the end of the filename
+                               // extension. This is to minimize name
+                               // conflicts when compressing multiple files
+                               // with the same basename. E.g. foo.txt and
+                               // foo.exe become foo.tx- and foo.ex-. Dash
+                               // is rare as the last character of the
+                               // filename extension, so it seems to be
+                               // quite safe choice and it stands out better
+                               // in directory listings than e.g. x. For
+                               // comparison, gzip uses z.
+                               suffix = "-";
+                               suffix_len = 1;
+                       }
+
+                       if (suffix[0] == '.') {
+                               // The first character of the suffix is a dot.
+                               // Throw away the original filename extension
+                               // and replace it with the new suffix.
+                               //
+                               // Examples:
+                               // xz -F lzma foo.txt  -> foo.lzm
+                               // xz -S .x  foo.txt   -> foo.x
+                               src_len = sufsep - src_name;
+
+                       } else {
+                               // The first character of the suffix is not
+                               // a dot. Preserve the first 0-2 characters
+                               // of the original filename extension.
+                               //
+                               // Examples:
+                               // xz foo.txt         -> foo.tx-
+                               // xz -S x  foo.c     -> foo.cx
+                               // xz -S ab foo.c     -> foo.cab
+                               // xz -S ab foo.txt   -> foo.tab
+                               // xz -S abc foo.txt  -> foo.abc
+                               //
+                               // Truncate the suffix to three chars:
+                               if (suffix_len > 3)
+                                       suffix_len = 3;
+
+                               // If needed, overwrite 1-3 characters.
+                               if (strlen(sufsep) > 4 - suffix_len)
+                                       src_len = sufsep - src_name
+                                                       + 4 - suffix_len;
+                       }
+               }
+       }
+#endif
 
        char *dest_name = xmalloc(src_len + suffix_len + 1);