LZW Compressor
 All Classes Files Functions Typedefs
lzw_v4.cpp File Reference

LZW file compressor. More...

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <functional>
#include <ios>
#include <iostream>
#include <istream>
#include <limits>
#include <map>
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>
#include "FSBAllocator/FSBAllocator.hh"

Go to the source code of this file.

Typedefs

using CodeType = std::uint16_t
 Type used to store and retrieve codes.
 

Functions

void compress (std::istream &is, std::ostream &os)
 Compresses the contents of is and writes the result to os. More...
 
void decompress (std::istream &is, std::ostream &os)
 Decompresses the contents of is and writes the result to os. More...
 
void print_usage (const std::string &s="", bool su=true)
 Prints usage information and a custom error message. More...
 
int main (int argc, char *argv[])
 Actual program entry point. More...
 

Variables

const CodeType globals::dms {std::numeric_limits<CodeType>::max()}
 Dictionary Maximum Size (when reached, the dictionary will be reset)
 

Detailed Description

LZW file compressor.

Author
Julius Pettersson
Version
4

This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line compressor. It uses the simpler fixed-width code compression method. It was written with Doxygen comments.

See Also
http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
http://marknelson.us/2011/11/08/lzw-revisited/
http://www.cs.duke.edu/csed/curious/compression/lzw.html
http://warp.povusers.org/EfficientLZW/index.html
http://en.cppreference.com/
http://www.doxygen.org/

Definition in file lzw_v4.cpp.

Function Documentation

void compress ( std::istream &  is,
std::ostream &  os 
)

Compresses the contents of is and writes the result to os.

Parameters
[in]isinput stream
[out]osoutput stream

Definition at line 56 of file lzw_v4.cpp.

Referenced by main().

57 {
58  using KeyType = std::pair<CodeType, char>;
59  using PairType = std::pair<const KeyType, CodeType>;
60 
61  std::map<KeyType, CodeType, std::less<KeyType>, FSBAllocator2<PairType>> dictionary;
62 
63  // "named" lambda function, used to reset the dictionary to its initial contents
64  const auto reset_dictionary = [&dictionary] {
65  dictionary.clear();
66 
67  const long int minc = std::numeric_limits<char>::min();
68  const long int maxc = std::numeric_limits<char>::max();
69 
70  for (long int c = minc; c <= maxc; ++c)
71  {
72  // to prevent Undefined Behavior, resulting from reading and modifying
73  // the dictionary object at the same time
74  const CodeType dictionary_size = dictionary.size();
75 
76  dictionary[{globals::dms, static_cast<char> (c)}] = dictionary_size;
77  }
78  };
79 
80  reset_dictionary();
81 
82  CodeType i {globals::dms}; // Index
83  char c;
84 
85  while (is.get(c))
86  {
87  // dictionary's maximum size was reached
88  if (dictionary.size() == globals::dms)
89  reset_dictionary();
90 
91  if (dictionary.count({i, c}) == 0)
92  {
93  // to prevent Undefined Behavior, resulting from reading and modifying
94  // the dictionary object at the same time
95  const CodeType dictionary_size = dictionary.size();
96 
97  dictionary[{i, c}] = dictionary_size;
98  os.write(reinterpret_cast<const char *> (&i), sizeof (CodeType));
99  i = dictionary.at({globals::dms, c});
100  }
101  else
102  i = dictionary.at({i, c});
103  }
104 
105  if (i != globals::dms)
106  os.write(reinterpret_cast<const char *> (&i), sizeof (CodeType));
107 }
void decompress ( std::istream &  is,
std::ostream &  os 
)

Decompresses the contents of is and writes the result to os.

Parameters
[in]isinput stream
[out]osoutput stream

Definition at line 114 of file lzw_v4.cpp.

Referenced by main().

115 {
116  std::vector<std::pair<CodeType, char>> dictionary;
117 
118  // "named" lambda function, used to reset the dictionary to its initial contents
119  const auto reset_dictionary = [&dictionary] {
120  dictionary.clear();
121  dictionary.reserve(globals::dms);
122 
123  const long int minc = std::numeric_limits<char>::min();
124  const long int maxc = std::numeric_limits<char>::max();
125 
126  for (long int c = minc; c <= maxc; ++c)
127  dictionary.push_back({globals::dms, static_cast<char> (c)});
128  };
129 
130  const auto rebuild_string = [&dictionary](CodeType k) -> const std::vector<char> * {
131  static std::vector<char> s; // String
132 
133  s.clear();
134 
135  // the length of a string cannot exceed the dictionary's number of entries
136  s.reserve(globals::dms);
137 
138  while (k != globals::dms)
139  {
140  s.push_back(dictionary[k].second);
141  k = dictionary[k].first;
142  }
143 
144  std::reverse(s.begin(), s.end());
145  return &s;
146  };
147 
148  reset_dictionary();
149 
150  CodeType i {globals::dms}; // Index
151  CodeType k; // Key
152 
153  while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType)))
154  {
155  // dictionary's maximum size was reached
156  if (dictionary.size() == globals::dms)
157  reset_dictionary();
158 
159  if (k > dictionary.size())
160  throw std::runtime_error("invalid compressed code");
161 
162  const std::vector<char> *s; // String
163 
164  if (k == dictionary.size())
165  {
166  dictionary.push_back({i, rebuild_string(i)->front()});
167  s = rebuild_string(k);
168  }
169  else
170  {
171  s = rebuild_string(k);
172 
173  if (i != globals::dms)
174  dictionary.push_back({i, s->front()});
175  }
176 
177  os.write(&s->front(), s->size());
178  i = k;
179  }
180 
181  if (!is.eof() || is.gcount() != 0)
182  throw std::runtime_error("corrupted compressed file");
183 }
int main ( int  argc,
char *  argv[] 
)

Actual program entry point.

Parameters
argcnumber of command line arguments
[in]argvarray of command line arguments
Return values
EXIT_FAILUREfor failed operation
EXIT_SUCCESSfor successful operation

Definition at line 216 of file lzw_v4.cpp.

References compress(), decompress(), and print_usage().

217 {
218  if (argc != 4)
219  {
220  print_usage("Wrong number of arguments.");
221  return EXIT_FAILURE;
222  }
223 
224  enum class Mode {
225  Compress,
226  Decompress
227  };
228 
229  Mode m;
230 
231  if (std::string(argv[1]) == "-c")
232  m = Mode::Compress;
233  else
234  if (std::string(argv[1]) == "-d")
235  m = Mode::Decompress;
236  else
237  {
238  print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
239  return EXIT_FAILURE;
240  }
241 
242  const std::size_t buffer_size {1024 * 1024};
243 
244  // these custom buffers should be larger than the default ones
245  const std::unique_ptr<char[]> input_buffer(new char[buffer_size]);
246  const std::unique_ptr<char[]> output_buffer(new char[buffer_size]);
247 
248  std::ifstream input_file;
249  std::ofstream output_file;
250 
251  input_file.rdbuf()->pubsetbuf(input_buffer.get(), buffer_size);
252  input_file.open(argv[2], std::ios_base::binary);
253 
254  if (!input_file.is_open())
255  {
256  print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
257  return EXIT_FAILURE;
258  }
259 
260  output_file.rdbuf()->pubsetbuf(output_buffer.get(), buffer_size);
261  output_file.open(argv[3], std::ios_base::binary);
262 
263  if (!output_file.is_open())
264  {
265  print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
266  return EXIT_FAILURE;
267  }
268 
269  try
270  {
271  input_file.exceptions(std::ios_base::badbit);
272  output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
273 
274  if (m == Mode::Compress)
275  compress(input_file, output_file);
276  else
277  if (m == Mode::Decompress)
278  decompress(input_file, output_file);
279  }
280  catch (const std::ios_base::failure &f)
281  {
282  print_usage(std::string("File input/output failure: ") + f.what() + '.', false);
283  return EXIT_FAILURE;
284  }
285  catch (const std::exception &e)
286  {
287  print_usage(std::string("Caught exception: ") + e.what() + '.', false);
288  return EXIT_FAILURE;
289  }
290 
291  return EXIT_SUCCESS;
292 }
void print_usage ( const std::string &  s = "",
bool  su = true 
)

Prints usage information and a custom error message.

Parameters
scustom error message to be printed
suShow Usage information

Definition at line 190 of file lzw_v4.cpp.

Referenced by main().

191 {
192  if (!s.empty())
193  std::cerr << "\nERROR: " << s << '\n';
194 
195  if (su)
196  {
197  std::cerr << "\nUsage:\n";
198  std::cerr << "\tprogram -flag input_file output_file\n\n";
199  std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
200  std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
201  std::cerr << "Examples:\n";
202  std::cerr << "\tlzw_v4.exe -c license.txt license.lzw\n";
203  std::cerr << "\tlzw_v4.exe -d license.lzw new_license.txt\n";
204  }
205 
206  std::cerr << std::endl;
207 }