LZW Compressor
 All Files Functions Typedefs
lzw_v3.cpp File Reference

LZW file compressor. More...

#include <algorithm>
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <exception>
#include <fstream>
#include <ios>
#include <iostream>
#include <istream>
#include <limits>
#include <map>
#include <memory>
#include <ostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

Go to the source code of this file.

Typedefs

using CodeType = std::uint16_t
 Type used to store and retrieve codes.
 

Functions

void compress (std::istream &is, std::ostream &os)
 Compresses the contents of is and writes the result to os. More...
 
void decompress (std::istream &is, std::ostream &os)
 Decompresses the contents of is and writes the result to os. More...
 
void print_usage (const std::string &s="", bool su=true)
 Prints usage information and a custom error message. More...
 
int main (int argc, char *argv[])
 Actual program entry point. More...
 

Variables

const CodeType globals::dms {std::numeric_limits<CodeType>::max()}
 Dictionary Maximum Size (when reached, the dictionary will be reset)
 

Detailed Description

LZW file compressor.

Author
Julius Pettersson
Version
3

This is the C++11 implementation of a Lempel-Ziv-Welch single-file command-line compressor. It uses the simpler fixed-width code compression method. It was written with Doxygen comments.

See Also
http://en.wikipedia.org/wiki/Lempel%E2%80%93Ziv%E2%80%93Welch
http://marknelson.us/2011/11/08/lzw-revisited/
http://www.cs.duke.edu/csed/curious/compression/lzw.html
http://warp.povusers.org/EfficientLZW/index.html
http://en.cppreference.com/
http://www.doxygen.org/

Definition in file lzw_v3.cpp.

Function Documentation

void compress ( std::istream &  is,
std::ostream &  os 
)

Compresses the contents of is and writes the result to os.

Parameters
[in]isinput stream
[out]osoutput stream

Definition at line 53 of file lzw_v3.cpp.

Referenced by main().

54 {
55  std::map<std::pair<CodeType, char>, CodeType> dictionary;
56 
57  // "named" lambda function, used to reset the dictionary to its initial contents
58  const auto reset_dictionary = [&dictionary] {
59  dictionary.clear();
60 
61  const long int minc = std::numeric_limits<char>::min();
62  const long int maxc = std::numeric_limits<char>::max();
63 
64  for (long int c = minc; c <= maxc; ++c)
65  {
66  // to prevent Undefined Behavior, resulting from reading and modifying
67  // the dictionary object at the same time
68  const CodeType dictionary_size = dictionary.size();
69 
70  dictionary[{globals::dms, static_cast<char> (c)}] = dictionary_size;
71  }
72  };
73 
74  reset_dictionary();
75 
76  CodeType i {globals::dms}; // Index
77  char c;
78 
79  while (is.get(c))
80  {
81  // dictionary's maximum size was reached
82  if (dictionary.size() == globals::dms)
83  reset_dictionary();
84 
85  if (dictionary.count({i, c}) == 0)
86  {
87  // to prevent Undefined Behavior, resulting from reading and modifying
88  // the dictionary object at the same time
89  const CodeType dictionary_size = dictionary.size();
90 
91  dictionary[{i, c}] = dictionary_size;
92  os.write(reinterpret_cast<const char *> (&i), sizeof (CodeType));
93  i = dictionary.at({globals::dms, c});
94  }
95  else
96  i = dictionary.at({i, c});
97  }
98 
99  if (i != globals::dms)
100  os.write(reinterpret_cast<const char *> (&i), sizeof (CodeType));
101 }
void decompress ( std::istream &  is,
std::ostream &  os 
)

Decompresses the contents of is and writes the result to os.

Parameters
[in]isinput stream
[out]osoutput stream

Definition at line 108 of file lzw_v3.cpp.

Referenced by main().

109 {
110  std::vector<std::pair<CodeType, char>> dictionary;
111 
112  // "named" lambda function, used to reset the dictionary to its initial contents
113  const auto reset_dictionary = [&dictionary] {
114  dictionary.clear();
115  dictionary.reserve(globals::dms);
116 
117  const long int minc = std::numeric_limits<char>::min();
118  const long int maxc = std::numeric_limits<char>::max();
119 
120  for (long int c = minc; c <= maxc; ++c)
121  dictionary.push_back({globals::dms, static_cast<char> (c)});
122  };
123 
124  const auto rebuild_string = [&dictionary](CodeType k) -> std::vector<char> {
125  std::vector<char> s; // String
126 
127  while (k != globals::dms)
128  {
129  s.push_back(dictionary.at(k).second);
130  k = dictionary.at(k).first;
131  }
132 
133  std::reverse(s.begin(), s.end());
134  return s;
135  };
136 
137  reset_dictionary();
138 
139  CodeType i {globals::dms}; // Index
140  CodeType k; // Key
141 
142  while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType)))
143  {
144  // dictionary's maximum size was reached
145  if (dictionary.size() == globals::dms)
146  reset_dictionary();
147 
148  if (k > dictionary.size())
149  throw std::runtime_error("invalid compressed code");
150 
151  std::vector<char> s; // String
152 
153  if (k == dictionary.size())
154  {
155  dictionary.push_back({i, rebuild_string(i).front()});
156  s = rebuild_string(k);
157  }
158  else
159  {
160  s = rebuild_string(k);
161 
162  if (i != globals::dms)
163  dictionary.push_back({i, s.front()});
164  }
165 
166  os.write(&s.front(), s.size());
167  i = k;
168  }
169 
170  if (!is.eof() || is.gcount() != 0)
171  throw std::runtime_error("corrupted compressed file");
172 }
int main ( int  argc,
char *  argv[] 
)

Actual program entry point.

Parameters
argcnumber of command line arguments
[in]argvarray of command line arguments
Return values
EXIT_FAILUREfor failed operation
EXIT_SUCCESSfor successful operation

Definition at line 205 of file lzw_v3.cpp.

References compress(), decompress(), and print_usage().

206 {
207  if (argc != 4)
208  {
209  print_usage("Wrong number of arguments.");
210  return EXIT_FAILURE;
211  }
212 
213  enum class Mode {
214  Compress,
215  Decompress
216  };
217 
218  Mode m;
219 
220  if (std::string(argv[1]) == "-c")
221  m = Mode::Compress;
222  else
223  if (std::string(argv[1]) == "-d")
224  m = Mode::Decompress;
225  else
226  {
227  print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
228  return EXIT_FAILURE;
229  }
230 
231  const std::size_t buffer_size {1024 * 1024};
232 
233  // these custom buffers should be larger than the default ones
234  const std::unique_ptr<char[]> input_buffer(new char[buffer_size]);
235  const std::unique_ptr<char[]> output_buffer(new char[buffer_size]);
236 
237  std::ifstream input_file;
238  std::ofstream output_file;
239 
240  input_file.rdbuf()->pubsetbuf(input_buffer.get(), buffer_size);
241  input_file.open(argv[2], std::ios_base::binary);
242 
243  if (!input_file.is_open())
244  {
245  print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
246  return EXIT_FAILURE;
247  }
248 
249  output_file.rdbuf()->pubsetbuf(output_buffer.get(), buffer_size);
250  output_file.open(argv[3], std::ios_base::binary);
251 
252  if (!output_file.is_open())
253  {
254  print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
255  return EXIT_FAILURE;
256  }
257 
258  try
259  {
260  input_file.exceptions(std::ios_base::badbit);
261  output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
262 
263  if (m == Mode::Compress)
264  compress(input_file, output_file);
265  else
266  if (m == Mode::Decompress)
267  decompress(input_file, output_file);
268  }
269  catch (const std::ios_base::failure &f)
270  {
271  print_usage(std::string("File input/output failure: ") + f.what() + '.', false);
272  return EXIT_FAILURE;
273  }
274  catch (const std::exception &e)
275  {
276  print_usage(std::string("Caught exception: ") + e.what() + '.', false);
277  return EXIT_FAILURE;
278  }
279 
280  return EXIT_SUCCESS;
281 }
void print_usage ( const std::string &  s = "",
bool  su = true 
)

Prints usage information and a custom error message.

Parameters
scustom error message to be printed
suShow Usage information

Definition at line 179 of file lzw_v3.cpp.

Referenced by main().

180 {
181  if (!s.empty())
182  std::cerr << "\nERROR: " << s << '\n';
183 
184  if (su)
185  {
186  std::cerr << "\nUsage:\n";
187  std::cerr << "\tprogram -flag input_file output_file\n\n";
188  std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
189  std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
190  std::cerr << "Examples:\n";
191  std::cerr << "\tlzw_v3.exe -c license.txt license.lzw\n";
192  std::cerr << "\tlzw_v3.exe -d license.lzw new_license.txt\n";
193  }
194 
195  std::cerr << std::endl;
196 }