LZW Compressor
 All Classes Files Functions Variables Typedefs
lzw_v5.cpp
Go to the documentation of this file.
1 
2 
3 
4 
5 
6 
7 
8 
9 
10 
11 
12 
13 
14 
15 
16 
17 
18 
19 
20 
21 #include <algorithm>
22 #include <array>
23 #include <climits>
24 #include <cstddef>
25 #include <cstdint>
26 #include <cstdlib>
27 #include <exception>
28 #include <fstream>
29 #include <ios>
30 #include <iostream>
31 #include <istream>
32 #include <limits>
33 #include <memory>
34 #include <ostream>
35 #include <stdexcept>
36 #include <string>
37 #include <utility>
38 #include <vector>
39 
41 using CodeType = std::uint16_t;
42 
43 namespace globals {
44 
46 const CodeType dms {std::numeric_limits<CodeType>::max()};
47 
48 } // namespace globals
49 
54 
58  struct Node {
59 
64  explicit Node(char c): first(globals::dms), c(c), left(globals::dms), right(globals::dms)
65  {
66  }
67 
69  char c;
72  };
73 
74 public:
75 
81  {
82  const long int minc = std::numeric_limits<char>::min();
83  const long int maxc = std::numeric_limits<char>::max();
84  CodeType k {0};
85 
86  for (long int c = minc; c <= maxc; ++c)
87  initials[static_cast<unsigned char> (c)] = k++;
88 
89  vn.reserve(globals::dms);
90  reset();
91  }
92 
97  void reset()
98  {
99  vn.clear();
100 
101  const long int minc = std::numeric_limits<char>::min();
102  const long int maxc = std::numeric_limits<char>::max();
103 
104  for (long int c = minc; c <= maxc; ++c)
105  vn.push_back(Node(c));
106  }
107 
116  {
117  // dictionary's maximum size was reached
118  if (vn.size() == globals::dms)
119  reset();
120 
121  if (i == globals::dms)
122  return search_initials(c);
123 
124  const CodeType vn_size = vn.size();
125  CodeType ci {vn[i].first}; // Current Index
126 
127  if (ci != globals::dms)
128  {
129  while (true)
130  if (c < vn[ci].c)
131  {
132  if (vn[ci].left == globals::dms)
133  {
134  vn[ci].left = vn_size;
135  break;
136  }
137  else
138  ci = vn[ci].left;
139  }
140  else
141  if (c > vn[ci].c)
142  {
143  if (vn[ci].right == globals::dms)
144  {
145  vn[ci].right = vn_size;
146  break;
147  }
148  else
149  ci = vn[ci].right;
150  }
151  else // c == vn[ci].c
152  return ci;
153  }
154  else
155  vn[i].first = vn_size;
156 
157  vn.push_back(Node(c));
158  return globals::dms;
159  }
160 
166  CodeType search_initials(char c) const
167  {
168  return initials[static_cast<unsigned char> (c)];
169  }
170 
171 private:
172 
174  std::vector<Node> vn;
175 
177  std::array<CodeType, 1u << CHAR_BIT> initials;
178 };
179 
185 void compress(std::istream &is, std::ostream &os)
186 {
188  CodeType i {globals::dms}; // Index
189  char c;
190 
191  while (is.get(c))
192  {
193  const CodeType temp {i};
194 
195  if ((i = ed.search_and_insert(temp, c)) == globals::dms)
196  {
197  os.write(reinterpret_cast<const char *> (&temp), sizeof (CodeType));
198  i = ed.search_initials(c);
199  }
200  }
201 
202  if (i != globals::dms)
203  os.write(reinterpret_cast<const char *> (&i), sizeof (CodeType));
204 }
205 
211 void decompress(std::istream &is, std::ostream &os)
212 {
213  std::vector<std::pair<CodeType, char>> dictionary;
214 
215  // "named" lambda function, used to reset the dictionary to its initial contents
216  const auto reset_dictionary = [&dictionary] {
217  dictionary.clear();
218  dictionary.reserve(globals::dms);
219 
220  const long int minc = std::numeric_limits<char>::min();
221  const long int maxc = std::numeric_limits<char>::max();
222 
223  for (long int c = minc; c <= maxc; ++c)
224  dictionary.push_back({globals::dms, static_cast<char> (c)});
225  };
226 
227  const auto rebuild_string = [&dictionary](CodeType k) -> const std::vector<char> * {
228  static std::vector<char> s; // String
229 
230  s.clear();
231 
232  // the length of a string cannot exceed the dictionary's number of entries
233  s.reserve(globals::dms);
234 
235  while (k != globals::dms)
236  {
237  s.push_back(dictionary[k].second);
238  k = dictionary[k].first;
239  }
240 
241  std::reverse(s.begin(), s.end());
242  return &s;
243  };
244 
245  reset_dictionary();
246 
247  CodeType i {globals::dms}; // Index
248  CodeType k; // Key
249 
250  while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType)))
251  {
252  // dictionary's maximum size was reached
253  if (dictionary.size() == globals::dms)
254  reset_dictionary();
255 
256  if (k > dictionary.size())
257  throw std::runtime_error("invalid compressed code");
258 
259  const std::vector<char> *s; // String
260 
261  if (k == dictionary.size())
262  {
263  dictionary.push_back({i, rebuild_string(i)->front()});
264  s = rebuild_string(k);
265  }
266  else
267  {
268  s = rebuild_string(k);
269 
270  if (i != globals::dms)
271  dictionary.push_back({i, s->front()});
272  }
273 
274  os.write(&s->front(), s->size());
275  i = k;
276  }
277 
278  if (!is.eof() || is.gcount() != 0)
279  throw std::runtime_error("corrupted compressed file");
280 }
281 
287 void print_usage(const std::string &s = "", bool su = true)
288 {
289  if (!s.empty())
290  std::cerr << "\nERROR: " << s << '\n';
291 
292  if (su)
293  {
294  std::cerr << "\nUsage:\n";
295  std::cerr << "\tprogram -flag input_file output_file\n\n";
296  std::cerr << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n";
297  std::cerr << "`input_file' and `output_file' are distinct files.\n\n";
298  std::cerr << "Examples:\n";
299  std::cerr << "\tlzw_v5.exe -c license.txt license.lzw\n";
300  std::cerr << "\tlzw_v5.exe -d license.lzw new_license.txt\n";
301  }
302 
303  std::cerr << std::endl;
304 }
305 
313 int main(int argc, char *argv[])
314 {
315  if (argc != 4)
316  {
317  print_usage("Wrong number of arguments.");
318  return EXIT_FAILURE;
319  }
320 
321  enum class Mode {
322  Compress,
323  Decompress
324  };
325 
326  Mode m;
327 
328  if (std::string(argv[1]) == "-c")
329  m = Mode::Compress;
330  else
331  if (std::string(argv[1]) == "-d")
332  m = Mode::Decompress;
333  else
334  {
335  print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
336  return EXIT_FAILURE;
337  }
338 
339  const std::size_t buffer_size {1024 * 1024};
340 
341  // these custom buffers should be larger than the default ones
342  const std::unique_ptr<char[]> input_buffer(new char[buffer_size]);
343  const std::unique_ptr<char[]> output_buffer(new char[buffer_size]);
344 
345  std::ifstream input_file;
346  std::ofstream output_file;
347 
348  input_file.rdbuf()->pubsetbuf(input_buffer.get(), buffer_size);
349  input_file.open(argv[2], std::ios_base::binary);
350 
351  if (!input_file.is_open())
352  {
353  print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
354  return EXIT_FAILURE;
355  }
356 
357  output_file.rdbuf()->pubsetbuf(output_buffer.get(), buffer_size);
358  output_file.open(argv[3], std::ios_base::binary);
359 
360  if (!output_file.is_open())
361  {
362  print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
363  return EXIT_FAILURE;
364  }
365 
366  try
367  {
368  input_file.exceptions(std::ios_base::badbit);
369  output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
370 
371  if (m == Mode::Compress)
372  compress(input_file, output_file);
373  else
374  if (m == Mode::Decompress)
375  decompress(input_file, output_file);
376  }
377  catch (const std::ios_base::failure &f)
378  {
379  print_usage(std::string("File input/output failure: ") + f.what() + '.', false);
380  return EXIT_FAILURE;
381  }
382  catch (const std::exception &e)
383  {
384  print_usage(std::string("Caught exception: ") + e.what() + '.', false);
385  return EXIT_FAILURE;
386  }
387 
388  return EXIT_SUCCESS;
389 }