LZW Compressor
 All Files Functions Typedefs
lzw_v1.cpp
Go to the documentation of this file.
1 
2 
3 
4 
5 
6 
7 
8 
9 
10 
11 
12 
13 
14 
15 
16 
17 
18 
19 
20 #include <cstdint>
21 #include <cstdlib>
22 #include <exception>
23 #include <fstream>
24 #include <ios>
25 #include <iostream>
26 #include <istream>
27 #include <limits>
28 #include <map>
29 #include <ostream>
30 #include <stdexcept>
31 #include <string>
32 #include <vector>
33 
/// Type used to store and retrieve codes.
using CodeType = std::uint16_t;

namespace globals {

/// Dictionary Maximum Size: the largest value a CodeType can hold.
/// compress() and decompress() reset their dictionaries when they reach this size.
const CodeType dms = std::numeric_limits<CodeType>::max();

} // namespace globals
43 
///
/// @brief Helper operator intended to simplify code.
/// @param vc   original vector (taken by value, so the caller's vector is untouched)
/// @param c    element to be appended
/// @returns    a vector holding the contents of `vc` followed by `c`
///
std::vector<char> operator + (std::vector<char> vc, char c)
{
    vc.insert(vc.end(), c);
    return vc;
}
55 
61 void compress(std::istream &is, std::ostream &os)
62 {
63  std::map<std::vector<char>, CodeType> dictionary;
64 
65  // "named" lambda function, used to reset the dictionary to its initial contents
66  const auto reset_dictionary = [&dictionary] {
67  dictionary.clear();
68 
69  const long int minc = std::numeric_limits<char>::min();
70  const long int maxc = std::numeric_limits<char>::max();
71 
72  for (long int c = minc; c <= maxc; ++c)
73  {
74  // to prevent Undefined Behavior, resulting from reading and modifying
75  // the dictionary object at the same time
76  const CodeType dictionary_size = dictionary.size();
77 
78  dictionary[{static_cast<char> (c)}] = dictionary_size;
79  }
80  };
81 
82  reset_dictionary();
83 
84  std::vector<char> s; // String
85  char c;
86 
87  while (is.get(c))
88  {
89  // dictionary's maximum size was reached
90  if (dictionary.size() == globals::dms)
91  reset_dictionary();
92 
93  s.push_back(c);
94 
95  if (dictionary.count(s) == 0)
96  {
97  // to prevent Undefined Behavior, resulting from reading and modifying
98  // the dictionary object at the same time
99  const CodeType dictionary_size = dictionary.size();
100 
101  dictionary[s] = dictionary_size;
102  s.pop_back();
103  os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType));
104  s = {c};
105  }
106  }
107 
108  if (!s.empty())
109  os.write(reinterpret_cast<const char *> (&dictionary.at(s)), sizeof (CodeType));
110 }
111 
117 void decompress(std::istream &is, std::ostream &os)
118 {
119  std::vector<std::vector<char>> dictionary;
120 
121  // "named" lambda function, used to reset the dictionary to its initial contents
122  const auto reset_dictionary = [&dictionary] {
123  dictionary.clear();
124  dictionary.reserve(globals::dms);
125 
126  const long int minc = std::numeric_limits<char>::min();
127  const long int maxc = std::numeric_limits<char>::max();
128 
129  for (long int c = minc; c <= maxc; ++c)
130  dictionary.push_back({static_cast<char> (c)});
131  };
132 
133  reset_dictionary();
134 
135  std::vector<char> s; // String
136  CodeType k; // Key
137 
138  while (is.read(reinterpret_cast<char *> (&k), sizeof (CodeType)))
139  {
140  // dictionary's maximum size was reached
141  if (dictionary.size() == globals::dms)
142  reset_dictionary();
143 
144  if (k > dictionary.size())
145  throw std::runtime_error("invalid compressed code");
146 
147  if (k == dictionary.size())
148  dictionary.push_back(s + s.front());
149  else
150  if (!s.empty())
151  dictionary.push_back(s + dictionary.at(k).front());
152 
153  os.write(&dictionary.at(k).front(), dictionary.at(k).size());
154  s = dictionary.at(k);
155  }
156 
157  if (!is.eof() || is.gcount() != 0)
158  throw std::runtime_error("corrupted compressed file");
159 }
160 
///
/// @brief Prints an optional error message and optional usage information to std::cerr.
/// @param s    error message; nothing is printed for it when empty
/// @param su   Show Usage: when true the usage text and examples are printed
///
/// A final newline is always written and the stream is flushed.
///
void print_usage(const std::string &s = "", bool su = true)
{
    std::ostream &err = std::cerr;

    if (s.size() != 0)
    {
        err << "\nERROR: " << s << '\n';
    }

    if (su)
    {
        err << "\nUsage:\n"
            << "\tprogram -flag input_file output_file\n\n"
            << "Where `flag' is either `c' for compressing, or `d' for decompressing, and\n"
            << "`input_file' and `output_file' are distinct files.\n\n"
            << "Examples:\n"
            << "\tlzw_v1.exe -c license.txt license.lzw\n"
            << "\tlzw_v1.exe -d license.lzw new_license.txt\n";
    }

    err << std::endl;
}
184 
192 int main(int argc, char *argv[])
193 {
194  if (argc != 4)
195  {
196  print_usage("Wrong number of arguments.");
197  return EXIT_FAILURE;
198  }
199 
200  enum class Mode {
201  Compress,
202  Decompress
203  };
204 
205  Mode m;
206 
207  if (std::string(argv[1]) == "-c")
208  m = Mode::Compress;
209  else
210  if (std::string(argv[1]) == "-d")
211  m = Mode::Decompress;
212  else
213  {
214  print_usage(std::string("flag `") + argv[1] + "' is not recognized.");
215  return EXIT_FAILURE;
216  }
217 
218  std::ifstream input_file(argv[2], std::ios_base::binary);
219 
220  if (!input_file.is_open())
221  {
222  print_usage(std::string("input_file `") + argv[2] + "' could not be opened.");
223  return EXIT_FAILURE;
224  }
225 
226  std::ofstream output_file(argv[3], std::ios_base::binary);
227 
228  if (!output_file.is_open())
229  {
230  print_usage(std::string("output_file `") + argv[3] + "' could not be opened.");
231  return EXIT_FAILURE;
232  }
233 
234  try
235  {
236  input_file.exceptions(std::ios_base::badbit);
237  output_file.exceptions(std::ios_base::badbit | std::ios_base::failbit);
238 
239  if (m == Mode::Compress)
240  compress(input_file, output_file);
241  else
242  if (m == Mode::Decompress)
243  decompress(input_file, output_file);
244  }
245  catch (const std::ios_base::failure &f)
246  {
247  print_usage(std::string("File input/output failure: ") + f.what() + '.', false);
248  return EXIT_FAILURE;
249  }
250  catch (const std::exception &e)
251  {
252  print_usage(std::string("Caught exception: ") + e.what() + '.', false);
253  return EXIT_FAILURE;
254  }
255 
256  return EXIT_SUCCESS;
257 }