libtld: /home/snapwebsites/snapcpp/contrib/libtld/tests/tld_test_emails.cpp Source File

libtld  1.5.13
A library to determine the Top-Level Domain name of any URL.
tld_test_emails.cpp
Go to the documentation of this file.
1 /* TLD library -- test the TLD interface for emails
2  * Copyright (c) 2013-2019 Made to Order Software Corp. All Rights Reserved
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included
13  * in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
#include "libtld/tld.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <exception>
#include <sstream>
36 
38 int err_count = 0;
39 
41 int verbose = 0;
42 
43 
51 void error(const std::string& msg)
52 {
53  fprintf(stderr, "%s\n", msg.c_str());
54  ++err_count;
55 }
56 
57 
/* Evaluate the expression `s` and report a failure through error() unless
 * the evaluation throws an exception of type `e` (caught by const
 * reference and then ignored).
 *
 * The try/catch is wrapped in do { ... } while(0) so the macro expands to
 * exactly one statement; the semicolon written by the caller terminates
 * that statement instead of adding an empty one, which keeps the macro
 * usable inside an unbraced if/else.
 */
#define EXPECTED_THROW(s, e) \
    do \
    { \
        try \
        { \
            static_cast<void>(s); \
            error("error: bad." #s "() of \"\" did not throw an error."); \
        } \
        catch(const e&) \
        { \
        } \
    } \
    while(0)
68 
69 
81 {
83  const char * f_input_email;
85  int f_count;
86 };
87 
88 //const char * f_group;
89 //const char * f_original_email;
90 //const char * f_fullname;
91 //const char * f_username;
92 //const char * f_domain;
93 //const char * f_email_only;
94 //const char * f_canonicalized_email;
95 
98 {
99  { "", "MAILER-DAEMON@mail.exdox.com (Mail Delivery System)",
100  "", "MAILER-DAEMON", "mail.exdox.com", "MAILER-DAEMON@mail.exdox.com", "MAILER-DAEMON@mail.exdox.com" },
101 #if 1
102  { "", "alexis@m2osw.com",
103  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
104  { "", "a@m2osw.com",
105  "", "a", "m2osw.com", "a@m2osw.com", "a@m2osw.com" },
106  { "", "b@c.com",
107  "", "b", "c.com", "b@c.com", "b@c.com" },
108  { "", "alexis@m2osw.com",
109  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
110  { "", "\"Wilke, Alexis\" <alexis@m2osw.com>",
111  "Wilke, Alexis", "alexis", "m2osw.com", "alexis@m2osw.com", "\"Wilke, Alexis\" <alexis@m2osw.com>" },
112  { "", "(* Pascal Comments *) \t alexis@m2osw.com\n (Just (kidding) he! he!)",
113  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
114  { "", "(Start-Comment)alexis@ \t [ \t m2osw.com \t ] \n (More (comment) here)",
115  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
116  { "", "(Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there)",
117  "", "al.ex.is", "m2osw.com", "al.ex.is@m2osw.com", "al.ex.is@m2osw.com" },
118  { "", "< (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) >",
119  "", "al.ex.is", "m2osw.com", "al.ex.is@m2osw.com", "al.ex.is@m2osw.com" },
120  { "", "(With full name) Alexis Wilke < (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) >",
121  "Alexis Wilke", "al.ex.is", "m2osw.com", "al.ex.is@m2osw.com", "Alexis Wilke <al.ex.is@m2osw.com>" },
122  { "This Group", "",
123  "", "", "", "", "" },
124  { "This Group", "(With full name) Alexis Wilke < \n alexis \t @ \t [ \t m2osw.com \t ] \n (Less) >",
125  "Alexis Wilke", "alexis", "m2osw.com", "alexis@m2osw.com", "Alexis Wilke <alexis@m2osw.com>" },
126  { "People", "",
127  "", "", "", "", "" },
128  { "People", "Alexis Wilke <alexis@m2osw.com>",
129  "Alexis Wilke", "alexis", "m2osw.com", "alexis@m2osw.com", "Alexis Wilke <alexis@m2osw.com>" },
130  { "People", "John Smith <john@m2osw.com>",
131  "John Smith", "john", "m2osw.com", "john@m2osw.com", "John Smith <john@m2osw.com>" },
132  { "Lists", "",
133  "", "", "", "", "" },
134  { "Lists", "Contact <contact@m2osw.com>",
135  "Contact", "contact", "m2osw.com", "contact@m2osw.com", "Contact <contact@m2osw.com>" },
136  { "Lists", "Resume <resume@m2osw.com>",
137  "Resume", "resume", "m2osw.com", "resume@m2osw.com", "Resume <resume@m2osw.com>" },
138  { "", "normal@m2osw.com",
139  "", "normal", "m2osw.com", "normal@m2osw.com", "normal@m2osw.com" },
140  { "No-Reply", "",
141  "", "", "", "", "" },
142  { "No-Reply", "no-reply@m2osw.com",
143  "", "no-reply", "m2osw.com", "no-reply@m2osw.com", "no-reply@m2osw.com" },
144  { "", "\"Complex <name> for !a! \\\"USER\\\"\" <user@example.co.uk>",
145  "Complex <name> for !a! \"USER\"", "user", "example.co.uk", "user@example.co.uk", "\"Complex <name> for !a! \\\"USER\\\"\" <user@example.co.uk>" },
146  { "", "(Comment \n New-Line) alexis@m2osw.com",
147  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
148  { "", "(Comment (Sub-Comment (Sub-Sub-Comment (Sub-Sub-Sub-Comment \\) This is still the Sub-Sub-Sub-Comment!!!)))) alexis@m2osw.com",
149  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
150  { "Group with some sub-comments", "",
151  "", "", "", "", "" },
152  { "Group with some sub-comments", "alexis@m2osw.com",
153  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
154  // TBD: since the colons get canonicalized to %3A we do not need the '[' and ']' in the canonicalized version
155  { "", "\"Wilke, Alexis\" <\"alexis,wilke\"@[:special:.m2osw.com]>",
156  "Wilke, Alexis", "alexis,wilke", ":special:.m2osw.com", "\"alexis,wilke\"@[:special:.m2osw.com]", "\"Wilke, Alexis\" <\"alexis,wilke\"@%3Aspecial%3A.m2osw.com>" },
157  { "", "alexis@m2osw.com (Simple Comment)",
158  "", "alexis", "m2osw.com", "alexis@m2osw.com", "alexis@m2osw.com" },
159 #endif
160 
161  // end list with nulls
162  { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr }
163 };
164 
167 {
168  { "MAILER-DAEMON@mail.exdox.com (Mail Delivery System)", 1},
169 #if 1
170  { "alexis@m2osw.com", 1 },
171  { "a@m2osw.com", 1 },
172  { "b@c.com", 1 },
173  { " \t alexis@m2osw.com\n \t", 1 },
174  { "\"Wilke, Alexis\" <alexis@m2osw.com>", 1 },
175  { " (* Pascal Comments *) \t alexis@m2osw.com\n (Just (kidding) he! he!) \t", 1 },
176  { "(Start-Comment)alexis@ \t [ \t m2osw.com \t ] \n (More (comment) here) \r\n\t", 1 },
177  { "(Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) \r\n\t", 1 },
178  { "< (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) > \r\n\t", 1 },
179  { "(With full name) Alexis Wilke < (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) > \r\n\t", 1 },
180  { " (Now a group:) This Group: (With full name) Alexis Wilke < \n alexis \t @ \t [ \t m2osw.com \t ] \n (Less) >; \r\n\t", 2 },
181  { "People: Alexis Wilke <alexis@m2osw.com>, John Smith <john@m2osw.com>; Lists: Contact <contact@m2osw.com>, Resume <resume@m2osw.com>; normal@m2osw.com, No-Reply: no-reply@m2osw.com;", 9 },
182  { "\"Complex <name> for !a! \\\"USER\\\"\" <user@example.co.uk>", 1 },
183  { "(Comment \n New-Line) alexis@m2osw.com", 1 },
184  { "(Comment (Sub-Comment (Sub-Sub-Comment (Sub-Sub-Sub-Comment \\) This is still the Sub-Sub-Sub-Comment!!!)))) alexis@m2osw.com", 1 },
185  { "Group with (Comment (Sub-Comment (Sub-Sub-Comment (Sub-Sub-Sub-Comment \\) This is still the Sub-Sub-Sub-Comment!!!)))) some sub-comments \t : alexis@m2osw.com;", 2 },
186  { "\"Wilke, Alexis\" <\"alexis,wilke\"@[:special:.m2osw.com]>", 1 },
187  { "alexis@m2osw.com (Simple Comment)", 1 },
188 #endif
189 
190  // end of list
191  { nullptr, 0 }
192 };
193 
194 
/** \brief Transform an email string in a C-like string.
 *
 * The function makes a string safe to print on a console:
 *
 * \li the controls \\a, \\b, \\f, \\n, \\r, \\t, \\v are written as their
 *     C escape sequence;
 * \li any other byte below 0x20 is written in caret notation (^A, ^B...);
 * \li 0x7F is written as "<DEL>";
 * \li bytes 0x80 to 0xFF are written as \\xHH (two uppercase hex digits);
 * \li all other characters are copied verbatim.
 *
 * Each byte is handled as an unsigned char, so the hexadecimal table is
 * never indexed with a negative value on platforms where char is signed.
 *
 * \param[in] e  The string to transform; the scan stops at the first NUL.
 *
 * \return The printable version of \p e.
 */
std::string email_to_vstring(const std::string& e)
{
    static const char hex_digits[] = "0123456789ABCDEF";

    std::string result;

    for(const char *s(e.c_str()); *s != '\0'; ++s)
    {
        const unsigned char c(static_cast<unsigned char>(*s));
        if(c < ' ')
        {
            switch(c)
            {
            case '\a': result += "\\a"; break;
            case '\b': result += "\\b"; break;
            case '\f': result += "\\f"; break;
            case '\n': result += "\\n"; break;
            case '\r': result += "\\r"; break;
            case '\t': result += "\\t"; break;
            case '\v': result += "\\v"; break;
            default:
                // caret notation: 0x01 -> ^A, 0x02 -> ^B, ...
                result += '^';
                result += static_cast<char>(c + '@');
                break;

            }
        }
        else if(c == 0x7F)
        {
            result += "<DEL>";
        }
        else if(c >= 0x80)
        {
            // 0x80 itself is not printable ASCII either, so it is included
            result += "\\x";
            result += hex_digits[c >> 4];
            result += hex_digits[c & 15];
        }
        else
        {
            result += *s;
        }
    }

    return result;
}
254 
255 
256 void test_valid_emails()
257 {
258  const tld_email *results(list_of_results);
259  for(const valid_email *v(list_of_valid_emails); v->f_input_email != nullptr; ++v)
260  {
261  if(verbose)
262  {
263  printf("*** testing email \"%s\", start with C++ test\n", email_to_vstring(v->f_input_email).c_str());
264  fflush(stdout);
265  }
266 
267  const tld_email * const cresults(results);
268 
269  // C++ test
270  {
272  tld_result r(list.parse(v->f_input_email, 0));
273  int max(v->f_count);
274  if(r != TLD_RESULT_SUCCESS)
275  {
276  fprintf(stderr, "return value is %d instead of %d with [%s]\n", r, TLD_RESULT_SUCCESS, v->f_input_email);
277  error("error: unexpected return value.");
278  results += max;
279  }
280  else if(list.count() != max)
281  {
282  fprintf(stderr, "parse() returned %d as count, expected %d\n", list.count(), max);
283  error("error: unexpected count");
284  results += max;
285  }
286  else
287  {
288  // test the C++ function first
289  {
291  for(int i(0); i < max; ++i, ++results)
292  {
293  if(results->f_group == nullptr)
294  {
295  error("error: end of results array reached before completion of the test.\n");
296  return;
297  }
298 
299  if(!list.next(e))
300  {
301  error("error: next() returned false too soon.");
302  }
303  if(e.f_group != results->f_group)
304  {
305  error("error: next() returned the wrong group. Got \"" + e.f_group + "\" instead of \"" + results->f_group + "\".");
306  }
307  if(e.f_original_email != results->f_original_email)
308  {
309  error("error: next() returned the wrong original email. Got \"" + e.f_original_email + "\" instead of \"" + results->f_original_email + "\".");
310  }
311  if(e.f_fullname != results->f_fullname)
312  {
313  error("error: next() returned the wrong fullname. Got \"" + e.f_fullname + "\" instead of \"" + results->f_fullname + "\".");
314  }
315  if(e.f_username != results->f_username)
316  {
317  error("error: next() returned the wrong username. Got \"" + e.f_username + "\" instead of \"" + results->f_username + "\".");
318  }
319  if(e.f_domain != results->f_domain)
320  {
321  error("error: next() returned the wrong username. Got \"" + e.f_domain + "\" instead of \"" + results->f_domain + "\".");
322  }
323  if(e.f_email_only != results->f_email_only)
324  {
325  error("error: next() returned the wrong email only. Got \"" + e.f_email_only + "\" instead of \"" + results->f_email_only + "\".");
326  }
328  {
329  error("error: next() returned the wrong canonicalized email. Got \"" + e.f_canonicalized_email + "\" instead of \"" + results->f_canonicalized_email + "\".");
330  }
331  }
332  if(list.next(e))
333  {
334  error("error: next(e) returned the wrong result, it should be false after the whole set of emails were read.");
335  }
336  }
337  // try the C function which also allows us to test the rewind()
338  list.rewind();
339  {
340  results = cresults;
341  tld_email e;
342  for(int i(0); i < max; ++i, ++results)
343  {
344  if(!list.next(&e))
345  {
346  error("error: next() returned false too soon.");
347  }
348  if(strcmp(e.f_group, results->f_group) != 0)
349  {
350  error("error: next() returned the wrong group. Got \"" + std::string(e.f_group) + "\" from \"" + results->f_group + "\".");
351  }
352  if(strcmp(e.f_original_email, results->f_original_email) != 0)
353  {
354  error("error: next() returned the wrong original email. Got \"" + std::string(e.f_original_email) + "\" instead of \"" + results->f_original_email + "\".");
355  }
356  if(strcmp(e.f_fullname, results->f_fullname) != 0)
357  {
358  error("error: next() returned the wrong fullname.");
359  }
360  if(strcmp(e.f_username, results->f_username) != 0)
361  {
362  error("error: next() returned the wrong username.");
363  }
364  if(strcmp(e.f_domain, results->f_domain) != 0)
365  {
366  error("error: next() returned the wrong username.");
367  }
368  if(strcmp(e.f_email_only, results->f_email_only) != 0)
369  {
370  error("error: next() returned the wrong email only.");
371  }
372  if(strcmp(e.f_canonicalized_email, results->f_canonicalized_email) != 0)
373  {
374  error("error: next() returned the wrong canonicalized email.");
375  }
376  }
377  if(list.next(&e))
378  {
379  error("error: next(&e) returned the wrong result, it should be false after the whole set of emails were read.");
380  }
381  }
382  }
383  }
384 
385  if(verbose)
386  {
387  printf("*** C test now\n");
388  fflush(stdout);
389  }
390  // C test
391  {
393  list = tld_email_alloc();
394  tld_result r = tld_email_parse(list, v->f_input_email, 0);
395  const int max(v->f_count);
396  if(r != TLD_RESULT_SUCCESS)
397  {
398  fprintf(stderr, "return value is %d instead of %d for \"%s\"\n", r, TLD_RESULT_SUCCESS, v->f_input_email);
399  error("error: unexpected return value.");
400  }
401  else if(tld_email_count(list) != max)
402  {
403  fprintf(stderr, "parse() returned %d as count, expected %d\n", tld_email_count(list), max);
404  error("error: unexpected count");
405  }
406  else
407  {
408  // test the C functions; the loop runs twice so tld_email_rewind() gets exercised too
409  for(int repeat(0); repeat < 2; ++repeat)
410  {
411  results = cresults;
412  struct tld_email e;
413  for(int i(0); i < max; ++i, ++results)
414  {
415  if(results->f_group == nullptr)
416  {
417  error("error: end of results array reached before completion of the test.\n");
418  return;
419  }
420 
421  if(tld_email_next(list, &e) != 1)
422  {
423  error("error: next() returned false too soon.");
424  }
425  if(strcmp(e.f_group, results->f_group) != 0)
426  {
427  error("error: next() returned the wrong group. Got \"" + std::string(e.f_group) + "\" from \"" + results->f_group + "\".");
428  }
429  if(strcmp(e.f_original_email, results->f_original_email) != 0)
430  {
431  error("error: next() returned the wrong original email. Got \"" + std::string(e.f_original_email) + "\" instead of \"" + results->f_original_email + "\".");
432  }
433  if(strcmp(e.f_fullname, results->f_fullname) != 0)
434  {
435  error("error: next() returned the wrong fullname.");
436  }
437  if(strcmp(e.f_username, results->f_username) != 0)
438  {
439  error("error: next() returned the wrong username.");
440  }
441  if(strcmp(e.f_domain, results->f_domain) != 0)
442  {
443  error("error: next() returned the wrong username.");
444  }
445  if(strcmp(e.f_email_only, results->f_email_only) != 0)
446  {
447  error("error: next() returned the wrong email only.");
448  }
449  if(strcmp(e.f_canonicalized_email, results->f_canonicalized_email) != 0)
450  {
451  error("error: next() returned the wrong canonicalized email.");
452  }
453  }
454  if(tld_email_next(list, &e) != 0)
455  {
456  error("error: next(&e) returned the wrong result, it should be false after the whole set of emails were read.");
457  }
458  // try again
459  tld_email_rewind(list);
460  }
461  }
462  tld_email_free(list);
463  }
464  }
465 
466  {
467  // all valid atom characters
468  const char valid_chars[] =
469  "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
470  "abcdefghijklmnopqrstuvwxyz"
471  "0123456789"
472  "!#$%&'*+-/=?^_`{|}~" // here there is a NUL
473  ;
474  for(size_t i(0); i < sizeof(valid_chars) / sizeof(valid_chars[0]) - 1; ++i)
475  {
477  std::string e("abc");
478  e += valid_chars[i];
479  e += "def@m2osw.com";
480  if(verbose)
481  {
482  printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
483  fflush(stdout);
484  }
485  tld_result r(list.parse(e, 0));
486  if(r != TLD_RESULT_SUCCESS)
487  {
488  fprintf(stderr, "call to list.parse() failed with %d expected %d for email \"%s\" (all valid atom characters)\n", r, TLD_RESULT_SUCCESS, e.c_str());
489  error("error: unexpected return value.");
490  }
491  }
492  }
493 
494  {
495  // all valid quoted characters: " " to "\x7E" except the " and \ characters
496  if(sizeof(int) < 4)
497  {
498  error("error: the ctrl variable needs to be at least 32 bits");
499  return;
500  }
501  const int ctrl(1 << '\t');
502  for(size_t i(1); i <= 126; ++i)
503  {
504  switch(i)
505  {
506  case ' ': // at this point we disallow the space which causes problems with Snap!
507  case '\t': // at this point we disallow the tab which causes problems with Snap!
508  case '"':
509  case '\\':
510  case 0x7F: // not included in the loop anyway
511  break;
512 
513  default:
514  if(i >= ' ' || (ctrl & (1 << i)) != 0)
515  {
517  std::string e("\"abc");
518  e += static_cast<char>(i);
519  e += "def\"@m2osw.com";
520  if(verbose)
521  {
522  printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
523  fflush(stdout);
524  }
525  tld_result r(list.parse(e, 0));
526  if(r != TLD_RESULT_SUCCESS)
527  {
528  fprintf(stderr, "call to list.parse() failed with %d expected %d for email \"%s\" (all valid characters)\n", r, TLD_RESULT_SUCCESS, email_to_vstring(e).c_str());
529  error("error: unexpected return value.");
530  }
531  }
532  break;
533 
534  }
535  }
536  }
537 
538  {
539  // all valid quoted pair: '\t' and " " to "\x7E"
540  // -- at this time the \t and " " do not work here
541  for(size_t i(33); i <= 126; ++i)
542  {
544  std::string e("\"abc\\");
545  if(i == 31)
546  {
547  e += static_cast<char>('\t');
548  }
549  else
550  {
551  e += static_cast<char>(i);
552  }
553  e += "def\"@m2osw.com";
554  if(verbose)
555  {
556  printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
557  fflush(stdout);
558  }
559  tld_result r(list.parse(e, 0));
560  if(r != TLD_RESULT_SUCCESS)
561  {
562  fprintf(stderr, "call to list.parse() failed with %d expected %d for email \"%s\" (all valid quoted pairs)\n", r, TLD_RESULT_SUCCESS, email_to_vstring(e).c_str());
563  error("error: unexpected return value.");
564  }
565  }
566  }
567 
568  {
569  // all valid comment characters: \t, \r, \n and " " to "\x7E" except the (, ), and \ characters
570  if(sizeof(int) < 4)
571  {
572  error("error: the ctrl variable needs to be at least 32 bits");
573  return;
574  }
575  const int ctrl((1 << '\t') | (1 << '\r') | (1 << '\n'));
576  for(size_t i(1); i <= 126; ++i)
577  {
578  // we skip all the special characters in a comment since
579  // those are already tested somewhere else
580  switch(i)
581  {
582  case '(': // avoid a sub-comment
583  case ')': // avoid closing the comment mid-way
584  case '\\': // tested somewhere else
585  case 0x7F: // not included in the loop anyway
586  break;
587 
588  default:
589  if(i >= ' ' || (ctrl & (1 << i)) != 0)
590  {
592  std::string e("(Comment \"");
593  e += static_cast<char>(i);
594  e += "\" char.) alexis@m2osw.com";
595  if(verbose)
596  {
597  printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
598  fflush(stdout);
599  }
600  tld_result r(list.parse(e, 0));
601  if(r != TLD_RESULT_SUCCESS)
602  {
603  error("error: unexpected return value.");
604  }
605  }
606  break;
607 
608  }
609  }
610  }
611 
612  {
613  // all valid domain characters: "!" to "\x7E" except the [, ], and \ characters
614  for(size_t i('!'); i <= 126; ++i)
615  {
616  // a dot is valid but we cannot test it between two other dots
617  if(i == '[' || i == ']' || i == '\\' || i == '.')
618  {
619  continue;
620  }
622  std::string e("alexis@[ m2osw.");
623  e += static_cast<char>(i);
624  if(i == '%')
625  {
626  e += "25";
627  }
628  e += ".com\t]";
629  if(verbose)
630  {
631  printf("*** testing all atom characters with email \"%s\"\n", email_to_vstring(e).c_str());
632  fflush(stdout);
633  }
634  tld_result r(list.parse(e, 0));
635  if(r != TLD_RESULT_SUCCESS)
636  {
637  error("error: unexpected return value while testing a domain with special character \"" + e + "\"");
638  }
639  }
640  }
641 
642  {
643  if(tld_email_list::quote_string("Test quoting a simple comment", '(') != "(Test quoting a simple comment)")
644  {
645  error("error: unexpected return value when testing a simple comment quotation");
646  }
647  if(tld_email_list::quote_string("Test (quoting) a complex )comment(", '(') != "(Test \\(quoting\\) a complex \\)comment\\()")
648  {
649  error("error: unexpected return value when testing a complex comment quotation");
650  }
651  }
652 }
653 
654 
655 
656 
664 {
668  const char * f_input_email;
669 };
670 
671 const invalid_email list_of_invalid_emails[] =
672 {
673  { TLD_RESULT_NULL, "alexism2osw.com (missing @)" },
674  { TLD_RESULT_INVALID, "doug barbieri@m2osw.com\n \t (space in email address)" },
675  { TLD_RESULT_NO_TLD, "doug_barbieri@m2osw com\n \t (space in email domain)" },
676  { TLD_RESULT_NOT_FOUND, "doug_barbieri@m2osw.com org (space in email domain after dot)" },
677  { TLD_RESULT_NOT_FOUND, "<doug_barbieri@m2osw.com org> (space in email domain after dot)" },
678  { TLD_RESULT_INVALID, "<doug_barbieri@this sub domain.m2osw.com> (space in email domain after dot)" },
679  { TLD_RESULT_INVALID, " \v alexis@m2osw.com\n \t (bad control)" },
680  { TLD_RESULT_INVALID, " (* Pascal Comments *) \t alexis@m2osw.com\n (missing closing parenthesis\\)" },
681  { TLD_RESULT_INVALID, "(Start-Comment)alexis@ \t [ \t m2osw.com \t ] \n (extra after domain done) \"more\tdata\" \r\n\t" },
682  { TLD_RESULT_INVALID, "(Test with dots in user name) al.ex.is@ \t(missing closing bracket ]) [ \t m2osw.com \t " },
683  { TLD_RESULT_NULL, "< (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (Missing >) \r\n\t" },
684  { TLD_RESULT_INVALID, "(Full name with period) Alexis.Wilke < (Test with dots in user name) al.ex.is@ \t [ \t m2osw.com \t ] \n (More (comments) there) > \r\n\t" },
685  { TLD_RESULT_INVALID, " (Now a group:) This Group: (With full name) Alexis Wilke < \n alexis \t @ \t [ \t m2osw.com \t ] \n (missing ;) > \r\n\t" },
686  { TLD_RESULT_INVALID, "Good Group: alexis@m2osw.com, bad-group: test@example.com;" },
687  { TLD_RESULT_INVALID, "(No Group Name): alexis@m2osw.com;" },
688  { TLD_RESULT_INVALID, " (No Group Name) : alexis@m2osw.com;" },
690  { TLD_RESULT_INVALID, "(Group with CTRL) Group \v Unexpected: alexis@m2osw.com;" },
692  { TLD_RESULT_INVALID, "\"alexis@m2osw.com;\v\"" },
697  { TLD_RESULT_INVALID, "\"Alexis Wilke\\" }, // \ followed by NUL
698  { TLD_RESULT_INVALID, "(Comment with \\\\ followed by NUL: \\" },
699  { TLD_RESULT_INVALID, "(Test Errors Once Done) \"Wilke, Alexis\" <alexis@m2osw.com> \"Bad\"" },
700  { TLD_RESULT_INVALID, "(Comment with CTRL \b) \"Wilke, Alexis\" <alexis@m2osw.com>" },
701  { TLD_RESULT_INVALID, "[m2osw.com]" }, // missing user name
702  { TLD_RESULT_INVALID, "good@[bad-slash\\.com]" },
703  { TLD_RESULT_INVALID, "good@[bad[reopen.com]" },
704  { TLD_RESULT_INVALID, "good@[bad-duplicate.com] more.net" }, // two domains
705  { TLD_RESULT_INVALID, "(Test Errors Once Done) \"Wilke, Alexis\" <alexis@m2osw.com> [Bad]" },
706  { TLD_RESULT_INVALID, "(Test Errors Once Done) alexis@start[Bad]" },
707  { TLD_RESULT_INVALID, "(Test Errors Once Done) alexis@[first][Bad]" },
708  { TLD_RESULT_INVALID, "(Test Errors Once Done) alexis@[control:\v]" },
709  { TLD_RESULT_NULL, "(Test Errors Once Done) alexis@[ spaces BAD]" },
710  { TLD_RESULT_NULL, "(Wind Domain...) alexis@[ ]" },
711  { TLD_RESULT_NULL, "(More Spaces Test) alexis@[no-left-trim no-right-trim]" },
712  { TLD_RESULT_NULL, "(Dot Dot Dot Domain) alexis@[ . . . ]" },
713  { TLD_RESULT_INVALID, "(Dot Only Domain) alexis@[ . ]" },
714  { TLD_RESULT_INVALID, "(Spurious Angle) alexis>@m2osw.com" },
715  { TLD_RESULT_INVALID, "(Spurious Angle) alexis@m2osw.com>" },
716  { TLD_RESULT_INVALID, "(Double Angle) <alexis@m2osw.com>>" },
717  { TLD_RESULT_NULL, "(Missing domain) <alexis@>" },
718  { TLD_RESULT_NULL, "(Missing domain) alexis@" },
719  { TLD_RESULT_INVALID, "(2 domains) <alexis@[m2osw.com]bad>" },
720  { TLD_RESULT_INVALID, "(Double @) <alexis@m2osw.com> @" },
721  { TLD_RESULT_INVALID, "(Double @) alexis@m2osw.com@" },
722  { TLD_RESULT_INVALID, "(Extra Chars) <alexis@m2osw.com> bad" },
723  { TLD_RESULT_NULL, "(Empty username within brackets) <@m2osw.com>" },
724  { TLD_RESULT_NULL, "(Empty User Name) @m2osw.com" },
725  { TLD_RESULT_INVALID, "(Cannot start with a dot) .alexis@m2osw.com" },
726  { TLD_RESULT_INVALID, "(Cannot start with a dot) <.alexis@m2osw.com>" },
727  { TLD_RESULT_INVALID, "(Cannot end with a dot) alexis.@m2osw.com" },
728  { TLD_RESULT_INVALID, "(Cannot end with a dot) <alexis.@m2osw.com>" },
729  { TLD_RESULT_INVALID, "(Cannot include double dots) ale..xis@m2osw.com" },
730  { TLD_RESULT_NOT_FOUND, "(End domain with dot not considered valid!) alexis@m2osw.com." },
731  { TLD_RESULT_INVALID, "(End domain with dot not considered valid!) <alexis@m2osw.com.>" },
732  { TLD_RESULT_NULL, "(Bad Emails) alexis,m2osw.com" },
733  { TLD_RESULT_INVALID, "(Bad Char) alexis@m2osw\001com" },
734  { TLD_RESULT_NOT_FOUND, "(Bad Extension) alexis@m2osw.comm" },
735  { TLD_RESULT_INVALID, "(Bad Extension) alexis@m2osw.ar" },
736  { TLD_RESULT_INVALID, "(Bad Extension) alexis@m2osw.nom.ar" },
737  { TLD_RESULT_NO_TLD, "(Bad Extension) alexis@m2osw" },
738  { TLD_RESULT_INVALID, "(Bad Extension) alexis@[m2osw..com]" },
739 
740  // end of list
741  { TLD_RESULT_SUCCESS, nullptr }
742 };
743 
744 void test_invalid_emails()
745 {
746  for(const invalid_email *v(list_of_invalid_emails); v->f_input_email != nullptr; ++v)
747  {
748  if(verbose)
749  {
750  printf("+++ testing email \"%s\"\n", email_to_vstring(v->f_input_email).c_str());
751  }
752 
753  // C++ test
754  {
756  tld_result r(list.parse(v->f_input_email, 0));
757  if(r != v->f_result)
758  {
759  std::stringstream ss;
760  ss << "error: unexpected return value. Got " << static_cast<int>(r) << ", expected " << static_cast<int>(v->f_result) << " for \"" << v->f_input_email << "\" (C++)";
761  error(ss.str());
762  }
763  }
764 
765  // C test
766  {
768  list = tld_email_alloc();
769  tld_result r = tld_email_parse(list, v->f_input_email, 0);
770  if(r != v->f_result)
771  {
772  std::stringstream ss;
773  ss << "error: unexpected return value. Got " << static_cast<int>(r) << ", expected " << static_cast<int>(v->f_result) << " for \"" << v->f_input_email << "\" (C)";
774  error(ss.str());
775  }
776  tld_email_free(list);
777  list = nullptr;
778  }
779  }
780 }
781 
782 
783 void contract_furfilled(tld_email_list::tld_email_t & e)
784 {
785  if(!e.f_group.empty()
786  || !e.f_original_email.empty()
787  || !e.f_fullname.empty()
788  || !e.f_username.empty()
789  || !e.f_domain.empty()
790  || !e.f_email_only.empty()
791  || !e.f_canonicalized_email.empty())
792  {
793  error("error: one of the structure parameters was modified on error!");
794  }
795 }
796 
797 
798 void test_direct_email()
799 {
801 
803  // missing closing \"
804  EXPECTED_THROW(email.parse("\"blah alexis@m2osw.com"), std::logic_error);
805  contract_furfilled(email);
806 
807  // missing closing )
808  EXPECTED_THROW(email.parse("(comment alexis@m2osw.com"), std::logic_error);
809  contract_furfilled(email);
810 
811  // use of \ at the end of the comment
812  EXPECTED_THROW(email.parse("(comment\\"), std::logic_error);
813  contract_furfilled(email);
814 
815  // missing closing ]
816  EXPECTED_THROW(email.parse("alexis@[m2osw.com"), std::logic_error);
817  contract_furfilled(email);
818 
820  // missing closing )
821  EXPECTED_THROW(email.parse_group("Group (comment"), std::logic_error);
822  contract_furfilled(email);
823 
824  // use of \ at the end of the comment
825  EXPECTED_THROW(email.parse_group("Group (comment \\"), std::logic_error);
826  contract_furfilled(email);
827 }
828 
829 
830 
837 {
838  const char * f_field;
840 };
841 
849 const email_field_types list_of_email_field_types[] =
850 {
851  // make sure case does not have side effects
856 
857  // check all fields that are expected to include emails
859  { "resent-from", TLD_EMAIL_FIELD_TYPE_MAILBOX_LIST },
860  { "sender", TLD_EMAIL_FIELD_TYPE_MAILBOX },
861  { "resent-sender", TLD_EMAIL_FIELD_TYPE_MAILBOX },
864  { "reply-to", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
865  { "resent-to", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
866  { "resent-cc", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
868  { "resent-bcc", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST_OPT },
869 
870  // check all fields with a colon
871  { "from: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX_LIST },
872  { "resent-from: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX_LIST },
873  { "sender: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX },
874  { "resent-sender: someone", TLD_EMAIL_FIELD_TYPE_MAILBOX },
875  { "to: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
876  { "cc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
877  { "reply-to: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
878  { "resent-to: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
879  { "resent-cc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST },
880  { "bcc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST_OPT },
881  { "resent-bcc: someone", TLD_EMAIL_FIELD_TYPE_ADDRESS_LIST_OPT },
882 
883  // check other fields
884  { "message-id", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
885  { "date", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
886  { "subject", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
887  { "x-extension", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
888 
889  // check other fields with a colon
890  { "message-id: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
891  { "date: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
892  { "subject: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
893  { "x-extension: something", TLD_EMAIL_FIELD_TYPE_UNKNOWN },
894 
895  // check for invalid field names
896  { "s\xfc\x62ject", TLD_EMAIL_FIELD_TYPE_INVALID },
897  { "subj\xe9\x63t", TLD_EMAIL_FIELD_TYPE_INVALID },
898  { "-bad-dash", TLD_EMAIL_FIELD_TYPE_INVALID },
899  { "0bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
900  { "1bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
901  { "2bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
902  { "3bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
903  { "4bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
904  { "5bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
905  { "6bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
906  { "7bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
907  { "8bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
908  { "9bad-digit", TLD_EMAIL_FIELD_TYPE_INVALID },
909  { "" /*empty*/, TLD_EMAIL_FIELD_TYPE_INVALID },
910 };
911 
912 void test_email_field_types()
913 {
914  for(size_t i(0); i < sizeof(list_of_email_field_types) / sizeof(list_of_email_field_types[0]); ++i)
915  {
916  tld_email_field_type type(tld_email_list::email_field_type(list_of_email_field_types[i].f_field));
917  if(type != list_of_email_field_types[i].f_type)
918  {
919  std::stringstream ss;
920  ss << "error: email type mismatch for \"" << list_of_email_field_types[i].f_field
921  << "\", expected " << static_cast<int>(list_of_email_field_types[i].f_type)
922  << ", got " << static_cast<int>(type) << " instead.";
923  error(ss.str());
924  }
925  }
926 }
927 
928 
929 
930 int main(int argc, char *argv[])
931 {
932  printf("testing tld emails version %s\n", tld_version());
933 
934  if(argc > 1)
935  {
936  if(strcmp(argv[1], "-v") == 0)
937  {
938  verbose = 1;
939  }
940  }
941 
942  /* Call all the tests, one by one.
943  *
944  * Failures are "recorded" in the err_count global variable
945  * and the process stops with an error message and exit(1)
946  * if err_count is not zero.
947  *
948  * Exceptions that should not occur are expected to also
949  * be caught and reported as errors.
950  */
951  try
952  {
953  test_valid_emails();
954  test_invalid_emails();
955  test_direct_email();
956  test_email_field_types();
957  }
958  catch(const invalid_domain&)
959  {
960  error("error: caught an exception when all emails are expected to be valid.");
961  }
962 
963  if(err_count)
964  {
965  fprintf(stderr, "%d error%s occured.\n",
966  err_count, err_count != 1 ? "s" : "");
967  }
968  exit(err_count ? 1 : 0);
969 }
970 
971 /* vim: ts=4 sw=4 et
972  */
const tld_email list_of_results[]
List of results to verify all the fields of the parser output. There is one entry per group and email...
The input represents an optional list of email addresses.
Definition: tld.h:132
tld_email_field_type f_type
The type we expect the library to return for that field.
int count() const
Return the number of emails recorded.
Definition: tld_emails.cpp:791
std::string f_email_only
The complete email address without display name.
Definition: tld.h:194
const char * f_input_email
The valid emails to be parsed.
int main(int argc, char *argv[])
Console tool to generate the tld_data.c file.
Definition: tld_parser.cpp:907
std::string f_canonicalized_email
The email including the display name.
Definition: tld.h:195
const char * f_email_only
The complete email address without display name.
Definition: tld.h:121
void error(const std::string &msg)
Print an error.
Structure used to define a set of fields to test.
The input URI is empty.
Definition: tld.h:85
std::string f_group
The group this email was defined in.
Definition: tld.h:189
void rewind() const
Rewind the reader to the start of the list.
Definition: tld_emails.cpp:804
Define a valid email string.
const char * f_fullname
The user full or display name.
Definition: tld.h:118
The C++ side of the email list implementation.
Definition: tld.h:181
int f_count
The number of emails returned once f_input_email was parsed, plus one per group.
std::string f_username
The user being named in this email address.
Definition: tld.h:192
static tld_email_field_type email_field_type(const std::string &name)
Check whether a name represents a field with a list of emails.
Definition: tld_emails.cpp:892
std::string email_to_vstring(const std::string &e)
Transform an email string in a C-like string.
The public header of the libtld library.
int err_count
The number of errors encountered before exiting.
LIBTLD_EXPORT int tld_email_count(struct tld_email_list *list)
Return the number of emails found after a parse.
bool next(tld_email_t &e) const
Retrieve a copy of the next email information.
Definition: tld_emails.cpp:821
LIBTLD_EXPORT struct tld_email_list * tld_email_alloc()
Allocate a list of emails object.
const char * f_original_email
The email as read from the source.
Definition: tld.h:117
const char * f_canonicalized_email
The email including the display name.
Definition: tld.h:122
The input represents a mailbox list.
Definition: tld.h:129
const char * f_username
The user being named in this email address.
Definition: tld.h:119
Parts of one email.
Definition: tld.h:184
tld_result parse(const std::string &email)
Parse one email to a tld_email_t object.
Definition: tld_emails.cpp:975
The TLD was found, but it is marked as invalid.
Definition: tld.h:84
std::string f_domain
The domain part of the email address.
Definition: tld.h:193
tld_result f_result
The expected result; if the call does not return this exact value, the test fails. ...
LIBTLD_EXPORT void tld_email_free(struct tld_email_list *list)
Free the list of emails.
LIBTLD_EXPORT int tld_email_next(struct tld_email_list *list, struct tld_email *e)
Retrieve the next email.
The input does not represent valid emails.
Definition: tld.h:128
void list()
List the default schemes accepted.
The URI has a TLD that could not be determined.
Definition: tld.h:88
int verbose
Whether to be verbose, turned off by default.
const valid_email list_of_valid_emails[]
The list of valid emails used to check the parser out.
The input represents a mailbox.
Definition: tld.h:130
The input of email_field_type() was not valid.
Definition: tld.h:127
const char * f_group
The group this email was defined in.
Definition: tld.h:116
The input URI has no TLD defined.
Definition: tld.h:86
tld_result parse_group(const std::string &group)
Parse a group including comments.
std::string f_fullname
The user full or display name.
Definition: tld.h:191
static std::string quote_string(const std::string &name, char quote)
Transform a name if it requires quotation.
Definition: tld_emails.cpp:716
const char * f_domain
The domain part of the email address.
Definition: tld.h:120
Define an invalid email.
Exception thrown when querying for data of an invalid domain.
Definition: tld.h:154
LIBTLD_EXPORT const char * tld_version()
Return the version of the library.
Definition: tld.c:1043
tld_result
The result returned by tld().
Definition: tld.h:81
#define EXPECTED_THROW(s, e)
Macro to check that exceptions are raised without having to write the try/catch each time...
LIBTLD_EXPORT enum tld_result tld_email_parse(struct tld_email_list *list, const char *emails, int flags)
Parse a list of emails in the email list object.
const char * f_field
The name of the field to be tested.
Success! The TLD of the specified URI is valid.
Definition: tld.h:83
Parts of one email.
Definition: tld.h:114
const char * f_input_email
The pointer to the invalid email to be tested.
The input represents a mandatory list of mailboxes.
Definition: tld.h:131
tld_result parse(const std::string &emails, int flags)
Parse a new list of emails.
Definition: tld_emails.cpp:395
tld_email_field_type
Type of email as determined by the email_field_type() function.
Definition: tld.h:125
std::string f_original_email
The email as read from the source.
Definition: tld.h:190
LIBTLD_EXPORT void tld_email_rewind(struct tld_email_list *list)
Rewind the reading of the emails.

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.

Syndicate content

Snap! Websites
An Open Source CMS System in C++

Contact Us Directly