libtld: /home/snapwebsites/snapcpp/contrib/libtld/src/tld_emails.cpp Source File

libtld  1.5.13
A library to determine the Top-Level Domain name of any URL.
tld_emails.cpp
Go to the documentation of this file.
1 /* TLD library -- TLD, emails extractions
2  * Copyright (c) 2013-2019 Made to Order Software Corp. All Rights Reserved
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the
6  * "Software"), to deal in the Software without restriction, including
7  * without limitation the rights to use, copy, modify, merge, publish,
8  * distribute, sublicense, and/or sell copies of the Software, and to
9  * permit persons to whom the Software is furnished to do so, subject to
10  * the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included
13  * in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
19  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
20  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
21  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22  */
23 #include "libtld/tld.h"
24 
25 // C lib
26 //
27 #include <stdio.h>
28 #include <string.h>
29 
30 // C++ lib
31 //
32 #include <memory>
33 #include <iostream>
34 #include <algorithm>
35 
36 
285 namespace
286 {
/** \brief Remove the blanks found at the end of a string.
 *
 * The function shortens \p value in place so that it no longer ends
 * with a space, carriage return, line feed, or tab character. Blanks
 * found at the start or in the middle of the string are left alone.
 *
 * A string that is empty or only made of those four characters ends
 * up empty.
 *
 * \param[in,out] value  The string to shorten.
 */
void trim(std::string& value)
{
    std::string::size_type const last_kept(value.find_last_not_of(" \r\n\t"));
    if(last_kept == std::string::npos)
    {
        // nothing but blanks (or nothing at all): keep no character
        value.clear();
    }
    else
    {
        // drop everything that follows the last non-blank character
        value.erase(last_kept + 1);
    }
}
313 
/** \brief Check whether a character may follow a backslash.
 *
 * The function accepts the horizontal tab and every character from
 * the space (0x20) up, except the Delete character (0x7F). All the
 * other control characters, including NUL, are refused.
 *
 * Bytes over 127 are accepted in case the user has UTF-8 as input
 * data. The comparison is done on an unsigned char because the plain
 * char type is signed on many platforms; there, bytes over 127 are
 * negative and would otherwise be refused as if they were controls.
 *
 * \param[in] c  The character to check.
 *
 * \return true if \p c is acceptable in a quoted pair.
 */
bool is_quoted_char(char c)
{
    unsigned char const uc(static_cast<unsigned char>(c));

    // 0x7F is the Delete key which is viewed as a control
    return uc == '\t' || (uc >= ' ' && uc != 0x7F);
}
330 
/** \brief Check whether a character can be used in an atom.
 *
 * The accepted characters are the ASCII letters (A-Z, a-z), the ASCII
 * digits (0-9), and the following punctuation:
 *
 * \code
 *      ! # $ % & ' * + - / = ? ^ _ ` { | } ~
 * \endcode
 *
 * Anything else (spaces, controls, NUL, '@', '.', quotes, brackets,
 * bytes over 127, etc.) is refused.
 *
 * \param[in] c  The character to check.
 *
 * \return true if \p c is an atom character.
 */
bool is_atom_char(char c)
{
    // alphanumeric characters are the most common, check them first
    if((c >= '0' && c <= '9')
    || (c >= 'A' && c <= 'Z')
    || (c >= 'a' && c <= 'z'))
    {
        return true;
    }

    // then the punctuation allowed outside of quotes
    switch(c)
    {
    case '!':
    case '#':
    case '$':
    case '%':
    case '&':
    case '\'':
    case '*':
    case '+':
    case '-':
    case '/':
    case '=':
    case '?':
    case '^':
    case '_':
    case '`':
    case '{':
    case '|':
    case '}':
    case '~':
        return true;

    default:
        return false;

    }
}
357 } // no name namespace
358 
359 
368  //: f_input("") -- auto-init
369  : f_flags(0)
370  , f_result(TLD_RESULT_SUCCESS)
371  //, f_last_group("") -- auto-init
372  , f_pos(0)
373  //, f_email_list() -- auto-init
374 {
375 }
376 
395 tld_result tld_email_list::parse(std::string const & emails, int flags)
396 {
397  f_input = emails;
398  f_flags = flags;
400  f_last_group.clear();
401  f_pos = 0; // always rewind too
402  f_email_list.clear();
403 
406  {
407  f_email_list.clear();
408  }
409 
410  return f_result;
411 }
412 
419 {
420  // old emails supposedly accepted \0 in headers!
421  // we actually do not even support control characters as
422  // defined in the newest version of the Internet Message
423  // (RFC 5322); the following loop, though, does not check
424  // all the characters, only those necessary to cut all the
425  // email elements properly
426 
427  char const * start(f_input.c_str());
428  bool group(true);
429  char const * s(start);
430  for(; *s != '\0'; ++s)
431  {
432  switch(*s)
433  {
434  case ' ':
435  case '\n':
436  case '\r':
437  case '\t':
438  // skip leading spaces immediately
439  if(start == s)
440  {
441  start = s + 1;
442  }
443  break;
444 
445  case ';':
446  // end of this group
447  {
448  // trim ending spaces
449  char const * end(s);
450  for(; end > start; --end)
451  {
452  char const c(end[-1]);
453  if(c != ' ' && c != '\n' && c != '\r' && c != '\t')
454  {
455  break;
456  }
457  }
458  if(end - start > 0)
459  {
460  std::string const e(start, end - start);
461  tld_email_t email;
462  email.f_group = f_last_group;
463  f_result = email.parse(e);
465  {
466  return;
467  }
468  f_email_list.push_back(email);
469  }
470  }
471  f_last_group = "";
472  group = true;
473  start = s + 1;
474  break;
475 
476  case ':':
477  // group label
478  if(!group)
479  {
480  // wrong place for this ':' character
482  return;
483  }
484  {
485  // trim ending spaces
486  char const * end(s);
487  for(; end > start; --end)
488  {
489  char const c(end[-1]);
490  if(c != ' ' && c != '\n' && c != '\r' && c != '\t')
491  {
492  break;
493  }
494  }
495  if(end - start <= 0)
496  {
497  // An explicitly empty group name is not legal
499  return;
500  }
501  std::string const last_group(start, end - start);
502  // always add the group with an empty email (in case there
503  // is no email; and it clearly delimit each group.)
504  tld_email_t email;
505  f_result = email.parse_group(last_group);
507  {
508  // this happens if the group name is invalid
509  // (i.e. include controls or is empty)
510  return;
511  }
512  f_last_group = email.f_group;
513  f_email_list.push_back(email);
514  }
515  start = s + 1;
516  group = false; // cannot get another legal ':' until we find the ';'
517  break;
518 
519  case ',':
520  // email separation
521  {
522  // trim ending spaces
523  char const * end(s);
524  for(; end > start; --end)
525  {
526  char const c(end[-1]);
527  if(c != ' ' && c != '\n' && c != '\r' && c != '\t')
528  {
529  break;
530  }
531  }
532  if(end - start > 0)
533  {
534  std::string const e(start, end - start);
535  tld_email_t email;
536  email.f_group = f_last_group;
537  f_result = email.parse(e);
539  {
540  return;
541  }
542  f_email_list.push_back(email);
543  }
544  }
545  start = s + 1;
546  break;
547 
548  case '"':
549  // quoted strings may include escaped characters so it is a
550  // special case, also it could include a comma
551  for(++s; *s != '\0' && *s != '"'; ++s)
552  {
553  if(*s == '\\')
554  {
555  if(!is_quoted_char(s[1]))
556  {
557  // "\NUL" is never considered valid
559  return;
560  }
561  ++s;
562  }
563  }
564  if(*s == '\0')
565  {
566  // unterminated quoted string
568  return;
569  }
570  break;
571 
572  case '(':
573  {
574  // comments may include other comments
575  int comment_count(1);
576  for(++s; *s != '\0'; ++s)
577  {
578  if(*s == '\\')
579  {
580  if(!is_quoted_char(s[1]))
581  {
582  // "\NUL" is never considered valid
584  return;
585  }
586  ++s;
587  }
588  else if(*s == '(')
589  {
590  ++comment_count;
591  }
592  else if(*s == ')')
593  {
594  --comment_count;
595  if(comment_count <= 0)
596  {
597  break;
598  }
599  }
600  }
601  if(*s == '\0')
602  {
603  // unterminated comment
605  return;
606  }
607  }
608  break;
609 
610  case '[':
611  for(++s; *s != ']'; ++s)
612  {
613  if(*s == '\0' || *s == '[' || *s == '\\')
614  {
615  // domain literal cannot include '[', ']', or '\'
616  // and it must end with ']'
617  //
619  return;
620  }
621  }
622  break;
623 
624  }
625  }
626 
627  if(!group)
628  {
629  // the ';' to end a group is missing
631  return;
632  }
633 
634  {
635  // trim ending spaces
636  char const * end(s);
637  for(; end > start; --end)
638  {
639  char const c(end[-1]);
640  if(c != ' ' && c != '\n' && c != '\r' && c != '\t')
641  {
642  break;
643  }
644  }
645  if(end - start > 0)
646  {
647  std::string const e(start, end - start);
648  tld_email_t email;
649  email.f_group = f_last_group;
650  f_result = email.parse(e);
652  {
653  return;
654  }
655  f_email_list.push_back(email);
656  }
657  }
658 }
659 
716 std::string tld_email_list::quote_string(const std::string& str, char quote)
717 {
718  bool apply_quotes(false);
719  char open(quote);
720  char close('"');
721  char const * extra("");
722  char const * escape("");
723  switch(quote)
724  {
725  case '(':
726  close = ')';
727  apply_quotes = true;
728  escape = "()";
729  break;
730 
731  case '"':
732  extra = " \t";
733  escape = "\"";
734  break;
735 
736  case '\'':
737  open = '"';
738  close = '"';
739  extra = ".";
740  escape = "\"";
741  break;
742 
743  case '[':
744  close = ']';
745  extra = ".";
746  break;
747 
748  }
749  if(!apply_quotes)
750  {
751  // check whether quotes are required
752  char const * s(str.c_str());
753  for(; *s != '\0'; ++s)
754  {
755  if(!is_atom_char(*s) && strchr(extra, *s) == nullptr)
756  {
757  break;
758  }
759  }
760  apply_quotes = *s != '\0';
761  }
762  if(apply_quotes)
763  {
764  std::string result;
765  result += open;
766  for(const char *s(str.c_str()); *s != '\0'; ++s)
767  {
768  if(strchr(escape, *s) != nullptr)
769  {
770  result += '\\';
771  }
772  result += *s;
773  }
774  result += close;
775  return result;
776  }
777  return str;
778 }
779 
792 {
793  return static_cast<int>(f_email_list.size());
794 }
795 
805 {
806  f_pos = 0;
807 }
808 
822 {
823  if(f_pos >= static_cast<int>(f_email_list.size()))
824  {
825  return false;
826  }
827 
828  e = f_email_list[f_pos];
829  ++f_pos;
830 
831  return true;
832 }
833 
853 {
854  if(f_pos >= static_cast<int>(f_email_list.size()))
855  {
856  return false;
857  }
858 
859  e->f_group = f_email_list[f_pos].f_group.c_str();
860  e->f_original_email = f_email_list[f_pos].f_original_email.c_str();
861  e->f_fullname = f_email_list[f_pos].f_fullname.c_str();
862  e->f_username = f_email_list[f_pos].f_username.c_str();
863  e->f_domain = f_email_list[f_pos].f_domain.c_str();
864  e->f_email_only = f_email_list[f_pos].f_email_only.c_str();
865  e->f_canonicalized_email = f_email_list[f_pos].f_canonicalized_email.c_str();
866  ++f_pos;
867 
868  return true;
869 }
870 
893 {
894  std::string uname;
895  for(const char *u(name.c_str()); *u != '\0' && *u != ':'; ++u)
896  {
897  if(*u >= 'a' && *u <= 'z')
898  {
899  uname += *u & 0x5F;
900  }
901  else if((*u >= 'A' && *u <= 'Z')
902  || (*u >= '0' && *u <= '9')
903  || *u == '-')
904  {
905  uname += *u;
906  }
907  else
908  {
910  }
911  }
912  // the field must start with a letter and it cannot be empty
913  if(uname.empty() || uname[0] < 'A' || uname[0] > 'Z')
914  {
916  }
917 
918  if(uname == "FROM"
919  || uname == "RESENT-FROM")
920  {
922  }
923  if(uname == "SENDER"
924  || uname == "RESENT-SENDER")
925  {
927  }
928  if(uname == "TO"
929  || uname == "CC"
930  || uname == "REPLY-TO"
931  || uname == "RESENT-TO"
932  || uname == "RESENT-CC")
933  {
935  }
936  if(uname == "BCC"
937  || uname == "RESENT-BCC")
938  {
940  }
941 
943 }
944 
976 {
977  // The following is parsing ONE email since we already removed the
978  // groups, commas, semi-colons, leading and ending spaces.
979  //
980  std::string value;
981  value.reserve(email.length());
982  std::string fullname;
983  std::string username;
984  std::string domain;
985  uint32_t count = 0;
986  bool has_angle(false);
987  bool found_at(false);
988  bool found_dot(false);
989  bool done(false);
990  char const * start(email.c_str());
991  char const * s(start);
992  for(; *s != '\0'; ++s)
993  {
994  switch(*s)
995  {
996  case '"':
997  if(done)
998  {
999  return TLD_RESULT_INVALID;
1000  }
1001  for(++s; *s != '"'; ++s)
1002  {
1003  if(*s == '\0')
1004  {
1005  throw std::logic_error("somehow we found a \\0 in a quoted string in tld_email_t which should not happen since it was already checked validity in tld_email_t::parse()");
1006  }
1007  if(*s == '\\')
1008  {
1009  // the backslash is not part of the result
1010  ++s;
1011  if(*s == '\0')
1012  {
1013  // this cannot actually happen because we are
1014  // expected to capture those at the previous
1015  // level
1016  throw std::logic_error("somehow we found a \\0 in a quoted string after a backslash in tld_email_t which should not happen since it was already checked validity in tld_email_t::parse()"); // LCOV_EXCL_LINE
1017  }
1018  }
1019  if((static_cast<unsigned char>(*s) < ' ' && *s != '\t') || *s == 0x7F)
1020  {
1021  // do not accept any control characters
1022  // (note that this is sufficient to check all characters
1023  // after the \ character)
1024  //
1025  return TLD_RESULT_INVALID;
1026  }
1027  value += *s;
1028  }
1029  // on entry of this loop, *s == '"'
1030  do
1031  {
1032  ++s;
1033  }
1034  while(*s == ' ');
1035  if( *s != '<' && *s != '@' )
1036  {
1037  // A space afterwards is allowed, but '<' is expected
1038  //
1039  return TLD_RESULT_INVALID;
1040  }
1041  --s;
1042  break;
1043 
1044  case '(':
1045  // comments are completely ignored
1046  count = 1;
1047  for(++s; count > 0; ++s)
1048  {
1049  char c(*s);
1050  switch(c)
1051  {
1052  case '\0':
1053  throw std::logic_error("somehow we found a \\0 in a comment in tld_email_t which should not happen since it was already checked in tld_email_t::parse()");
1054 
1055  case '(':
1056  ++count;
1057  break;
1058 
1059  case ')':
1060  --count;
1061  break;
1062 
1063  case '\n':
1064  case '\r':
1065  case '\t':
1066  c = ' ';
1067  break;
1068 
1069  case '\\':
1070  ++s;
1071  if(!is_quoted_char(*s))
1072  {
1073  throw std::logic_error("somehow we found a \\0 in a comment quoted pair in tld_email_t which should not happen since it was already checked in tld_email_t::parse()");
1074  }
1075  c = *s;
1076  break;
1077 
1078  }
1079  if(static_cast<unsigned char>(c) < ' ')
1080  {
1081  // do not accept any control characters in comments
1082  // (except \r, \n, and \t)
1083  return TLD_RESULT_INVALID;
1084  }
1085  }
1086  --s;
1087  break;
1088 
1089  case '[':
1090  if(!found_at || done || !value.empty() || !domain.empty())
1091  {
1092  // domain before the '@'
1093  //
1094  return TLD_RESULT_INVALID;
1095  }
1096  // trim spaces after the '['
1097  //
1098  for(++s; *s != ']'; ++s)
1099  {
1100  char const c(*s);
1101  if(c != ' ' && c != '\n' && c != '\r' && c != '\t')
1102  {
1103  break;
1104  }
1105  }
1106  for(; *s != '[' && *s != '\\' && *s != ']' && *s != ' ' && *s != '\n' && *s != '\r' && *s != '\t'; ++s)
1107  {
1108  if(*s == '\0')
1109  {
1110  throw std::logic_error("somehow we found a \\0 in a literal domain in tld_email_t which should not happen since it was already checked in tld_email_t::parse()");
1111  }
1112  // spaces are forbidden in domain names (see test above)
1113  //
1114  if(static_cast<unsigned char>(*s) < ' ' || *s == 0x7F)
1115  {
1116  // do not accept any control characters
1117  //
1118  return TLD_RESULT_INVALID;
1119  }
1120  value += *s;
1121  }
1122  // we can have spaces at the end, but those must be followed by ']'
1123  //
1124  for(; *s != '[' && *s != '\\' && *s != ']'; ++s)
1125  {
1126  char const c(*s);
1127  if(c != ' ' && c != '\n' && c != '\r' && c != '\t')
1128  {
1129  break;
1130  }
1131  }
1132  if(*s != ']' || value.empty())
1133  {
1134  // domain literal cannot include a space
1135  // nor can it be empty
1136  //
1137  return TLD_RESULT_NULL;
1138  }
1139  if(value[0] == '.'
1140  || *value.rbegin() == '.'
1141  || value.find("..") != std::string::npos)
1142  {
1143  // a domain cannot start or end with "."
1144  // a domain cannot include ".."
1145  //
1146  return TLD_RESULT_INVALID;
1147  }
1148  domain = value;
1149  value.clear();
1150  break;
1151 
1152  case '<':
1153  if(has_angle || found_at || found_dot || done)
1154  {
1155  // found two '<' or the '<' after the '@'
1156  // or we had a dot before meaning that we already have a dotted username
1157  // or we are done (a.k.a. found the '>')
1158  //
1159  return TLD_RESULT_INVALID;
1160  }
1161 
1162  // if we have an angle email address, whatever we found so far
1163  // is the user name; although it can be empty
1164  //
1165  trim(value);
1166  if(!value.empty())
1167  {
1168  fullname = value;
1169  value.clear();
1170  }
1171  has_angle = true;
1172  break;
1173 
1174  case '>':
1175  if(!has_angle || !found_at || done)
1176  {
1177  // missing '<' and/or '@'
1178  //
1179  return TLD_RESULT_INVALID;
1180  }
1181  if(domain.empty())
1182  {
1183  trim(value);
1184  if(value.empty())
1185  {
1186  // an empty domain name is not valid, apparently
1187  //
1188  return TLD_RESULT_NULL;
1189  }
1190  // we are done, we can only find spaces and comments
1191  //
1192  domain = value;
1193  }
1194  else
1195  {
1196  if(!value.empty())
1197  {
1198  return TLD_RESULT_INVALID;
1199  }
1200  }
1201  done = true;
1202  has_angle = false;
1203  value.clear();
1204  break;
1205 
1206  case '@':
1207  // Note: if done is true, found_at is also true here
1208  if(found_at || done)
1209  {
1210  // found two '@' characters
1211  return TLD_RESULT_INVALID;
1212  }
1213  found_at = true;
1214  found_dot = false; // reset this flag
1215  trim(value);
1216  if(value.empty())
1217  {
1218  // no username is not a valid entry
1219  //
1220  return TLD_RESULT_NULL;
1221  }
1222  username = value;
1223  value.clear();
1224  break;
1225 
1226  case ' ':
1227  case '\n':
1228  case '\r':
1229  case '\t':
1230  //
1231  // keep just one space
1232  //
1233  if( !value.empty() )
1234  {
1235  value += ' ';
1236  }
1237  // and skip all the others
1238  // (as far as I know this is not allowed in the RFC, only one space
1239  // between items; however, after a new-line / carriage return, you
1240  // could get many spaces and tabs and that's legal)
1241  //
1242  for(++s; *s != '\0'; ++s)
1243  {
1244  char const c(*s);
1245  if(c != ' ' && c != '\n' && c != '\r' && c != '\t')
1246  {
1247  break;
1248  }
1249  }
1250  --s; // the main loop will skip that last character (again)
1251  break;
1252 
1253  case '.':
1254  if(value.empty() // cannot start with a dot
1255  || (!value.empty() && *value.rbegin() == '.') // cannot include two dots one after the other
1256  || s[1] == '@' || s[1] == '>') // cannot end with a dot
1257  {
1258  return TLD_RESULT_INVALID;
1259  }
1260  found_dot = true;
1261  value += '.';
1262  break;
1263 
1264  default:
1265  // here we must have a valid atom character ([-A-Za-z0-9!#$%&'*+/=?^_`{|}~])
1266  //
1267  if(!is_atom_char(*s))
1268  {
1269  // not a valid atom character
1270  //
1271  return TLD_RESULT_INVALID;
1272  }
1273  value += *s;
1274  break;
1275 
1276  }
1277  }
1278 
1279  if(username.empty() || has_angle)
1280  {
1281  // no username means the '@' is missing
1282  // angle bracket was not closed ('>' missing)
1283  //
1284  return TLD_RESULT_NULL;
1285  }
1286 
1287  if(done)
1288  {
1289  if(!value.empty())
1290  {
1291  // nothing of substance can appear after the domain
1292  //
1293  return TLD_RESULT_INVALID;
1294  }
1295  }
1296  else
1297  {
1298  trim(value);
1299  if(value.empty())
1300  {
1301  if(domain.empty())
1302  {
1303  // domain is missing
1304  //
1305  return TLD_RESULT_NULL;
1306  }
1307  }
1308  else
1309  {
1310  if(!domain.empty())
1311  {
1312  // domain "defined twice"
1313  //
1314  return TLD_RESULT_INVALID;
1315  }
1316  domain = value;
1317  }
1318  }
1319 
1320  // finally, verify that the domain is indeed valid
1321  // (i.e. proper characters, structure, and TLD)
1322  // for that step we use the lowercase version
1323  //
1324  struct tld_info info;
1325  std::unique_ptr<char, void(*)(char *)> lowercase_domain(tld_domain_to_lowercase(domain.c_str()), reinterpret_cast<void(*)(char *)>(&::free));
1326  tld_result result(tld(lowercase_domain.get(), &info));
1327  if(result != TLD_RESULT_SUCCESS)
1328  {
1329  return result;
1330  }
1331 
1332  // EX-193 and EX-185: email must not have whitespace in it!
1333  //
1334  auto has_whitespace = [&]( char c )
1335  {
1336  return (c == ' ' || c == '\n' || c == '\r' || c == '\t');
1337  };
1338  if( std::find_if( std::begin(username), std::end(username), has_whitespace ) != std::end(username) )
1339  {
1340  return TLD_RESULT_INVALID;
1341  }
1342  //
1343  if( std::find_if( std::begin(domain), std::end(domain), has_whitespace ) != std::end(domain) )
1344  {
1345  return TLD_RESULT_INVALID;
1346  }
1347 
1348  f_original_email = email;
1349  f_fullname = fullname;
1350  f_username = username;
1351  f_domain = domain;
1352  f_email_only = quote_string(username, '\'') + "@" + quote_string(domain, '['); // TODO protect characters...
1353 
1354  // the canonicalized version uses the domain name in lowercase
1355  //
1356  std::string canonicalized_email(quote_string(username, '\'') + "@" + quote_string(lowercase_domain.get(), '[')); // TODO protect characters...
1357  if(fullname.empty())
1358  {
1359  f_canonicalized_email = canonicalized_email;
1360  }
1361  else
1362  {
1363  f_canonicalized_email = quote_string(fullname, '"') + " <" + canonicalized_email + ">"; // TODO protect characters...
1364  }
1365 
1366  return TLD_RESULT_SUCCESS;
1367 }
1368 
1393 {
1394  char const * s(group.c_str());
1395  std::string g;
1396  uint32_t count = 0;
1397 
1398  for(; *s != '\0'; ++s)
1399  {
1400  switch(*s)
1401  {
1402  case ' ':
1403  case '\n':
1404  case '\r':
1405  case '\t':
1406  if(!g.empty())
1407  {
1408  g += ' ';
1409  }
1410  for(++s; *s == ' ' || *s == '\n' || *s == '\r' || *s == '\t'; ++s);
1411  --s;
1412  break;
1413 
1414  case '(':
1415  count = 1;
1416 #pragma GCC diagnostic push
1417 #pragma GCC diagnostic ignored "-Wstrict-overflow"
1418  for(++s; count > 0; ++s)
1419 #pragma GCC diagnostic pop
1420  {
1421  if(*s == '\0')
1422  {
1423  throw std::logic_error("somehow we found a \\0 in a quoted string in tld_email_t which should not happen since it was already checked in tld_email_t::parse()");
1424  }
1425  switch(*s)
1426  {
1427  case '(':
1428  ++count;
1429  break;
1430 
1431  case ')':
1432  --count;
1433  break;
1434 
1435  case '\\':
1436  if(!is_quoted_char(s[1]))
1437  {
1438  throw std::logic_error("somehow we found a \\0 in a comment in tld_email_t which should not happen since it was already checked in tld_email_t::parse()");
1439  }
1440  ++s;
1441  break;
1442 
1443  // controls, etc. were already checked
1444  }
1445  }
1446  // come back on the ')' since the main for will do a ++s
1447  --s;
1448  break;
1449 
1450  default:
1451  if(static_cast<unsigned char>(*s) < ' ' || *s == 0x7F)
1452  {
1453  return TLD_RESULT_INVALID;
1454  }
1455  g += *s;
1456  break;
1457 
1458  }
1459  }
1460  if(g.empty())
1461  {
1462  return TLD_RESULT_INVALID;
1463  }
1464 
1465  f_group = g;
1466 
1467  return TLD_RESULT_SUCCESS;
1468 }
1469 
1484 {
1485  return new tld_email_list;
1486 }
1487 
1497 {
1498  delete list;
1499 }
1500 
1514 tld_result tld_email_parse(struct tld_email_list * list, char const * emails, int flags)
1515 {
1516  return list->parse(emails, flags);
1517 }
1518 
1529 {
1530  return list->count();
1531 }
1532 
1542 {
1543  list->rewind();
1544 }
1545 
1562 int tld_email_next(struct tld_email_list * list, struct tld_email * e)
1563 {
1564  return list->next(e) ? 1 : 0;
1565 }
1566 
1942 /* vim: ts=4 sw=4 et
1943  */
void parse_all_emails()
Parse all the emails in f_input.
Definition: tld_emails.cpp:418
tld_result f_result
The result of the parse() function.
Definition: tld.h:214
The input represents an optional list of email addresses.
Definition: tld.h:132
int count() const
Return the number of emails recorded.
Definition: tld_emails.cpp:791
int f_flags
The flags as passed to the parse() function.
Definition: tld.h:213
tld_email_list()
Initialize the tld_email_list object.
Definition: tld_emails.cpp:367
LIBTLD_EXPORT enum tld_result tld(const char *uri, struct tld_info *info)
Get information about the TLD for the specified URI.
Definition: tld.c:555
const char * f_email_only
The complete email address without display name.
Definition: tld.h:121
LIBTLD_EXPORT char * tld_domain_to_lowercase(const char *domain)
Transform a domain with a TLD to lowercase before processing.
The input URI is empty.
Definition: tld.h:85
std::string f_group
The group this email was defined in.
Definition: tld.h:189
void rewind() const
Rewind the reader to the start of the list.
Definition: tld_emails.cpp:804
const char * f_fullname
The user full or display name.
Definition: tld.h:118
The C++ side of the email list implementation.
Definition: tld.h:181
static tld_email_field_type email_field_type(const std::string &name)
Check whether a name represents a field with a list of emails.
Definition: tld_emails.cpp:892
The public header of the libtld library.
bool next(tld_email_t &e) const
Retrieve a copy of the next email information.
Definition: tld_emails.cpp:821
const char * f_original_email
The email as read from the source.
Definition: tld.h:117
const char * f_canonicalized_email
The email including the display name.
Definition: tld.h:122
The input represents a mailbox list.
Definition: tld.h:129
const char * f_username
The user being named in this email address.
Definition: tld.h:119
Parts of one email.
Definition: tld.h:184
tld_result parse(const std::string &email)
Parse one email to a tld_email_t object.
Definition: tld_emails.cpp:975
The TLD was found, but it is marked as invalid.
Definition: tld.h:84
std::string f_last_group
The last group read in the input.
Definition: tld.h:215
tld_email_list_t f_email_list
The list of emails.
Definition: tld.h:217
The input does not represent valid emails.
Definition: tld.h:128
void list()
List the default schemes accepted.
int f_pos
The current position reading the emails.
Definition: tld.h:216
tld_result tld_email_parse(struct tld_email_list *list, char const *emails, int flags)
Parse a list of emails in the email list object.
The input represents a mailbox.
Definition: tld.h:130
The input of email_field_type() was not valid.
Definition: tld.h:127
const char * f_group
The group this email was defined in.
Definition: tld.h:116
struct tld_email_list * tld_email_alloc()
Allocate a list of emails object.
int tld_email_next(struct tld_email_list *list, struct tld_email *e)
Retrieve the next email.
void tld_email_free(struct tld_email_list *list)
Free the list of emails.
tld_result parse_group(const std::string &group)
Parse a group including comments.
static std::string quote_string(const std::string &name, char quote)
Transform a name if it requires quotation.
Definition: tld_emails.cpp:716
const char * f_domain
The domain part of the email address.
Definition: tld.h:120
tld_result
The result returned by tld().
Definition: tld.h:81
Success! The TLD of the specified URI is valid.
Definition: tld.h:83
Parts of one email.
Definition: tld.h:114
std::string f_input
The input string of the last call to parse().
Definition: tld.h:212
Set of information returned by the tld() function.
Definition: tld.h:91
The input represents a mandatory list of mailboxes.
Definition: tld.h:131
void tld_email_rewind(struct tld_email_list *list)
Rewind the reading of the emails.
int tld_email_count(struct tld_email_list *list)
Return the number of emails found after a parse.
tld_result parse(const std::string &emails, int flags)
Parse a new list of emails.
Definition: tld_emails.cpp:395
tld_email_field_type
Type of email as determined by the email_field_type() function.
Definition: tld.h:125

This document is part of the Snap! Websites Project.

Copyright by Made to Order Software Corp.

Syndicate content

Snap! Websites
An Open Source CMS System in C++

Contact Us Directly