MythTV  master
eitfixup.cpp
Go to the documentation of this file.
1 // C++ headers
2 #include <algorithm>
3 #include <array>
4 
5 // MythTV headers
7 #include "libmythbase/programinfo.h" // for CategoryType, subtitle types and audio and video properties
8 
9 #include "channelutil.h" // for GetDefaultAuthority()
10 #include "eitfixup.h"
11 #include "mpeg/dishdescriptors.h" // for dish_theme_type_to_string
12 
13 /*------------------------------------------------------------------------
14  * Event Fix Up Scripts - Turned on by entry in dtv_privatetype table
15  *------------------------------------------------------------------------*/
16 
// Matches the word "stereo" / "Stereo"; the pattern also allows an optional
// parenthesis on either side of the word.
17 static const QRegularExpression kStereo { R"(\b\(?[sS]tereo\)?\b)" };
// Matches the (possibly empty) leading run of space, '|' and ':' characters.
18 static const QRegularExpression kUKSpaceColonStart { R"(^[ |:]*)" };
// Matches a period at the end of the string.
19 static const QRegularExpression kDotAtEnd { "\\.$" };
20 
// On Qt older than 5.15.2 the name capturedView is mapped onto capturedRef.
21 #if QT_VERSION < QT_VERSION_CHECK(5,15,2)
22 #define capturedView capturedRef
23 #endif
24 
// Value of each Roman numeral symbol, as looked up by parseRoman().
// Characters that are not listed here have no entry.
25 static const QMap<QChar,quint16> r2v = {
26  {'I' , 1}, {'V' , 5}, {'X' , 10}, {'L' , 50},
27  {'C' , 100}, {'D' , 500}, {'M' , 1000},
28  {QChar(0x399), 1}, // Greek Ι
29 };
30 
31 int EITFixUp::parseRoman (QString roman)
32 {
33  if (roman.isEmpty())
34  return 0;
35 
36  uint result = 0;
37  for (int i = 0; i < roman.size() - 1; i++)
38  {
39  int v1 = r2v[roman.at(i)];
40  int v2 = r2v[roman.at(i+1)];
41  result += (v1 >= v2) ? v1 : -v1;
42  }
43  return result + r2v[roman.back()];
44 }
45 
46 
48 {
49  if (event.m_fixup)
50  {
51  if (event.m_subtitle == event.m_title)
52  event.m_subtitle = QString("");
53 
54  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
55  {
56  event.m_description = event.m_subtitle;
57  event.m_subtitle = QString("");
58  }
59  }
60 
61  if (kFixHTML & event.m_fixup)
62  FixStripHTML(event);
63 
64  if (kFixHDTV & event.m_fixup)
65  event.m_videoProps |= VID_HDTV;
66 
67  if (kFixBell & event.m_fixup)
68  FixBellExpressVu(event);
69 
70  if (kFixDish & event.m_fixup)
71  FixBellExpressVu(event);
72 
73  if (kFixUK & event.m_fixup)
74  FixUK(event);
75 
76  if (kFixPBS & event.m_fixup)
77  FixPBS(event);
78 
79  if (kFixComHem & event.m_fixup)
80  FixComHem(event, (kFixSubtitle & event.m_fixup) != 0U);
81 
82  if (kFixAUStar & event.m_fixup)
83  FixAUStar(event);
84 
85  if (kFixAUDescription & event.m_fixup)
86  FixAUDescription(event);
87 
88  if (kFixAUFreeview & event.m_fixup)
89  FixAUFreeview(event);
90 
91  if (kFixAUNine & event.m_fixup)
92  FixAUNine(event);
93 
94  if (kFixAUSeven & event.m_fixup)
95  FixAUSeven(event);
96 
97  if (kFixMCA & event.m_fixup)
98  FixMCA(event);
99 
100  if (kFixRTL & event.m_fixup)
101  FixRTL(event);
102 
103  if (kFixP7S1 & event.m_fixup)
104  FixPRO7(event);
105 
106  if (kFixATV & event.m_fixup)
107  FixATV(event);
108 
109  if (kFixDisneyChannel & event.m_fixup)
110  FixDisneyChannel(event);
111 
112  if (kFixFI & event.m_fixup)
113  FixFI(event);
114 
115  if (kFixPremiere & event.m_fixup)
116  FixPremiere(event);
117 
118  if (kFixNL & event.m_fixup)
119  FixNL(event);
120 
121  if (kFixNO & event.m_fixup)
122  FixNO(event);
123 
124  if (kFixNRK_DVBT & event.m_fixup)
125  FixNRK_DVBT(event);
126 
127  if (kFixDK & event.m_fixup)
128  FixDK(event);
129 
130  if (kFixCategory & event.m_fixup)
131  FixCategory(event);
132 
133  if (kFixGreekSubtitle & event.m_fixup)
134  FixGreekSubtitle(event);
135 
136  if (kFixGreekEIT & event.m_fixup)
137  FixGreekEIT(event);
138 
139  if (kFixGreekCategories & event.m_fixup)
140  FixGreekCategories(event);
141 
142  if (kFixUnitymedia & event.m_fixup)
143  FixUnitymedia(event);
144 
145  // Clean up text strings after all fixups have been applied.
146  if (event.m_fixup)
147  {
148  static const QRegularExpression emptyParens { R"(\(\s*\))" };
149  if (!event.m_title.isEmpty())
150  {
151  event.m_title.remove(QChar('\0')).remove(emptyParens);
152  event.m_title = event.m_title.simplified();
153  }
154 
155  if (!event.m_subtitle.isEmpty())
156  {
157  event.m_subtitle.remove(QChar('\0'));
158  event.m_subtitle.remove(emptyParens);
159  event.m_subtitle = event.m_subtitle.simplified();
160  }
161 
162  if (!event.m_description.isEmpty())
163  {
164  event.m_description.remove(QChar('\0'));
165  event.m_description.remove(emptyParens);
166  event.m_description = event.m_description.simplified();
167  }
168  }
169 
170  if (kFixGenericDVB & event.m_fixup)
171  {
172  event.m_programId = AddDVBEITAuthority(event.m_chanid, event.m_programId);
173  event.m_seriesId = AddDVBEITAuthority(event.m_chanid, event.m_seriesId);
174  }
175 
176  // Are any items left unhandled? report them to allow fixups improvements
177  if (!event.m_items.empty())
178  {
179  for (auto i = event.m_items.begin(); i != event.m_items.end(); ++i)
180  {
181  LOG(VB_EIT, LOG_DEBUG, QString("Unhandled item in EIT for"
182  " channel id \"%1\", \"%2\": %3").arg(event.m_chanid)
183  .arg(i.key(), i.value()));
184  }
185  }
186 }
187 
203 QString EITFixUp::AddDVBEITAuthority(uint chanid, const QString &id)
204 {
205  if (id.isEmpty())
206  return id;
207 
208  // CRIDs are not case sensitive, so change all to lower case
209  QString crid = id.toLower();
210 
211  // remove "crid://"
212  if (crid.startsWith("crid://"))
213  crid.remove(0,7);
214 
215  // if id is a CRID with authority, return it
216  if (crid.length() >= 1 && crid[0] != '/')
217  return crid;
218 
219  QString authority = ChannelUtil::GetDefaultAuthority(chanid);
220  if (authority.isEmpty())
221  return ""; // no authority, not a valid CRID, return empty
222 
223  return authority + crid;
224 }
225 
231 {
232  // A 0x0D character is present between the content
233  // and the subtitle if its present
234  int position = event.m_description.indexOf('\r');
235 
236  if (position != -1)
237  {
238  // Subtitle present in the title, so get
239  // it and adjust the description
240  event.m_subtitle = event.m_description.left(position);
241  event.m_description = event.m_description.right(
242  event.m_description.length() - position - 2);
243  }
244 
245  // Take out the content description which is
246  // always next with a period after it
247  position = event.m_description.indexOf(".");
248  // Make sure they didn't leave it out and
249  // you come up with an odd category
250  if (position < 10)
251  {
252  }
253  else
254  {
255  event.m_category = "Unknown";
256  }
257 
258  // If the content descriptor didn't come up with anything, try parsing the category
259  // out of the description.
260  if (event.m_category.isEmpty())
261  {
262  // Take out the content description which is
263  // always next with a period after it
264  position = event.m_description.indexOf(".");
265  if ((position + 1) < event.m_description.length())
266  position = event.m_description.indexOf(". ");
267  // Make sure they didn't leave it out and
268  // you come up with an odd category
269  if ((position > -1) && position < 20)
270  {
271  const QString stmp = event.m_description;
272  event.m_description = stmp.right(stmp.length() - position - 2);
273  event.m_category = stmp.left(position);
274 
275  int position_p = event.m_category.indexOf("(");
276  if (position_p == -1)
277  event.m_description = stmp.right(stmp.length() - position - 2);
278  else
279  event.m_category = "Unknown";
280  }
281  else
282  {
283  event.m_category = "Unknown";
284  }
285 
286  // When a channel is off air the category is "-"
287  // so leave the category as blank
288  if (event.m_category == "-")
289  event.m_category = "OffAir";
290 
291  if (event.m_category.length() > 20)
292  event.m_category = "Unknown";
293  }
294  else if (event.m_categoryType)
295  {
296  QString theme = dish_theme_type_to_string(event.m_categoryType);
297  event.m_description = event.m_description.replace(theme, "");
298  if (event.m_description.startsWith("."))
299  event.m_description = event.m_description.right(event.m_description.length() - 1);
300  if (event.m_description.startsWith(" "))
301  event.m_description = event.m_description.right(event.m_description.length() - 1);
302  }
303 
304  // See if a year is present as (xxxx)
305  static const QRegularExpression bellYear { R"(\([0-9]{4}\))" };
306  position = event.m_description.indexOf(bellYear);
307  if (position != -1 && !event.m_category.isEmpty())
308  {
309  // Parse out the year
310  bool ok = false;
311  uint y = event.m_description.mid(position + 1, 4).toUInt(&ok);
312  if (ok)
313  {
314  event.m_originalairdate = QDate(y, 1, 1);
315  event.m_airdate = y;
316  event.m_previouslyshown = true;
317  }
318 
319  // Get the actors if they exist
320  if (position > 3)
321  {
322  static const QRegularExpression bellActors { R"(\set\s|,)" };
323  QString tmp = event.m_description.left(position-3);
324 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
325  QStringList actors =
326  tmp.split(bellActors, QString::SkipEmptyParts);
327 #else
328  QStringList actors =
329  tmp.split(bellActors, Qt::SkipEmptyParts);
330 #endif
331 
332  /* Possible TODO: if EIT inlcude the priority and/or character
333  * names for the actors, include them in AddPerson call. */
334  for (const auto & actor : qAsConst(actors))
335  event.AddPerson(DBPerson::kActor, actor);
336  }
337  // Remove the year and actors from the description
338  event.m_description = event.m_description.right(
339  event.m_description.length() - position - 7);
340  }
341 
342  // Check for (CC) in the decription and
343  // set the <subtitles type="teletext"> flag
344  position = event.m_description.indexOf("(CC)");
345  if (position != -1)
346  {
347  event.m_subtitleType |= SUB_HARDHEAR;
348  event.m_description = event.m_description.replace("(CC)", "");
349  }
350 
351  // Check for (Stereo) in the decription and set the <audio> tags
352  auto match = kStereo.match(event.m_description);
353  if (match.hasMatch())
354  {
355  event.m_audioProps |= AUD_STEREO;
356  event.m_description.remove(match.capturedStart(0),
357  match.capturedLength(0));
358  }
359 
360  // Check for "title (All Day, HD)" in the title
361  static const QRegularExpression bellPPVTitleAllDayHD { R"(\s*\(All Day\, HD\)\s*$)" };
362  match = bellPPVTitleAllDayHD.match(event.m_title);
363  if (match.hasMatch())
364  {
365  event.m_title.remove(match.capturedStart(), match.capturedLength());
366  event.m_videoProps |= VID_HDTV;
367  }
368 
369  // Check for "title (All Day)" in the title
370  static const QRegularExpression bellPPVTitleAllDay { R"(\s*\(All Day.*\)\s*$)" };
371  match = bellPPVTitleAllDay.match(event.m_title);
372  if (match.hasMatch())
373  event.m_title.remove(match.capturedStart(), match.capturedLength());
374 
375  // Check for "HD - title" in the title
376  static const QRegularExpression bellPPVTitleHD { R"(^HD\s?-\s?)" };
377  match = bellPPVTitleHD.match(event.m_title);
378  if (match.hasMatch())
379  {
380  event.m_title.remove(match.capturedStart(), match.capturedLength());
381  event.m_videoProps |= VID_HDTV;
382  }
383 
384  // Check for (HD) in the decription
385  position = event.m_description.indexOf("(HD)");
386  if (position != -1)
387  {
388  event.m_description = event.m_description.replace("(HD)", "");
389  event.m_videoProps |= VID_HDTV;
390  }
391 
392  // Check for (HD) in the title
393  position = event.m_title.indexOf("(HD)");
394  if (position != -1)
395  {
396  event.m_title = event.m_title.replace("(HD)", "");
397  event.m_videoProps |= VID_HDTV;
398  }
399 
400  // Check for HD at the end of the title
401  static const QRegularExpression dishPPVTitleHD { R"(\sHD\s*$)" };
402  match = dishPPVTitleHD.match(event.m_title);
403  if (match.hasMatch())
404  {
405  event.m_title.remove(match.capturedStart(), match.capturedLength());
406  event.m_videoProps |= VID_HDTV;
407  }
408 
409  // Check for (DD) at the end of the description
410  position = event.m_description.indexOf("(DD)");
411  if (position != -1)
412  {
413  event.m_description = event.m_description.replace("(DD)", "");
414  event.m_audioProps |= AUD_DOLBY;
415  event.m_audioProps |= AUD_STEREO;
416  }
417 
418  // Remove SAP from Dish descriptions
419  position = event.m_description.indexOf("(SAP)");
420  if (position != -1)
421  {
422  event.m_description = event.m_description.replace("(SAP", "");
423  event.m_subtitleType |= SUB_HARDHEAR;
424  }
425 
426  // Remove any trailing colon in title
427  static const QRegularExpression dishPPVTitleColon { R"(\:\s*$)" };
428  match = dishPPVTitleColon.match(event.m_title);
429  if (match.hasMatch())
430  event.m_title.remove(match.capturedStart(), match.capturedLength());
431 
432  // Remove New at the end of the description
433  static const QRegularExpression dishDescriptionNew { R"(\s*New\.\s*)" };
434  match = dishDescriptionNew.match(event.m_description);
435  if (match.hasMatch())
436  {
437  event.m_previouslyshown = false;
438  event.m_description.remove(match.capturedStart(), match.capturedLength());
439  }
440 
441  // Remove Series Finale at the end of the desciption
442  static const QRegularExpression dishDescriptionFinale { R"(\s*(Series|Season)\sFinale\.\s*)" };
443  match = dishDescriptionFinale.match(event.m_description);
444  if (match.hasMatch())
445  {
446  event.m_previouslyshown = false;
447  event.m_description.remove(match.capturedStart(), match.capturedLength());
448  }
449 
450  // Remove Series Finale at the end of the desciption
451  static const QRegularExpression dishDescriptionFinale2 { R"(\s*Finale\.\s*)" };
452  match = dishDescriptionFinale2.match(event.m_description);
453  if (match.hasMatch())
454  {
455  event.m_previouslyshown = false;
456  event.m_description.remove(match.capturedStart(), match.capturedLength());
457  }
458 
459  // Remove Series Premiere at the end of the description
460  static const QRegularExpression dishDescriptionPremiere { R"(\s*(Series|Season)\s(Premier|Premiere)\.\s*)" };
461  match = dishDescriptionPremiere.match(event.m_description);
462  if (match.hasMatch())
463  {
464  event.m_previouslyshown = false;
465  event.m_description.remove(match.capturedStart(), match.capturedLength());
466  }
467 
468  // Remove Series Premiere at the end of the description
469  static const QRegularExpression dishDescriptionPremiere2 { R"(\s*(Premier|Premiere)\.\s*)" };
470  match = dishDescriptionPremiere2.match(event.m_description);
471  if (match.hasMatch())
472  {
473  event.m_previouslyshown = false;
474  event.m_description.remove(match.capturedStart(), match.capturedLength());
475  }
476 
477  // Remove Dish's PPV code at the end of the description
478  static const QRegularExpression ppvcode { R"(\s*\(([A-Z]|[0-9]){5}\)\s*$)",
479  QRegularExpression::CaseInsensitiveOption };
480  match = ppvcode.match(event.m_description);
481  if (match.hasMatch())
482  event.m_description.remove(match.capturedStart(), match.capturedLength());
483 
484  // Remove trailing garbage
485  static const QRegularExpression dishPPVSpacePerenEnd { R"(\s\)\s*$)" };
486  match = dishPPVSpacePerenEnd.match(event.m_description);
487  if (match.hasMatch())
488  event.m_description.remove(match.capturedStart(), match.capturedLength());
489 
490  // Check for subtitle "All Day (... Eastern)" in the subtitle
491  static const QRegularExpression bellPPVSubtitleAllDay { R"(^All Day \(.*\sEastern\)\s*$)" };
492  match = bellPPVSubtitleAllDay.match(event.m_subtitle);
493  if (match.hasMatch())
494  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
495 
496  // Check for description "(... Eastern)" in the description
497  static const QRegularExpression bellPPVDescriptionAllDay { R"(^\(.*\sEastern\))" };
498  match = bellPPVDescriptionAllDay.match(event.m_description);
499  if (match.hasMatch())
500  event.m_description.remove(match.capturedStart(), match.capturedLength());
501 
502  // Check for description "(... ET)" in the description
503  static const QRegularExpression bellPPVDescriptionAllDay2 { R"(^\([0-9].*am-[0-9].*am\sET\))" };
504  match = bellPPVDescriptionAllDay2.match(event.m_description);
505  if (match.hasMatch())
506  event.m_description.remove(match.capturedStart(), match.capturedLength());
507 
508  // Check for description "(nnnnn)" in the description
509  static const QRegularExpression bellPPVDescriptionEventId { R"(\([0-9]{5}\))" };
510  match = bellPPVDescriptionEventId.match(event.m_description);
511  if (match.hasMatch())
512  event.m_description.remove(match.capturedStart(), match.capturedLength());
513 }
514 
519 {
520  QStringList strListColon = event.m_description.split(":");
521  QStringList strListEnd;
522 
523  bool fColon = false;
524  bool fQuotedSubtitle = false;
525  QString strEnd;
526  if (strListColon.count()>1)
527  {
528  bool fDoubleDot = false;
529  bool fSingleDot = true;
530  int nLength = strListColon[0].length();
531 
532  int nPosition1 = event.m_description.indexOf("..");
533  if ((nPosition1 < nLength) && (nPosition1 >= 0))
534  fDoubleDot = true;
535  nPosition1 = event.m_description.indexOf(".");
536  if (nPosition1==-1)
537  fSingleDot = false;
538  if (nPosition1 > nLength)
539  fSingleDot = false;
540  else
541  {
542  QString strTmp = event.m_description.mid(nPosition1+1,
543  nLength-nPosition1);
544 
545  QStringList tmp = strTmp.split(" ");
546  if (((uint) tmp.size()) < kMaxDotToColon)
547  fSingleDot = false;
548  }
549 
550  if (fDoubleDot)
551  {
552  strListEnd = strListColon;
553  fColon = true;
554  }
555  else if (!fSingleDot)
556  {
557  QStringList strListTmp;
558  uint nTitle=0;
559  int nTitleMax=-1;
560  for (int i =0; (i<strListColon.count()) && (nTitleMax==-1);i++)
561  {
562  const QStringList tmp = strListColon[i].split(" ");
563 
564  nTitle += tmp.size();
565 
566  if (nTitle < kMaxToTitle)
567  strListTmp.push_back(strListColon[i]);
568  else
569  nTitleMax=i;
570  }
571  QString strPartial;
572  for (int i=0;i<(nTitleMax-1);i++)
573  strPartial+=strListTmp[i]+":";
574  if (nTitleMax>0)
575  {
576  strPartial+=strListTmp[nTitleMax-1];
577  strListEnd.push_back(strPartial);
578  }
579  for (int i=nTitleMax+1;i<strListColon.count();i++)
580  strListEnd.push_back(strListColon[i]);
581  fColon = true;
582  }
583  }
584  static const QRegularExpression ukQuotedSubtitle { R"(^'([\w\s\-,]+?)\.' )" };
585  auto match = ukQuotedSubtitle.match(event.m_description);
586  if (match.hasMatch())
587  {
588  event.m_subtitle = match.captured(1);
589  event.m_description.remove(match.capturedStart(0),
590  match.capturedLength(0));
591  fQuotedSubtitle = true;
592  }
593  QStringList strListPeriod;
594  QStringList strListQuestion;
595  QStringList strListExcl;
596  if (!(fColon || fQuotedSubtitle))
597  {
598  strListPeriod = event.m_description.split(".");
599  if (strListPeriod.count() >1)
600  {
601  int nPosition1 = event.m_description.indexOf(".");
602  int nPosition2 = event.m_description.indexOf("..");
603  if ((nPosition1 < nPosition2) || (nPosition2==-1))
604  strListEnd = strListPeriod;
605  }
606 
607  strListQuestion = event.m_description.split("?");
608  strListExcl = event.m_description.split("!");
609  if ((strListQuestion.size() > 1) &&
610  ((uint)strListQuestion.size() <= kMaxQuestionExclamation))
611  {
612  strListEnd = strListQuestion;
613  strEnd = "?";
614  }
615  else if ((strListExcl.size() > 1) &&
616  ((uint)strListExcl.size() <= kMaxQuestionExclamation))
617  {
618  strListEnd = strListExcl;
619  strEnd = "!";
620  }
621  else
622  strEnd.clear();
623  }
624 
625  if (!strListEnd.empty())
626  {
627 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
628  QStringList strListSpace = strListEnd[0].split(
629  " ", QString::SkipEmptyParts);
630 #else
631  QStringList strListSpace = strListEnd[0].split(
632  " ", Qt::SkipEmptyParts);
633 #endif
634  if (fColon && ((uint)strListSpace.size() > kMaxToTitle))
635  return;
636  if ((uint)strListSpace.size() > kDotToTitle)
637  return;
638  static const QRegularExpression ukExclusionFromSubtitle {
639  "(starring|stars\\s|drama|seres|sitcom)",
640  QRegularExpression::CaseInsensitiveOption };
641  if (strListSpace.filter(ukExclusionFromSubtitle).empty())
642  {
643  event.m_subtitle = strListEnd[0]+strEnd;
644  event.m_subtitle.remove(kUKSpaceColonStart);
645  event.m_description=
646  event.m_description.mid(strListEnd[0].length()+1);
647  event.m_description.remove(kUKSpaceColonStart);
648  }
649  }
650 }
651 
652 
657 {
658  static const QRegularExpression uk24ep { R"(^\d{1,2}:00[ap]m to \d{1,2}:00[ap]m: )" };
659  static const QRegularExpression ukTime { R"(\d{1,2}[\.:]\d{1,2}\s*(am|pm|))" };
660  QString strFull;
661 
662  bool isMovie = event.m_category.startsWith("Movie",Qt::CaseInsensitive) ||
663  event.m_category.startsWith("Film",Qt::CaseInsensitive);
664  // BBC three case (could add another record here ?)
665  static const QRegularExpression ukThen { R"(\s*?(Then|Followed by) 60 Seconds\.)",
666  QRegularExpression::CaseInsensitiveOption };
667  static const QRegularExpression ukNew { R"((New\.|\s*?(Brand New|New)\s*?(Series|Episode)\s*?[:\.\-]))",
668  QRegularExpression::CaseInsensitiveOption };
669  static const QRegularExpression ukNewTitle { R"(^(Brand New|New:)\s*)",
670  QRegularExpression::CaseInsensitiveOption };
671  event.m_description = event.m_description.remove(ukThen);
672  event.m_description = event.m_description.remove(ukNew);
673  event.m_title = event.m_title.remove(ukNewTitle);
674 
675  // Removal of Class TV, CBBC and CBeebies etc..
676  static const QRegularExpression ukTitleRemove { "^(?:[tT]4:|Schools\\s*?:)" };
677  static const QRegularExpression ukDescriptionRemove { R"(^(?:CBBC\s*?\.|CBeebies\s*?\.|Class TV\s*?:|BBC Switch\.))" };
678  event.m_title = event.m_title.remove(ukTitleRemove);
679  event.m_description = event.m_description.remove(ukDescriptionRemove);
680 
681  // Removal of BBC FOUR and BBC THREE
682  static const QRegularExpression ukBBC34 { R"(BBC (?:THREE|FOUR) on BBC (?:ONE|TWO)\.)",
683  QRegularExpression::CaseInsensitiveOption };
684  event.m_description = event.m_description.remove(ukBBC34);
685 
686  // BBC 7 [Rpt of ...] case.
687  static const QRegularExpression ukBBC7rpt { R"(\[Rptd?[^]]+?\d{1,2}\.\d{1,2}[ap]m\]\.)" };
688  event.m_description = event.m_description.remove(ukBBC7rpt);
689 
690  // "All New To 4Music!
691  static const QRegularExpression ukAllNew { R"(All New To 4Music!\s?)" };
692  event.m_description = event.m_description.remove(ukAllNew);
693 
694  // Removal of 'Also in HD' text
695  static const QRegularExpression ukAlsoInHD { R"(\s*Also in HD\.)",
696  QRegularExpression::CaseInsensitiveOption };
697  event.m_description = event.m_description.remove(ukAlsoInHD);
698 
699  // Remove [AD,S] etc.
700  static const QRegularExpression ukCC { R"(\[(?:(AD|SL|S|W|HD),?)+\])" };
701  auto match = ukCC.match(event.m_description);
702  while (match.hasMatch())
703  {
704  QStringList tmpCCitems = match.captured(0).remove("[").remove("]").split(",");
705  if (tmpCCitems.contains("AD"))
706  event.m_audioProps |= AUD_VISUALIMPAIR;
707  if (tmpCCitems.contains("HD"))
708  event.m_videoProps |= VID_HDTV;
709  if (tmpCCitems.contains("S"))
710  event.m_subtitleType |= SUB_NORMAL;
711  if (tmpCCitems.contains("SL"))
712  event.m_subtitleType |= SUB_SIGNED;
713  if (tmpCCitems.contains("W"))
714  event.m_videoProps |= VID_WIDESCREEN;
715  event.m_description.remove(match.capturedStart(0),
716  match.capturedLength(0));
717  match = ukCC.match(event.m_description, match.capturedStart(0));
718  }
719 
720  event.m_title = event.m_title.trimmed();
721  event.m_description = event.m_description.trimmed();
722 
723  // Constituents of UK season regexp, decomposed for clarity
724 
725  // Matches Season 2, S 2 and "Series 2," etc but not "hits 2"
726  // cap1 = season
727  static const QString seasonStr = R"(\b(?:Season|Series|S)\s*(\d+)\s*,?)";
728 
729  // Work out the season and episode numbers (if any)
730  // Matching pattern "Season 2 Episode|Ep 3 of 14|3/14" etc
731 
732  // Matches Episode 3, Ep 3/4, Ep 3 of 4 etc but not "step 1"
733  // cap1 = ep, cap2 = total
734  static const QString longEp = R"(\b(?:Ep|Episode)\s*(\d+)\s*(?:(?:/|of)\s*(\d*))?)";
735 
736  // Matches S2 Ep 3/4, "Season 2, Ep 3 of 4", Episode 3 etc
737  // cap1 = season, cap2 = ep, cap3 = total
738  static const QString longSeasEp = QString("\\(?(?:%1)?\\s*%2").arg(seasonStr, longEp);
739 
740  // Matches long seas/ep with surrounding parenthesis & trailing period
741  // cap1 = season, cap2 = ep, cap3 = total
742  static const QString longContext = QString(R"(\(*%1\s*\)?\s*\.?)").arg(longSeasEp);
743 
744  // Matches 3/4, 3 of 4
745  // cap1 = ep, cap2 = total
746  static const QString shortEp = R"((\d+)\s*(?:/|of)\s*(\d+))";
747 
748  // Matches short ep/total, ignoring Parts and idioms such as 9/11, 24/7 etc.
749  // ie. x/y in parenthesis or has no leading or trailing text in the sentence.
750  // cap0 may include previous/anchoring period
751  // cap1 = shortEp with surrounding parenthesis & trailing period (to remove)
752  // cap2 = ep, cap3 = total,
753  static const QString shortContext =
754  QString(R"((?:^|\.)(\s*\(*\s*%1[\s)]*(?:[).:]|$)))").arg(shortEp);
755 
756  // Prefer long format resorting to short format
757  // cap0 = long match to remove, cap1 = long season, cap2 = long ep, cap3 = long total,
758  // cap4 = short match to remove, cap5 = short ep, cap6 = short total
759  static const QRegularExpression ukSeries { "(?:" + longContext + "|" + shortContext + ")",
760  QRegularExpression::CaseInsensitiveOption };
761 
762  bool series = false;
763  bool fromTitle = true;
764  match = ukSeries.match(event.m_title);
765  if (!match.hasMatch())
766  {
767  fromTitle = false;
768  match = ukSeries.match(event.m_description);
769  }
770  if (match.hasMatch())
771  {
772  if (!match.captured(1).isEmpty())
773  {
774  event.m_season = match.captured(1).toUInt();
775  series = true;
776  }
777 
778  if (!match.captured(2).isEmpty())
779  {
780  event.m_episode = match.captured(2).toUInt();
781  series = true;
782  }
783  else if (!match.captured(5).isEmpty())
784  {
785  event.m_episode = match.captured(5).toUInt();
786  series = true;
787  }
788 
789  if (!match.captured(3).isEmpty())
790  {
791  event.m_totalepisodes = match.captured(3).toUInt();
792  series = true;
793  }
794  else if (!match.captured(6).isEmpty())
795  {
796  event.m_totalepisodes = match.captured(6).toUInt();
797  series = true;
798  }
799 
800  // Remove long or short match. Short text doesn't start at position2
801  int form = match.captured(4).isEmpty() ? 0 : 4;
802 
803  if (fromTitle)
804  {
805  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from title (%4) \"%5\"")
806  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
807  .arg(event.m_title, event.m_description));
808 
809  event.m_title.remove(match.capturedStart(form),
810  match.capturedLength(form));
811  }
812  else
813  {
814  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from description (%4) \"%5\"")
815  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
816  .arg(event.m_title, event.m_description));
817 
818  if (match.capturedStart(form) == 0)
819  {
820  // Remove from the start of the description.
821  // Otherwise it ends up in the subtitle.
822  event.m_description.remove(match.capturedStart(form),
823  match.capturedLength(form));
824  }
825  }
826  }
827 
828  if (isMovie)
829  event.m_categoryType = ProgramInfo::kCategoryMovie;
830  else if (series)
831  event.m_categoryType = ProgramInfo::kCategorySeries;
832 
833  // Multi-part episodes, or films (e.g. ITV film split by news)
834  // Matches Part 1, Pt 1/2, Part 1 of 2 etc.
835  static const QRegularExpression ukPart { R"([-(\:,.]\s*(?:Part|Pt)\s*(\d+)\s*(?:(?:of|/)\s*(\d+))?\s*[-):,.])",
836  QRegularExpression::CaseInsensitiveOption };
837  match = ukPart.match(event.m_title);
838  auto match2 = ukPart.match(event.m_description);
839  if (match.hasMatch())
840  {
841  event.m_partnumber = match.captured(1).toUInt();
842  event.m_parttotal = match.captured(2).toUInt();
843 
844  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from title (%3)")
845  .arg(event.m_partnumber).arg(event.m_parttotal).arg(event.m_title));
846 
847  // Remove from the title
848  event.m_title.remove(match.capturedStart(0),
849  match.capturedLength(0));
850  }
851  else if (match2.hasMatch())
852  {
853  event.m_partnumber = match2.captured(1).toUInt();
854  event.m_parttotal = match2.captured(2).toUInt();
855 
856  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from description (%3) \"%4\"")
857  .arg(event.m_partnumber).arg(event.m_parttotal)
858  .arg(event.m_title, event.m_description));
859 
860  // Remove from the start of the description.
861  // Otherwise it ends up in the subtitle.
862  if (match2.capturedStart(0) == 0)
863  {
864  // Retain a single colon (subtitle separator) if we remove any
865  QString sub = match2.captured(0).contains(":") ? ":" : "";
866  event.m_description = event.m_description.replace(match2.captured(0), sub);
867  }
868  }
869 
870  static const QRegularExpression ukStarring { R"((?:Western\s)?[Ss]tarring ([\w\s\-']+?)[Aa]nd\s([\w\s\-']+?)[\.|,]\s*(\d{4})?(?:\.\s)?)" };
871  match = ukStarring.match(event.m_description);
872  if (match.hasMatch())
873  {
874  // if we match this we've captured 2 actors and an (optional) airdate
875  /* Possible TODO: if EIT inlcude the priority and/or character
876  * names for the actors, include them in AddPerson call. */
877  event.AddPerson(DBPerson::kActor, match.captured(1));
878  event.AddPerson(DBPerson::kActor, match.captured(2));
879  if (match.captured(3).length() > 0)
880  {
881  bool ok = false;
882  uint y = match.captured(3).toUInt(&ok);
883  if (ok)
884  {
885  event.m_airdate = y;
886  event.m_originalairdate = QDate(y, 1, 1);
887  }
888  }
889  }
890 
891  static const QRegularExpression ukLaONoSplit { "^Law & Order: (?:Criminal Intent|LA|"
892  "Special Victims Unit|Trial by Jury|UK|You the Jury)" };
893  if (!event.m_title.startsWith("CSI:") && !event.m_title.startsWith("CD:") &&
894  !event.m_title.contains(ukLaONoSplit) &&
895  !event.m_title.startsWith("Mission: Impossible"))
896  {
897  static const QRegularExpression ukDoubleDotStart { R"(^\.\.+)" };
898  static const QRegularExpression ukDoubleDotEnd { R"(\.\.+$)" };
899  if ((event.m_title.indexOf(ukDoubleDotEnd) != -1) &&
900  (event.m_description.indexOf(ukDoubleDotStart) != -1))
901  {
902  QString strPart=event.m_title.remove(ukDoubleDotEnd)+" ";
903  strFull = strPart + event.m_description.remove(ukDoubleDotStart);
904  int position1 = -1;
905  static const QRegularExpression ukCEPQ { R"([:\!\.\?]\s)" };
906  static const QRegularExpression ukSpaceStart { "^ " };
907  if (isMovie &&
908  ((position1 = strFull.indexOf(ukCEPQ,strPart.length())) != -1))
909  {
910  if (strFull[position1] == '!' || strFull[position1] == '?'
911  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
912  position1++;
913  event.m_title = strFull.left(position1);
914  event.m_description = strFull.mid(position1 + 1);
915  event.m_description.remove(ukSpaceStart);
916  }
917  else if ((position1 = strFull.indexOf(ukCEPQ)) != -1)
918  {
919  if (strFull[position1] == '!' || strFull[position1] == '?'
920  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
921  position1++;
922  event.m_title = strFull.left(position1);
923  event.m_description = strFull.mid(position1 + 1);
924  event.m_description.remove(ukSpaceStart);
925  SetUKSubtitle(event);
926  }
927  }
928  else if (event.m_description.indexOf(uk24ep) != -1)
929  {
930  auto match24 = uk24ep.match(event.m_description);
931  if (match24.hasMatch())
932  {
933  // Special case for episodes of 24.
934  // -2 from the length cause we don't want ": " on the end
935  event.m_subtitle = event.m_description.mid(match24.capturedStart(0),
936  match24.captured(0).length() - 2);
937  event.m_description = event.m_description.remove(match24.captured(0));
938  }
939  }
940  else if (event.m_description.indexOf(ukTime) == -1)
941  {
942  static const QRegularExpression ukYearColon { R"(^[\d]{4}:)" };
943  if (!isMovie && (event.m_title.indexOf(ukYearColon) < 0))
944  {
945  int position1 = -1;
946  if (((position1 = event.m_title.indexOf(":")) != -1) &&
947  (event.m_description.indexOf(":") < 0 ))
948  {
949  static const QRegularExpression ukCompleteDots { R"(^\.\.+$)" };
950  if (event.m_title.mid(position1+1).indexOf(ukCompleteDots)==0)
951  {
952  SetUKSubtitle(event);
953  QString strTmp = event.m_title.mid(position1+1);
954  event.m_title.resize(position1);
955  event.m_subtitle = strTmp+event.m_subtitle;
956  }
957  else if ((uint)position1 < kSubtitleMaxLen)
958  {
959  event.m_subtitle = event.m_title.mid(position1 + 1);
960  event.m_title = event.m_title.left(position1);
961  }
962  }
963  else
964  SetUKSubtitle(event);
965  }
966  }
967  }
968 
969  if (!isMovie && event.m_subtitle.isEmpty() &&
970  !event.m_title.startsWith("The X-Files"))
971  {
972  int position1 = -1;
973  if ((position1=event.m_description.indexOf(ukTime)) != -1)
974  {
975  static const QRegularExpression ukColonPeriod { R"([:\.])" };
976  int position2 = event.m_description.indexOf(ukColonPeriod);
977  if ((position2>=0) && (position2 < (position1-2)))
978  SetUKSubtitle(event);
979  }
980  else if ((position1=event.m_title.indexOf("-")) != -1)
981  {
982  if ((uint)position1 < kSubtitleMaxLen)
983  {
984  event.m_subtitle = event.m_title.mid(position1 + 1);
985  event.m_subtitle.remove(kUKSpaceColonStart);
986  event.m_title = event.m_title.left(position1);
987  }
988  }
989  else
990  SetUKSubtitle(event);
991  }
992 
993  // Work out the year (if any)
994  static const QRegularExpression ukYear { R"([\[\(]([\d]{4})[\)\]])" };
995  match = ukYear.match(event.m_description);
996  if (match.hasMatch())
997  {
998  event.m_description.remove(match.capturedStart(0),
999  match.capturedLength(0));
1000  bool ok = false;
1001  uint y = match.captured(1).toUInt(&ok);
1002  if (ok)
1003  {
1004  event.m_airdate = y;
1005  event.m_originalairdate = QDate(y, 1, 1);
1006  }
1007  }
1008 
1009  // Trim leading/trailing '.'
1010  static const QRegularExpression ukDotSpaceStart { R"(^\. )" };
1011  static const QRegularExpression ukDotEnd { R"(\.$)" };
1012  event.m_subtitle.remove(ukDotSpaceStart);
1013  if (event.m_subtitle.lastIndexOf("..") != (event.m_subtitle.length()-2))
1014  event.m_subtitle.remove(ukDotEnd);
1015 
1016  // Reverse the subtitle and empty description
1017  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
1018  {
1019  event.m_description=event.m_subtitle;
1020  event.m_subtitle.clear();
1021  }
1022 }
1023 
1028 {
1029  /* Used for PBS ATSC Subtitles are separated by a colon */
1030  int position = event.m_description.indexOf(':');
1031  if (position != -1)
1032  {
1033  const QString stmp = event.m_description;
1034  event.m_subtitle = stmp.left(position);
1035  event.m_description = stmp.right(stmp.length() - position - 2);
1036  }
1037 }
1038 
1042 void EITFixUp::FixComHem(DBEventEIT &event, bool process_subtitle)
1043 {
1044  static const QRegularExpression comHemPersSeparator { R"((, |\soch\s))" };
1045 
1046  // Reverse what EITFixUp::Fix() did
1047  if (event.m_subtitle.isEmpty() && !event.m_description.isEmpty())
1048  {
1049  event.m_subtitle = event.m_description;
1050  event.m_description = "";
1051  }
1052 
1053  // Remove subtitle, it contains the category and we already know that
1054  event.m_subtitle = "";
1055 
1056  bool isSeries = false;
1057  // Try to find episode numbers
1058  static const QRegularExpression comHemSeries1
1059  { R"(\s?(?:[dD]el|[eE]pisode)\s([0-9]+)(?:\s?(?:/|:|av)\s?([0-9]+))?\.)" };
1060  static const QRegularExpression comHemSeries2 { R"(\s?-?\s?([Dd]el\s+([0-9]+)))" };
1061  auto match = comHemSeries1.match(event.m_description);
1062  auto match2 = comHemSeries2.match(event.m_title);
1063  if (match2.hasMatch())
1064  {
1065  event.m_partnumber = match2.capturedView(2).toUInt();
1066  event.m_title.remove(match2.capturedStart(), match2.capturedLength());
1067  }
1068  else if (match.hasMatch())
1069  {
1070  if (match.capturedStart(1) != -1)
1071  event.m_partnumber = match.capturedView(1).toUInt();
1072  if (match.capturedStart(2) != -1)
1073  event.m_parttotal = match.capturedView(2).toUInt();
1074 
1075  // Remove the episode numbers, but only if it's not at the begining
1076  // of the description (subtitle code might use it)
1077  if (match.capturedStart() > 0)
1078  event.m_description.remove(match.capturedStart(),
1079  match.capturedLength());
1080  isSeries = true;
1081  }
1082 
1083  // Add partnumber/parttotal to subtitle
1084  // This will be overwritten if we find a better subtitle
1085  if (event.m_partnumber > 0)
1086  {
1087  event.m_subtitle = QString("Del %1").arg(event.m_partnumber);
1088  if (event.m_parttotal > 0)
1089  event.m_subtitle += QString(" av %1").arg(event.m_parttotal);
1090  }
1091 
1092  // Move subtitle info from title to subtitle
1093  static const QRegularExpression comHemTSub { R"(\s+-\s+([^\-]+))" };
1094  match = comHemTSub.match(event.m_title);
1095  if (match.hasMatch())
1096  {
1097  event.m_subtitle = match.captured(1);
1098  event.m_title.remove(match.capturedStart(), match.capturedLength());
1099  }
1100 
1101  // No need to continue without a description.
1102  if (event.m_description.length() <= 0)
1103  return;
1104 
1105  // Try to find country category, year and possibly other information
1106  // from the begining of the description
1107  static const QRegularExpression comHemCountry
1108  { R"(^(\(.+\))?\s?([^ ]+)\s([^\.0-9]+)\sfrån\s([0-9]{4})(?:\smed\s([^\.]+))?\.?)" };
1109  match = comHemCountry.match(event.m_description);
1110  if (match.hasMatch())
1111  {
1112  QString replacement;
1113 
1114  // Original title, usually english title
1115  // note: list[1] contains extra () around the text that needs removing
1116  if (!match.capturedView(1).isEmpty())
1117  {
1118  replacement = match.captured(1) + " ";
1119  //store it somewhere?
1120  }
1121 
1122  // Countr(y|ies)
1123  if (!match.capturedView(2).isEmpty())
1124  {
1125  replacement += match.captured(2) + " ";
1126  //store it somewhere?
1127  }
1128 
1129  // Category
1130  if (!match.capturedView(3).isEmpty())
1131  {
1132  replacement += match.captured(3) + ".";
1133  if(event.m_category.isEmpty())
1134  {
1135  event.m_category = match.captured(3);
1136  }
1137 
1138  if(match.captured(3).indexOf("serie")!=-1)
1139  {
1140  isSeries = true;
1141  }
1142  }
1143 
1144  // Year
1145  if (!match.capturedView(4).isEmpty())
1146  {
1147  bool ok = false;
1148  uint y = match.capturedView(4).trimmed().toUInt(&ok);
1149  if (ok)
1150  event.m_airdate = y;
1151  }
1152 
1153  // Actors
1154  if (!match.capturedView(5).isEmpty())
1155  {
1156 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1157  const QStringList actors =
1158  match.captured(5).split(comHemPersSeparator, QString::SkipEmptyParts);
1159 #else
1160  const QStringList actors =
1161  match.captured(5).split(comHemPersSeparator, Qt::SkipEmptyParts);
1162 #endif
1163  /* Possible TODO: if EIT inlcude the priority and/or character
1164  * names for the actors, include them in AddPerson call. */
1165  for (const auto & actor : qAsConst(actors))
1166  event.AddPerson(DBPerson::kActor, actor);
1167  }
1168 
1169  // Remove year and actors.
1170  // The reason category is left in the description is because otherwise
1171  // the country would look wierd like "Amerikansk. Rest of description."
1172  event.m_description = event.m_description.replace(match.captured(0),replacement);
1173  }
1174 
1175  if (isSeries)
1176  event.m_categoryType = ProgramInfo::kCategorySeries;
1177 
1178  // Look for additional persons in the description
1179  static const QRegularExpression comHemPersons
1180  { R"(\s?([Rr]egi|[Ss]kådespelare|[Pp]rogramledare|[Ii] rollerna):\s([^\.]+)\.)" };
1181  auto iter = comHemPersons.globalMatch(event.m_description);
1182  while (iter.hasNext())
1183  {
1184  auto pmatch = iter.next();
1186 
1187  static const QRegularExpression comHemDirector { "[Rr]egi" };
1188  static const QRegularExpression comHemActor { "[Ss]kådespelare|[Ii] rollerna" };
1189  static const QRegularExpression comHemHost { "[Pp]rogramledare" };
1190  auto dmatch = comHemDirector.match(pmatch.capturedView(1));
1191  auto amatch = comHemActor.match(pmatch.capturedView(1));
1192  auto hmatch = comHemHost.match(pmatch.capturedView(1));
1193  if (dmatch.hasMatch())
1194  role = DBPerson::kDirector;
1195  else if (amatch.hasMatch())
1196  role = DBPerson::kActor;
1197  else if (hmatch.hasMatch())
1198  role = DBPerson::kHost;
1199  else
1200  {
1201  event.m_description.remove(pmatch.capturedStart(), pmatch.capturedLength());
1202  continue;
1203  }
1204 
1205 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1206  const QStringList actors =
1207  pmatch.captured(2).split(comHemPersSeparator, QString::SkipEmptyParts);
1208 #else
1209  const QStringList actors =
1210  pmatch.captured(2).split(comHemPersSeparator, Qt::SkipEmptyParts);
1211 #endif
1212  /* Possible TODO: if EIT inlcude the priority and/or character
1213  * names for the actors, include them in AddPerson call. */
1214  for (const auto & actor : qAsConst(actors))
1215  event.AddPerson(role, actor);
1216 
1217  // Remove it
1218  event.m_description=event.m_description.replace(pmatch.captured(0),"");
1219  }
1220 
1221  // Is this event on a channel we shoud look for a subtitle?
1222  // The subtitle is the first sentence in the description, but the
1223  // subtitle can't be the only thing in the description and it must be
1224  // shorter than 55 characters or we risk picking up the wrong thing.
1225  if (process_subtitle)
1226  {
1227  static const QRegularExpression comHemSub { R"([.\?\!] )" };
1228  int pos2 = event.m_description.indexOf(comHemSub);
1229  bool pvalid = pos2 != -1 && pos2 <= 55;
1230  if (pvalid && (event.m_description.length() - (pos2 + 2)) > 0)
1231  {
1232  event.m_subtitle = event.m_description.left(
1233  pos2 + (event.m_description[pos2] == '?' ? 1 : 0));
1234  event.m_description = event.m_description.mid(pos2 + 2);
1235  }
1236  }
1237 
1238  // Teletext subtitles?
1239  static const QRegularExpression comHemTT { "[Tt]ext-[Tt][Vv]" };
1240  if (event.m_description.indexOf(comHemTT) != -1)
1241  event.m_subtitleType |= SUB_NORMAL;
1242 
1243  // Try to findout if this is a rerun and if so the date.
1244  static const QRegularExpression comHemRerun1 { R"([Rr]epris\sfrån\s([^\.]+)(?:\.|$))" };
1245  static const QRegularExpression comHemRerun2 { R"(([0-9]+)/([0-9]+)(?:\s-\s([0-9]{4}))?)" };
1246  match = comHemRerun1.match(event.m_description);
1247  if (!match.hasMatch())
1248  return;
1249 
1250  // Rerun from today
1251  if (match.captured(1) == "i dag")
1252  {
1253  event.m_originalairdate = event.m_starttime.date();
1254  return;
1255  }
1256 
1257  // Rerun from yesterday afternoon
1258  if (match.captured(1) == "eftermiddagen")
1259  {
1260  event.m_originalairdate = event.m_starttime.date().addDays(-1);
1261  return;
1262  }
1263 
1264  // Rerun with day, month and possibly year specified
1265  match2 = comHemRerun2.match(match.capturedView(1));
1266  if (match2.hasMatch())
1267  {
1268  int day = match2.capturedView(1).toInt();
1269  int month = match2.capturedView(2).toInt();
1270  //int year;
1271  //if (match2.capturedLength(3) > 0)
1272  // year = match2.capturedView(3).toInt();
1273  //else
1274  // year = event.m_starttime.date().year();
1275 
1276  if (day > 0 && month > 0)
1277  {
1278  QDate date(event.m_starttime.date().year(), month, day);
1279  // it's a rerun so it must be in the past
1280  if (date > event.m_starttime.date())
1281  date = date.addYears(-1);
1282  event.m_originalairdate = date;
1283  }
1284  return;
1285  }
1286 }
1287 
1292 {
1293  event.m_category = event.m_subtitle;
1294  /* Used for DVB-S Subtitles are separated by a colon */
1295  int position = event.m_description.indexOf(':');
1296  if (position != -1)
1297  {
1298  const QString stmp = event.m_description;
1299  event.m_subtitle = stmp.left(position);
1300  event.m_description = stmp.right(stmp.length() - position - 2);
1301  }
1302 }
1303 
1308 {
1309  if (event.m_description.startsWith("[Program data ") || event.m_description.startsWith("[Program info "))//TEN
1310  {
1311  event.m_description = "";//event.m_subtitle;
1312  }
1313  if (event.m_description.endsWith("Copyright West TV Ltd. 2011)"))
1314  event.m_description.resize(event.m_description.length()-40);
1315 
1316  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())//due to ten's copyright info, this won't be caught before
1317  {
1318  event.m_description = event.m_subtitle;
1319  event.m_subtitle.clear();
1320  }
1321  if (event.m_description.startsWith(event.m_title+" - "))
1322  event.m_description.remove(0,event.m_title.length()+3);
1323  if (event.m_title.startsWith("LIVE: ", Qt::CaseInsensitive))
1324  {
1325  event.m_title.remove(0, 6);
1326  event.m_description.prepend("(Live) ");
1327  }
1328 }
1329 
1334 {
1335  static const QRegularExpression rating { "\\((G|PG|M|MA)\\)" };
1336  auto match = rating.match(event.m_description);
1337  if (match.hasMatch())
1338  {
1339  EventRating prograting;
1340  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1341  event.m_ratings.push_back(prograting);
1342  event.m_description.remove(0,match.capturedLength()+1);
1343  }
1344  if (event.m_description.startsWith("[HD]"))
1345  {
1346  event.m_videoProps |= VID_HDTV;
1347  event.m_description.remove(0,5);
1348  }
1349  if (event.m_description.startsWith("[CC]"))
1350  {
1351  event.m_subtitleType |= SUB_NORMAL;
1352  event.m_description.remove(0,5);
1353  }
1354  if (event.m_subtitle == "Movie")
1355  {
1356  event.m_subtitle.clear();
1357  event.m_categoryType = ProgramInfo::kCategoryMovie;
1358  }
1359  if (event.m_description.startsWith(event.m_title))
1360  event.m_description.remove(0,event.m_title.length()+1);
1361 }
1362 
1367 {
1368  if (event.m_description.endsWith(" Rpt"))
1369  {
1370  event.m_previouslyshown = true;
1371  event.m_description.resize(event.m_description.size()-4);
1372  }
1373  static const QRegularExpression year { "(\\d{4})$" };
1374  auto match = year.match(event.m_description);
1375  if (match.hasMatch())
1376  {
1377  event.m_airdate = match.capturedView(1).toUInt();
1378  event.m_description.resize(event.m_description.size()-5);
1379  }
1380  if (event.m_description.endsWith(" CC"))
1381  {
1382  event.m_subtitleType |= SUB_NORMAL;
1383  event.m_description.resize(event.m_description.size()-3);
1384  }
1385  QString advisories;//store the advisories to append later
1386  static const QRegularExpression adv { "(\\([A-Z,]+\\))$" };
1387  match = adv.match(event.m_description);
1388  if (match.hasMatch())
1389  {
1390  advisories = match.captured(1);
1391  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1392  }
1393  static const QRegularExpression rating { "(C|G|PG|M|MA)$" };
1394  match = rating.match(event.m_description);
1395  if (match.hasMatch())
1396  {
1397  EventRating prograting;
1398  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1399  if (!advisories.isEmpty())
1400  prograting.m_rating.append(" ").append(advisories);
1401  event.m_ratings.push_back(prograting);
1402  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1403  }
1404 }
1409 {
1410  // If the description has been truncated to fit within the
1411  // 'subtitle' eit field, none of the following will work (ABC)
1412  if (event.m_description.endsWith(".."))
1413  return;
1414  event.m_description = event.m_description.trimmed();
1415 
1416  static const QRegularExpression auFreeviewSY { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\)$)" };
1417  auto match = auFreeviewSY.match(event.m_description);
1418  if (match.hasMatch())
1419  {
1420  if (event.m_subtitle.isEmpty())//nine sometimes has an actual subtitle field and the brackets thingo)
1421  event.m_subtitle = match.captured(2);
1422  event.m_airdate = match.capturedView(3).toUInt();
1423  event.m_description = match.captured(1);
1424  return;
1425  }
1426  static const QRegularExpression auFreeviewY { "(.*) \\(([12][0-9][0-9][0-9])\\)$" };
1427  match = auFreeviewY.match(event.m_description);
1428  if (match.hasMatch())
1429  {
1430  event.m_airdate = match.capturedView(2).toUInt();
1431  event.m_description = match.captured(1);
1432  return;
1433  }
1434  static const QRegularExpression auFreeviewSYC { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1435  match = auFreeviewSYC.match(event.m_description);
1436  if (match.hasMatch())
1437  {
1438  if (event.m_subtitle.isEmpty())
1439  event.m_subtitle = match.captured(2);
1440  event.m_airdate = match.capturedView(3).toUInt();
1441  QStringList actors = match.captured(4).split("/");
1442  /* Possible TODO: if EIT inlcude the priority and/or character
1443  * names for the actors, include them in AddPerson call. */
1444  for (const QString& actor : qAsConst(actors))
1445  event.AddPerson(DBPerson::kActor, actor);
1446  event.m_description = match.captured(1);
1447  return;
1448  }
1449  static const QRegularExpression auFreeviewYC { R"((.*) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1450  match = auFreeviewYC.match(event.m_description);
1451  if (match.hasMatch())
1452  {
1453  event.m_airdate = match.capturedView(2).toUInt();
1454  QStringList actors = match.captured(3).split("/");
1455  /* Possible TODO: if EIT inlcude the priority and/or character
1456  * names for the actors, include them in AddPerson call. */
1457  for (const QString& actor : qAsConst(actors))
1458  event.AddPerson(DBPerson::kActor, actor);
1459  event.m_description = match.captured(1);
1460  }
1461 }
1462 
1467 {
1468  const uint SUBTITLE_PCT = 60; // % of description to allow subtitle to
1469  const uint lSUBTITLE_MAX_LEN = 128;// max length of subtitle field in db.
1470 
1471  // Remove subtitle, it contains category information too specific to use
1472  event.m_subtitle = QString("");
1473 
1474  // No need to continue without a description.
1475  if (event.m_description.length() <= 0)
1476  return;
1477 
1478  // Replace incomplete title if the full one is in the description
1479  static const QRegularExpression mcaIncompleteTitle { R"((.*).\.\.\.$)" };
1480  auto match = mcaIncompleteTitle.match(event.m_title);
1481  if (match.hasMatch())
1482  {
1483  static const QString mcaCompleteTitlea { "^'?(" };
1484  static const QString mcaCompleteTitleb { R"([^\.\?]+[^\'])'?[\.\?]\s+(.+))" };
1485  static const QRegularExpression mcaCompleteTitle
1486  { mcaCompleteTitlea + match.captured(1) + mcaCompleteTitleb,
1487  QRegularExpression::CaseInsensitiveOption};
1488  match = mcaCompleteTitle.match(event.m_description);
1489  if (match.hasMatch())
1490  {
1491  event.m_title = match.captured(1).trimmed();
1492  event.m_description = match.captured(2).trimmed();
1493  }
1494  }
1495 
1496  // Try to find subtitle in description
1497  static const QRegularExpression mcaSubtitle { R"(^'([^\.]+)'\.\s+(.+))" };
1498  match = mcaSubtitle.match(event.m_description);
1499  if (match.hasMatch())
1500  {
1501  uint matchLen = match.capturedLength(1);
1502  uint evDescLen = std::max(static_cast<int>(event.m_description.length()), 1);
1503 
1504  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1505  ((matchLen * 100 / evDescLen) < SUBTITLE_PCT))
1506  {
1507  event.m_subtitle = match.captured(1);
1508  event.m_description = match.captured(2);
1509  }
1510  }
1511 
1512  // Try to find episode numbers in subtitle
1513  static const QRegularExpression mcaSeries { R"(^S?(\d+)\/E?(\d+)\s-\s(.*)$)" };
1514  match = mcaSeries.match(event.m_subtitle);
1515  if (match.hasMatch())
1516  {
1517  uint season = match.capturedView(1).toUInt();
1518  uint episode = match.capturedView(2).toUInt();
1519  event.m_subtitle = match.captured(3).trimmed();
1520  event.m_syndicatedepisodenumber =
1521  QString("S%1E%2").arg(season).arg(episode);
1522  event.m_season = season;
1523  event.m_episode = episode;
1524  event.m_categoryType = ProgramInfo::kCategorySeries;
1525  }
1526 
1527  // Closed captioned?
1528  static const QRegularExpression mcaCC { R"(,?\s(HI|English) Subtitles\.?)" };
1529  int position = event.m_description.indexOf(mcaCC);
1530  if (position > 0)
1531  {
1532  event.m_subtitleType |= SUB_HARDHEAR;
1533  event.m_description.remove(mcaCC);
1534  }
1535 
1536  // Dolby Digital 5.1?
1537  static const QRegularExpression mcaDD { R"(,?\sDD\.?)" };
1538  position = event.m_description.indexOf(mcaDD);
1539  if ((position > 0) && (position > event.m_description.length() - 7))
1540  {
1541  event.m_audioProps |= AUD_DOLBY;
1542  event.m_description.remove(mcaDD);
1543  }
1544 
1545  // Remove bouquet tags
1546  static const QRegularExpression mcaAvail { R"(\s(Only available on [^\.]*bouquet|Not available in RSA [^\.]*)\.?)" };
1547  event.m_description.remove(mcaAvail);
1548 
1549  // Try to find year and director from the end of the description
1550  bool isMovie = false;
1551  static const QRegularExpression mcaCredits { R"((.*)\s\((\d{4})\)\s*([^\.]+)\.?\s*$)" };
1552  match = mcaCredits.match(event.m_description);
1553  if (match.hasMatch())
1554  {
1555  isMovie = true;
1556  event.m_description = match.captured(1).trimmed();
1557  bool ok = false;
1558  uint y = match.captured(2).trimmed().toUInt(&ok);
1559  if (ok)
1560  event.m_airdate = y;
1561  event.AddPerson(DBPerson::kDirector, match.captured(3).trimmed());
1562  }
1563  else
1564  {
1565  // Try to find year only from the end of the description
1566  static const QRegularExpression mcaYear { R"((.*)\s\((\d{4})\)\s*$)" };
1567  match = mcaYear.match(event.m_description);
1568  if (match.hasMatch())
1569  {
1570  isMovie = true;
1571  event.m_description = match.captured(1).trimmed();
1572  bool ok = false;
1573  uint y = match.captured(2).trimmed().toUInt(&ok);
1574  if (ok)
1575  event.m_airdate = y;
1576  }
1577  }
1578 
1579  if (isMovie)
1580  {
1581  static const QRegularExpression mcaActors { R"((.*\.)\s+([^\.]+\s[A-Z][^\.]+)\.\s*)" };
1582  match = mcaActors.match(event.m_description);
1583  if (match.hasMatch())
1584  {
1585  static const QRegularExpression mcaActorsSeparator { "(,\\s+)" };
1586 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1587  const QStringList actors = match.captured(2).split(
1588  mcaActorsSeparator, QString::SkipEmptyParts);
1589 #else
1590  const QStringList actors = match.captured(2).split(
1591  mcaActorsSeparator, Qt::SkipEmptyParts);
1592 #endif
1593  /* Possible TODO: if EIT inlcude the priority and/or character
1594  * names for the actors, include them in AddPerson call. */
1595  for (const auto & actor : qAsConst(actors))
1596  event.AddPerson(DBPerson::kActor, actor.trimmed());
1597  event.m_description = match.captured(1).trimmed();
1598  }
1599  event.m_categoryType = ProgramInfo::kCategoryMovie;
1600  }
1601 }
1602 
1607 {
1608  // subtitle with episode number: "Folge *: 'subtitle'
1609  static const QRegularExpression superRTLSubtitle { R"(^Folge\s(\d{1,3}):\s'(.*)')" };
1610  auto match = superRTLSubtitle.match(event.m_subtitle);
1611  if (match.hasMatch())
1612  {
1613  event.m_season = 0;
1614  event.m_episode = match.capturedView(1).toUInt();
1615  event.m_subtitle = match.captured(2);
1616  }
1617 
1618  // No need to continue without a description or with an subtitle.
1619  if (event.m_description.length() <= 0 || event.m_subtitle.length() > 0)
1620  return;
1621 
1622  // Repeat
1623  static const QRegularExpression rtlRepeat
1624  { R"([\s\(]?Wiederholung.+vo[m|n].+(\d{2}\.\d{2}\.\d{4}|\d{2}[:\.]\d{2}\sUhr)\)?)" };
1625  match = rtlRepeat.match(event.m_description);
1626  if (match.hasMatch())
1627  {
1628  // remove '.' if it matches at the beginning of the description
1629  int pos = match.capturedStart(0);
1630  int length = match.capturedLength(0) + (pos ? 0 : 1);
1631  event.m_description = event.m_description.remove(pos, length).trimmed();
1632  }
1633 
1634  // should be (?:\x{8a}|\\.\\s*|$) but 0x8A gets replaced with 0x20
1635  static const QRegularExpression rtlSubtitle1 { R"(^Folge\s(\d{1,4})\s*:\s+'(.*)'(?:\s|\.\s*|$))" };
1636  static const QRegularExpression rtlSubtitle2 { R"(^Folge\s(\d{1,4})\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1637  static const QRegularExpression rtlSubtitle3 { R"(^(?:Folge\s)?(\d{1,4}(?:\/[IVX]+)?)\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1638  static const QRegularExpression rtlSubtitle4 { R"(^Thema.{0,5}:\s([^\.]+)\.\s*)" };
1639  static const QRegularExpression rtlSubtitle5 { "^'(.+)'\\.\\s*" };
1640  static const QRegularExpression rtlEpisodeNo1 { R"(^(Folge\s\d{1,4})\.*\s*)" };
1641  static const QRegularExpression rtlEpisodeNo2 { R"(^(\d{1,2}\/[IVX]+)\.*\s*)" };
1642 
1643  auto match1 = rtlSubtitle1.match(event.m_description);
1644  auto match2 = rtlSubtitle2.match(event.m_description);
1645  auto match3 = rtlSubtitle3.match(event.m_description);
1646  auto match4 = rtlSubtitle4.match(event.m_description);
1647  auto match5 = rtlSubtitle5.match(event.m_description);
1648  auto match6 = rtlEpisodeNo1.match(event.m_description);
1649  auto match7 = rtlEpisodeNo2.match(event.m_description);
1650 
1651  // subtitle with episode number: "Folge *: 'subtitle'. description
1652  if (match1.hasMatch())
1653  {
1654  event.m_syndicatedepisodenumber = match1.captured(1);
1655  event.m_subtitle = match1.captured(2);
1656  event.m_description =
1657  event.m_description.remove(0, match1.capturedLength());
1658  }
1659  // episode number subtitle
1660  else if (match2.hasMatch())
1661  {
1662  event.m_syndicatedepisodenumber = match2.captured(1);
1663  event.m_subtitle = match2.captured(2);
1664  event.m_description =
1665  event.m_description.remove(0, match2.capturedLength());
1666  }
1667  // episode number subtitle
1668  else if (match3.hasMatch())
1669  {
1670  event.m_syndicatedepisodenumber = match3.captured(1);
1671  event.m_subtitle = match3.captured(2);
1672  event.m_description =
1673  event.m_description.remove(0, match3.capturedLength());
1674  }
1675  // "Thema..."
1676  else if (match4.hasMatch())
1677  {
1678  event.m_subtitle = match4.captured(1);
1679  event.m_description =
1680  event.m_description.remove(0, match4.capturedLength());
1681  }
1682  // "'...'"
1683  else if (match5.hasMatch())
1684  {
1685  event.m_subtitle = match5.captured(1);
1686  event.m_description =
1687  event.m_description.remove(0, match5.capturedLength());
1688  }
1689  // episode number
1690  else if (match6.hasMatch())
1691  {
1692  event.m_syndicatedepisodenumber = match6.captured(2);
1693  event.m_subtitle = match6.captured(1);
1694  event.m_description =
1695  event.m_description.remove(0, match6.capturedLength());
1696  }
1697  // episode number
1698  else if (match7.hasMatch())
1699  {
1700  event.m_syndicatedepisodenumber = match7.captured(2);
1701  event.m_subtitle = match7.captured(1);
1702  event.m_description =
1703  event.m_description.remove(0, match7.capturedLength());
1704  }
1705 
1706  /* got an episode title now? (we did not have one at the start of this function) */
1707  if (!event.m_subtitle.isEmpty())
1709 
1710  /* if we do not have an episode title by now try some guessing as last resort */
1711  if (event.m_subtitle.length() == 0)
1712  {
1713  const uint SUBTITLE_PCT = 35; // % of description to allow subtitle up to
1714  const uint lSUBTITLE_MAX_LEN = 50; // max length of subtitle field in db
1715 
1716  static const QRegularExpression rtlSubtitle { R"(^([^\.]{3,})\.\s+(.+))" };
1717  match = rtlSubtitle.match(event.m_description);
1718  if (match.hasMatch())
1719  {
1720  uint matchLen = match.capturedLength(1);
1721  uint evDescLen = std::max(static_cast<int>(event.m_description.length()), 1);
1722 
1723  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1724  (matchLen * 100 / evDescLen < SUBTITLE_PCT))
1725  {
1726  event.m_subtitle = match.captured(1);
1727  event.m_description = match.captured(2);
1728  }
1729  }
1730  }
1731 }
1732 
1733 // FIXME add more jobs
1734 static const QMap<QString,DBPerson::Role> deCrewTitle {
1735  { "Regie", DBPerson::kDirector },
1736  { "Drehbuch", DBPerson::kWriter },
1737  { "Autor", DBPerson::kWriter },
1738 };
1739 
1744 {
1745  static const QRegularExpression pro7Subtitle { R"(,{0,1}([^,]*?),([^,]+?)\s{0,1}(\d{4})$)" };
1746  auto match = pro7Subtitle.match(event.m_subtitle);
1747  if (match.hasMatch())
1748  {
1749  if (event.m_airdate == 0)
1750  {
1751  event.m_airdate = match.captured(3).toUInt();
1752  }
1753  event.m_subtitle.remove(match.capturedStart(0),
1754  match.capturedLength(0));
1755  }
1756 
1757  /* handle cast, the very last in description */
1758  static const QRegularExpression pro7Cast { "\n\nDarsteller:\n(.*)$",
1759  QRegularExpression::DotMatchesEverythingOption };
1760  match = pro7Cast.match(event.m_description);
1761  if (match.hasMatch())
1762  {
1763  QStringList cast = match.captured(1).split("\n");
1764  for (const auto& line : qAsConst(cast))
1765  {
1766  static const QRegularExpression pro7CastOne { R"(^([^\(]*?)\((.*)\)$)" };
1767  auto match2 = pro7CastOne.match(line);
1768  if (match2.hasMatch())
1769  {
1770  /* Possible TODO: if EIT inlcude the priority and/or character
1771  * names for the actors, include them in AddPerson call. */
1772  event.AddPerson (DBPerson::kActor, match2.captured(1).simplified());
1773  }
1774  }
1775  event.m_description.remove(match.capturedStart(0),
1776  match.capturedLength(0));
1777  }
1778 
1779  /* handle crew, the new very last in description
1780  * format: "Role: Name" or "Role: Name1, Name2"
1781  */
1782  static const QRegularExpression pro7Crew { "\n\n(Regie:.*)$",
1783  QRegularExpression::DotMatchesEverythingOption };
1784  match = pro7Crew.match(event.m_description);
1785  if (match.hasMatch())
1786  {
1787  QStringList crew = match.captured(1).split("\n");
1788  for (const auto& line : qAsConst(crew))
1789  {
1790  static const QRegularExpression pro7CrewOne { R"(^(.*?):\s+(.*)$)" };
1791  auto match2 = pro7CrewOne.match(line);
1792  if (match2.hasMatch())
1793  {
1795  if (deCrewTitle.contains(match2.captured(1)))
1796  role = deCrewTitle[match2.captured(1)];
1797  QStringList names = match2.captured(2).simplified().split(R"(\s*,\s*)");
1798  for (const auto & name : qAsConst(names))
1799  {
1800  /* Possible TODO: if EIT inlcude the priority
1801  * and/or character names for the actors, include
1802  * them in AddPerson call. */
1803  event.AddPerson (role, name);
1804  }
1805  }
1806  }
1807  event.m_description.remove(match.capturedStart(0),
1808  match.capturedLength(0));
1809  }
1810 
1811  /* FIXME unless its Jamie Oliver, then there is neither Crew nor Cast only
1812  * \n\nKoch: Jamie Oliver
1813  */
1814 }
1815 
1820 {
1821  static const QRegularExpression deDisneyChannelSubtitle { R"(,([^,]+?)\s{0,1}(\d{4})$)" };
1822  auto match = deDisneyChannelSubtitle.match(event.m_subtitle);
1823  if (match.hasMatch())
1824  {
1825  if (event.m_airdate == 0)
1826  {
1827  event.m_airdate = match.captured(3).toUInt();
1828  }
1829  event.m_subtitle.remove(match.capturedStart(0),
1830  match.capturedLength(0));
1831  }
1832  static const QRegularExpression tmp { R"(\s[^\s]+?-(Serie))" };
1833  match = tmp.match(event.m_subtitle);
1834  if (match.hasMatch())
1835  {
1836  event.m_categoryType = ProgramInfo::kCategorySeries;
1837  event.m_category=match.captured(0).trimmed();
1838  event.m_subtitle.remove(match.capturedStart(0),
1839  match.capturedLength(0));
1840  }
1841 }
1842 
1847 {
1848  static const QRegularExpression atvSubtitle { R"(,{0,1}\sFolge\s(\d{1,3})$)" };
1849  event.m_subtitle.replace(atvSubtitle, "");
1850 }
1851 
1852 
1857 {
1858  static const QRegularExpression fiRerun { R"(\s?Uusinta[a-zA-Z\s]*\.?)" };
1859  auto match = fiRerun.match(event.m_description);
1860  if (match.hasMatch())
1861  {
1862  event.m_previouslyshown = true;
1863  event.m_description.remove(match.capturedStart(), match.capturedLength());
1864  }
1865 
1866  static const QRegularExpression fiRerun2 { R"(\([Uu]\))" };
1867  match = fiRerun2.match(event.m_description);
1868  if (match.hasMatch())
1869  {
1870  event.m_previouslyshown = true;
1871  event.m_description.remove(match.capturedStart(), match.capturedLength());
1872  }
1873 
1874  // Check for (Stereo) in the decription and set the <audio> tags
1875  match = kStereo.match(event.m_description);
1876  if (match.hasMatch())
1877  {
1878  event.m_audioProps |= AUD_STEREO;
1879  event.m_description.remove(match.capturedStart(), match.capturedLength());
1880  }
1881 
1882  // Remove age limit in parenthesis at end of title
1883  static const QRegularExpression fiAgeLimit { R"(\((\d{1,2}|[ST])\)$)" };
1884  match = fiAgeLimit.match(event.m_title);
1885  if (match.hasMatch())
1886  {
1887  EventRating prograting;
1888  prograting.m_system="FI"; prograting.m_rating = match.captured(1);
1889  event.m_ratings.push_back(prograting);
1890  event.m_title.remove(match.capturedStart(), match.capturedLength());
1891  }
1892 
1893  // Remove Film or Elokuva at start of title
1894  static const QRegularExpression fiFilm { "^(Film|Elokuva): " };
1895  match = fiFilm.match(event.m_title);
1896  if (match.hasMatch())
1897  {
1898  event.m_category = "Film";
1899  event.m_categoryType = ProgramInfo::kCategoryMovie;
1900  event.m_title.remove(match.capturedStart(), match.capturedLength());
1901  }
1902 }
1903 
1909 {
1910  QString country = "";
1911 
1912  static const QRegularExpression dePremiereLength { R"(\s?[0-9]+\sMin\.)" };
1913  event.m_description = event.m_description.replace(dePremiereLength, "");
1914 
1915  static const QRegularExpression dePremiereAirdate { R"(\s?([^\s^\.]+)\s((?:1|2)[0-9]{3})\.)" };
1916  auto match = dePremiereAirdate.match(event.m_description);
1917  if ( match.hasMatch())
1918  {
1919  country = match.captured(1).trimmed();
1920  bool ok = false;
1921  uint y = match.captured(2).toUInt(&ok);
1922  if (ok)
1923  event.m_airdate = y;
1924  event.m_description.remove(match.capturedStart(0),
1925  match.capturedLength(0));
1926  }
1927 
1928  static const QRegularExpression dePremiereCredits { R"(\sVon\s([^,]+)(?:,|\su\.\sa\.)\smit\s([^\.]*)\.)" };
1929  match = dePremiereCredits.match(event.m_description);
1930  if (match.hasMatch())
1931  {
1932  event.AddPerson(DBPerson::kDirector, match.captured(1));
1933 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1934  const QStringList actors = match.captured(2).split(
1935  ", ", QString::SkipEmptyParts);
1936 #else
1937  const QStringList actors = match.captured(2).split(
1938  ", ", Qt::SkipEmptyParts);
1939 #endif
1940  /* Possible TODO: if EIT inlcude the priority and/or character
1941  * names for the actors, include them in AddPerson call. */
1942  for (const auto & actor : qAsConst(actors))
1943  event.AddPerson(DBPerson::kActor, actor);
1944  event.m_description.remove(match.capturedStart(0),
1945  match.capturedLength(0));
1946  }
1947 
1948  event.m_description = event.m_description.replace("\u000A$", "");
1949  event.m_description = event.m_description.replace("\u000A", " ");
1950 
1951  // move the original titel from the title to subtitle
1952  static const QRegularExpression dePremiereOTitle { R"(\s*\(([^\)]*)\)$)" };
1953  match = dePremiereOTitle.match(event.m_title);
1954  if (match.hasMatch())
1955  {
1956  event.m_subtitle = QString("%1, %2").arg(match.captured(1), country);
1957  event.m_title.remove(match.capturedStart(0),
1958  match.capturedLength(0));
1959  }
1960 
1961  // Find infos about season and episode number
1962  static const QRegularExpression deSkyDescriptionSeasonEpisode { R"(^(\d{1,2}).\sStaffel,\sFolge\s(\d{1,2}):\s)" };
1963  match = deSkyDescriptionSeasonEpisode.match(event.m_description);
1964  if (match.hasMatch())
1965  {
1966  event.m_season = match.captured(1).trimmed().toUInt();
1967  event.m_episode = match.captured(2).trimmed().toUInt();
1968  event.m_description.remove(match.capturedStart(0),
1969  match.capturedLength(0));
1970  }
1971 }
1972 
1973 /*
1974  * Mapping table from English category names to Dutch names and types
1975  */
// Value type of the table: what an incoming category name is replaced with.
// NOTE(review): the table's initialisers supply two values per entry (a name
// and a ProgramInfo category constant) and the lookup destructures two
// members, so a second member is expected after 'name' -- it appears to be
// missing from this copy; confirm against the upstream file.
1976 struct NLMapResult {
1977  QString name; // replacement (Dutch) category name
1979 };
1980 static const QMap<QString, NLMapResult> categoryTrans = {
1981  { "Documentary", { "Documentaire", ProgramInfo::kCategoryNone } },
1982  { "News", { "Nieuws/actualiteiten", ProgramInfo::kCategoryNone } },
1983  { "Kids", { "Jeugd", ProgramInfo::kCategoryNone } },
1984  { "Show/game Show", { "Amusement", ProgramInfo::kCategoryTVShow } },
1985  { "Music/Ballet/Dance", { "Muziek", ProgramInfo::kCategoryNone } },
1986  { "News magazine", { "Informatief", ProgramInfo::kCategoryNone } },
1987  { "Movie", { "Film", ProgramInfo::kCategoryMovie } },
1988  { "Nature/animals/Environment", { "Natuur", ProgramInfo::kCategoryNone } },
1989  { "Movie - Adult", { "Erotiek", ProgramInfo::kCategoryNone } },
1990  { "Movie - Soap/melodrama/folkloric",
1991  { "Serie/soap", ProgramInfo::kCategorySeries } },
1992  { "Arts/Culture", { "Kunst/Cultuur", ProgramInfo::kCategoryNone } },
1993  { "Sports", { "Sport", ProgramInfo::kCategorySports } },
1994  { "Cartoons/Puppets", { "Animatie", ProgramInfo::kCategoryNone } },
1995  { "Movie - Comedy", { "Comedy", ProgramInfo::kCategorySeries } },
1996  { "Movie - Detective/Thriller", { "Misdaad", ProgramInfo::kCategoryNone } },
1997  { "Social/Spiritual Sciences", { "Religieus", ProgramInfo::kCategoryNone } },
1998 };
1999 
2004 {
2005  QString fullinfo = event.m_subtitle + event.m_description;
2006  event.m_subtitle = "";
2007 
2008  // Convert categories to Dutch categories Myth knows.
2009  // nog invoegen: comedy, sport, misdaad
2010 
2011  if (categoryTrans.contains(event.m_category))
2012  {
2013  auto [name, type] = categoryTrans[event.m_category];
2014  event.m_category = name;
2015  event.m_categoryType = type;
2016  }
2017 
2018  // Film - categories are usually not Films
2019  if (event.m_category.startsWith("Film -"))
2020  event.m_categoryType = ProgramInfo::kCategorySeries;
2021 
2022  // Get stereo info
2023  auto match = kStereo.match(fullinfo);
2024  if (match.hasMatch())
2025  {
2026  event.m_audioProps |= AUD_STEREO;
2027  fullinfo.remove(match.capturedStart(), match.capturedLength());
2028  }
2029 
2030  //Get widescreen info
2031  static const QRegularExpression nlWide { "breedbeeld" };
2032  match = nlWide.match(fullinfo);
2033  if (match.hasMatch())
2034  {
2035  event.m_videoProps |= VID_WIDESCREEN;
2036  fullinfo = fullinfo.replace("breedbeeld", ".");
2037  }
2038 
2039  // Get repeat info
2040  static const QRegularExpression nlRepeat { "herh." };
2041  match = nlRepeat.match(fullinfo);
2042  if (match.hasMatch())
2043  fullinfo = fullinfo.replace("herh.", ".");
2044 
2045  // Get teletext subtitle info
2046  static const QRegularExpression nlTxt { "txt" };
2047  match = nlTxt.match(fullinfo);
2048  if (match.hasMatch())
2049  {
2050  event.m_subtitleType |= SUB_NORMAL;
2051  fullinfo = fullinfo.replace("txt", ".");
2052  }
2053 
2054  // Get HDTV information
2055  static const QRegularExpression nlHD { R"(\sHD$)" };
2056  match = nlHD.match(event.m_title);
2057  if (match.hasMatch())
2058  {
2059  event.m_videoProps |= VID_HDTV;
2060  event.m_title.remove(match.capturedStart(), match.capturedLength());
2061  }
2062 
2063  // Try to make subtitle from Afl.:
2064  static const QRegularExpression nlSub { R"(\sAfl\.:\s([^\.]+)\.)" };
2065  match = nlSub.match(fullinfo);
2066  if (match.hasMatch())
2067  {
2068  QString tmpSubString = match.captured(0);
2069  tmpSubString = tmpSubString.right(match.capturedLength() - 7);
2070  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2071  fullinfo.remove(match.capturedStart(), match.capturedLength());
2072  }
2073 
2074  // Try to make subtitle from " "
2075  static const QRegularExpression nlSub2 { R"(\s\"([^\"]+)\")" };
2076  match = nlSub2.match(fullinfo);
2077  if (match.hasMatch())
2078  {
2079  QString tmpSubString = match.captured(0);
2080  tmpSubString = tmpSubString.right(match.capturedLength() - 2);
2081  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2082  fullinfo.remove(match.capturedStart(), match.capturedLength());
2083  }
2084 
2085 
2086  // This is trying to catch the case where the subtitle is in the main title
2087  // but avoid cases where it isn't a subtitle e.g cd:uk
2088  int position = 0;
2089  if (((position = event.m_title.indexOf(":")) != -1) &&
2090  (event.m_title[position + 1].toUpper() == event.m_title[position + 1]) &&
2091  (event.m_subtitle.isEmpty()))
2092  {
2093  event.m_subtitle = event.m_title.mid(position + 1);
2094  event.m_title = event.m_title.left(position);
2095  }
2096 
2097 
2098  // Get the actors
2099  static const QRegularExpression nlActors { R"(\sMet:\s.+e\.a\.)" };
2100  static const QRegularExpression nlPersSeparator { R"((, |\sen\s))" };
2101  match = nlActors.match(fullinfo);
2102  if (match.hasMatch())
2103  {
2104  QString tmpActorsString = match.captured(0);
2105  tmpActorsString = tmpActorsString.right(tmpActorsString.length() - 6);
2106  tmpActorsString = tmpActorsString.left(tmpActorsString.length() - 5);
2107 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2108  const QStringList actors =
2109  tmpActorsString.split(nlPersSeparator, QString::SkipEmptyParts);
2110 #else
2111  const QStringList actors =
2112  tmpActorsString.split(nlPersSeparator, Qt::SkipEmptyParts);
2113 #endif
2114  /* Possible TODO: if EIT inlcude the priority and/or character
2115  * names for the actors, include them in AddPerson call. */
2116  for (const auto & actor : qAsConst(actors))
2117  event.AddPerson(DBPerson::kActor, actor);
2118  fullinfo.remove(match.capturedStart(), match.capturedLength());
2119  }
2120 
2121  // Try to find presenter
2122  static const QRegularExpression nlPres { R"(\sPresentatie:\s([^\.]+)\.)" };
2123  match = nlPres.match(fullinfo);
2124  if (match.hasMatch())
2125  {
2126  QString tmpPresString = match.captured(0);
2127  tmpPresString = tmpPresString.right(tmpPresString.length() - 14);
2128  tmpPresString = tmpPresString.left(tmpPresString.length() -1);
2129 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2130  const QStringList presenters =
2131  tmpPresString.split(nlPersSeparator, QString::SkipEmptyParts);
2132 #else
2133  const QStringList presenters =
2134  tmpPresString.split(nlPersSeparator, Qt::SkipEmptyParts);
2135 #endif
2136  for (const auto & presenter : qAsConst(presenters))
2137  event.AddPerson(DBPerson::kPresenter, presenter);
2138  fullinfo.remove(match.capturedStart(), match.capturedLength());
2139  }
2140 
2141  // Try to find year
2142  static const QRegularExpression nlYear1 { R"(\suit\s([1-2][0-9]{3}))" };
2143  static const QRegularExpression nlYear2 { R"((\s\([A-Z]{0,3}/?)([1-2][0-9]{3})\))",
2144  QRegularExpression::CaseInsensitiveOption };
2145  match = nlYear1.match(fullinfo);
2146  if (match.hasMatch())
2147  {
2148  bool ok = false;
2149  uint y = match.capturedView(1).toUInt(&ok);
2150  if (ok)
2151  event.m_originalairdate = QDate(y, 1, 1);
2152  }
2153 
2154  match = nlYear2.match(fullinfo);
2155  if (match.hasMatch())
2156  {
2157  bool ok = false;
2158  uint y = match.capturedView(2).toUInt(&ok);
2159  if (ok)
2160  event.m_originalairdate = QDate(y, 1, 1);
2161  }
2162 
2163  // Try to find director
2164  static const QRegularExpression nlDirector { R"(\svan\s(([A-Z][a-z]+\s)|([A-Z]\.\s)))" };
2165  match = nlDirector.match(fullinfo);
2166  if (match.hasMatch())
2167  event.AddPerson(DBPerson::kDirector, match.captured(1));
2168 
2169  // Strip leftovers
2170  static const QRegularExpression nlRub { R"(\s?\(\W+\)\s?)" };
2171  fullinfo.remove(nlRub);
2172 
2173  // Strip category info from description
2174  static const QRegularExpression nlCat { "^(Amusement|Muziek|Informatief|Nieuws/actualiteiten|Jeugd|Animatie|Sport|Serie/soap|Kunst/Cultuur|Documentaire|Film|Natuur|Erotiek|Comedy|Misdaad|Religieus)\\.\\s" };
2175  fullinfo.remove(nlCat);
2176 
2177  // Remove omroep from title
2178  static const QRegularExpression nlOmroep { R"(\s\(([A-Z]+/?)+\)$)" };
2179  event.m_title.remove(nlOmroep);
2180 
2181  // Put information back in description
2182 
2183  event.m_description = fullinfo;
2184 }
2185 
2187 {
2188  // remove category movie from short events
// NOTE(review): the line that opens this condition (presumably an `if (`
// testing for the movie category) is not present in this copy -- the line
// below closes a parenthesis that is never opened; confirm against the
// upstream file.
// The visible part compares the time from m_starttime to m_endtime, in
// seconds, against kMinMovieDuration.
2190  event.m_starttime.secsTo(event.m_endtime) < kMinMovieDuration)
2191  {
2192  /* default taken from ContentDescriptor::GetMythCategory */
2193  event.m_categoryType = ProgramInfo::kCategoryTVShow;
2194  }
2195 }
2196 
2201 {
2202  // Check for "title (R)" in the title
2203  static const QRegularExpression noRerun { "\\(R\\)" };
2204  auto match = noRerun.match(event.m_title);
2205  if (match.hasMatch())
2206  {
2207  event.m_previouslyshown = true;
2208  event.m_title.remove(match.capturedStart(), match.capturedLength());
2209  }
2210  // Check for "subtitle (HD)" in the subtitle
2211  static const QRegularExpression noHD { R"([\(\[]HD[\)\]])" };
2212  match = noHD.match(event.m_subtitle);
2213  if (match.hasMatch())
2214  {
2215  event.m_videoProps |= VID_HDTV;
2216  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
2217  }
2218  // Check for "description (HD)" in the description
2219  match = noHD.match(event.m_description);
2220  if (match.hasMatch())
2221  {
2222  event.m_videoProps |= VID_HDTV;
2223  event.m_description.remove(match.capturedStart(), match.capturedLength());
2224  }
2225 }
2226 
2231 {
2232  // Check for "title (R)" in the title
2233  static const QRegularExpression noRerun { "\\(R\\)" };
2234  auto match = noRerun.match(event.m_title);
2235  if (match.hasMatch())
2236  {
2237  event.m_previouslyshown = true;
2238  event.m_title.remove(match.capturedStart(), match.capturedLength());
2239  }
2240  // Check for "(R)" in the description
2241  match = noRerun.match(event.m_description);
2242  if (match.hasMatch())
2243  {
2244  event.m_previouslyshown = true;
2245  }
2246 
2247  // Move colon separated category from program-titles into description
2248  // Have seen "NRK2s historiekveld: Film: bla-bla"
2249  static const QRegularExpression noNRKCategories
2250  { "^(Superstrek[ea]r|Supersomm[ea]r|Superjul|Barne-tv|Fantorangen|Kuraffen|Supermorg[eo]n|Julemorg[eo]n|Sommermorg[eo]n|"
2251  "Kuraffen-TV|Sport i dag|NRKs sportsl.rdag|NRKs sportss.ndag|Dagens dokumentar|"
2252  "NRK2s historiekveld|Detektimen|Nattkino|Filmklassiker|Film|Kortfilm|P.skemorg[eo]n|"
2253  "Radioteatret|Opera|P2-Akademiet|Nyhetsmorg[eo]n i P2 og Alltid Nyheter:): (.+)" };
2254  match = noNRKCategories.match(event.m_title);
2255  if (match.hasMatch() && (match.capturedLength(2) > 1))
2256  {
2257  event.m_title = match.captured(2);
2258  event.m_description = "(" + match.captured(1) + ") " + event.m_description;
2259  }
2260 
2261  // Remove season premiere markings
2262  static const QRegularExpression noPremiere { "\\s+-\\s+(Sesongpremiere|Premiere|premiere)!?$" };
2263  match = noPremiere.match(event.m_title);
2264  if (match.hasMatch() && (match.capturedStart() >= 3))
2265  event.m_title.remove(match.capturedStart(), match.capturedLength());
2266 
2267  // Try to find colon-delimited subtitle in title, only tested for NRK channels
2268  if (!event.m_title.startsWith("CSI:") &&
2269  !event.m_title.startsWith("CD:") &&
2270  !event.m_title.startsWith("Distriktsnyheter: fra"))
2271  {
2272  static const QRegularExpression noColonSubtitle { "^([^:]+): (.+)" };
2273  match = noColonSubtitle.match(event.m_title);
2274  if (match.hasMatch())
2275  {
2276  if (event.m_subtitle.length() <= 0)
2277  {
2278  event.m_title = match.captured(1);
2279  event.m_subtitle = match.captured(2);
2280  }
2281  else if (event.m_subtitle == match.captured(2))
2282  {
2283  event.m_title = match.captured(1);
2284  }
2285  }
2286  }
2287 }
2288 
2293 {
2294  // Source: YouSee Rules of Operation v1.16
2295  // url: http://yousee.dk/~/media/pdf/CPE/Rules_Operation.ashx
2296  int episode = -1;
2297  int season = -1;
2298 
2299  // Title search
2300  // episode and part/part total
2301  static const QRegularExpression dkEpisode { R"(\(([0-9]+)\))" };
2302  auto match = dkEpisode.match(event.m_title);
2303  if (match.hasMatch())
2304  {
2305  episode = match.capturedView(1).toInt();
2306  event.m_partnumber = match.capturedView(1).toInt();
2307  event.m_title.remove(match.capturedStart(), match.capturedLength());
2308  }
2309 
2310  static const QRegularExpression dkPart { R"(\(([0-9]+):([0-9]+)\))" };
2311  match = dkPart.match(event.m_title);
2312  if (match.hasMatch())
2313  {
2314  episode = match.capturedView(1).toInt();
2315  event.m_partnumber = match.capturedView(1).toInt();
2316  event.m_parttotal = match.capturedView(2).toInt();
2317  event.m_title.remove(match.capturedStart(), match.capturedLength());
2318  }
2319 
2320  // subtitle delimiters
2321  static const QRegularExpression dkSubtitle1 { "^([^:]+): (.+)" };
2322  match = dkSubtitle1.match(event.m_title);
2323  if (match.hasMatch())
2324  {
2325  event.m_title = match.captured(1);
2326  event.m_subtitle = match.captured(2);
2327  }
2328  else
2329  {
2330  static const QRegularExpression dkSubtitle2 { "^([^:]+) - (.+)" };
2331  match = dkSubtitle2.match(event.m_title);
2332  if (match.hasMatch())
2333  {
2334  event.m_title = match.captured(1);
2335  event.m_subtitle = match.captured(2);
2336  }
2337  }
2338 
2339  // Description search
2340  // Season (Sæson [:digit:]+.) => episode = season episode number
2341  // or year (- år [:digit:]+(\\)|:) ) => episode = total episode number
2342  static const QRegularExpression dkSeason1 { "Sæson ([0-9]+)\\." };
2343  match = dkSeason1.match(event.m_description);
2344  if (match.hasMatch())
2345  {
2346  season = match.capturedView(1).toInt();
2347  }
2348  else
2349  {
2350  static const QRegularExpression dkSeason2 { "- år ([0-9]+) :" };
2351  match = dkSeason2.match(event.m_description);
2352  if (match.hasMatch())
2353  {
2354  season = match.capturedView(1).toInt();
2355  }
2356  }
2357 
2358  if (episode > 0)
2359  event.m_episode = episode;
2360 
2361  if (season > 0)
2362  event.m_season = season;
2363 
2364  //Feature:
2365  static const QRegularExpression dkFeatures { "Features:(.+)" };
2366  match = dkFeatures.match(event.m_description);
2367  if (match.hasMatch())
2368  {
2369  QString features = match.captured(1);
2370  event.m_description.remove(match.capturedStart(),
2371  match.capturedLength());
2372  // 16:9
2373  static const QRegularExpression dkWidescreen { " 16:9" };
2374  if (features.indexOf(dkWidescreen) != -1)
2375  event.m_videoProps |= VID_WIDESCREEN;
2376  // HDTV
2377  static const QRegularExpression dkHD { " HD" };
2378  if (features.indexOf(dkHD) != -1)
2379  event.m_videoProps |= VID_HDTV;
2380  // Dolby Digital surround
2381  static const QRegularExpression dkDolby { " 5:1" };
2382  if (features.indexOf(dkDolby) != -1)
2383  event.m_audioProps |= AUD_DOLBY;
2384  // surround
2385  static const QRegularExpression dkSurround { R"( \(\(S\)\))" };
2386  if (features.indexOf(dkSurround) != -1)
2387  event.m_audioProps |= AUD_SURROUND;
2388  // stereo
2389  static const QRegularExpression dkStereo { " S" };
2390  if (features.indexOf(dkStereo) != -1)
2391  event.m_audioProps |= AUD_STEREO;
2392  // (G)
2393  static const QRegularExpression dkReplay { " \\(G\\)" };
2394  if (features.indexOf(dkReplay) != -1)
2395  event.m_previouslyshown = true;
2396  // TTV
2397  static const QRegularExpression dkTxt { " TTV" };
2398  if (features.indexOf(dkTxt) != -1)
2399  event.m_subtitleType |= SUB_NORMAL;
2400  }
2401 
2402  // Series and program id
2403  // programid is currently not transmitted
2404  // YouSee doesn't use a default authority but uses the first byte after
2405  // the / to indicate if the seriesid is global unique or unique on the
2406  // service id
2407  if (event.m_seriesId.length() >= 1 && event.m_seriesId[0] == '/')
2408  {
2409  QString newid;
2410  if (event.m_seriesId[1] == '1')
2411  {
2412  newid = QString("%1%2").arg(event.m_chanid).
2413  arg(event.m_seriesId.mid(2,8));
2414  }
2415  else
2416  {
2417  newid = event.m_seriesId.mid(2,8);
2418  }
2419  event.m_seriesId = newid;
2420  }
2421 
2422  if (event.m_programId.length() >= 1 && event.m_programId[0] == '/')
2423  event.m_programId[0]='_';
2424 
2425  // Add season and episode number to subtitle
2426  if (episode > 0)
2427  {
2428  event.m_subtitle = QString("%1 (%2").arg(event.m_subtitle).arg(episode);
2429  if (event.m_parttotal >0)
2430  event.m_subtitle = QString("%1:%2").arg(event.m_subtitle).
2431  arg(event.m_parttotal);
2432  if (season > 0)
2433  {
2434  event.m_season = season;
2435  event.m_episode = episode;
2436  event.m_syndicatedepisodenumber =
2437  QString("S%1E%2").arg(season).arg(episode);
2438  event.m_subtitle = QString("%1 Sæson %2").arg(event.m_subtitle).
2439  arg(season);
2440  }
2441  event.m_subtitle = QString("%1)").arg(event.m_subtitle);
2442  }
2443 
2444  // Find actors and director in description
2445  static const QRegularExpression dkDirector { "(?:Instr.: |Instrukt.r: )(.+)$" };
2446  static const QRegularExpression dkPersonsSeparator { "(, )|(og )" };
2447  QStringList directors {};
2448  match = dkDirector.match(event.m_description);
2449  if (match.hasMatch())
2450  {
2451  QString tmpDirectorsString = match.captured(1);
2452 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2453  directors = tmpDirectorsString.split(dkPersonsSeparator, QString::SkipEmptyParts);
2454 #else
2455  directors = tmpDirectorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2456 #endif
2457  for (const auto & director : qAsConst(directors))
2458  {
2459  tmpDirectorsString = director.split(":").last().trimmed().
2460  remove(kDotAtEnd);
2461  if (tmpDirectorsString != "")
2462  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2463  }
2464  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2465  }
2466 
2467  static const QRegularExpression dkActors { "(?:Medvirkende: |Medv\\.: )(.+)" };
2468  match = dkActors.match(event.m_description);
2469  if (match.hasMatch())
2470  {
2471  QString tmpActorsString = match.captured(1);
2472 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2473  const QStringList actors =
2474  tmpActorsString.split(dkPersonsSeparator, QString::SkipEmptyParts);
2475 #else
2476  const QStringList actors =
2477  tmpActorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2478 #endif
2479  for (const auto & actor : qAsConst(actors))
2480  {
2481  tmpActorsString = actor.split(":").last().trimmed().remove(kDotAtEnd);
2482  if (!tmpActorsString.isEmpty() && !directors.contains(tmpActorsString))
2483  event.AddPerson(DBPerson::kActor, tmpActorsString);
2484  }
2485  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2486  }
2487 
2488  //find year
2489  static const QRegularExpression dkYear { " fra ([0-9]{4})[ \\.]" };
2490  match = dkYear.match(event.m_description);
2491  if (match.hasMatch())
2492  {
2493  bool ok = false;
2494  uint y = match.capturedView(1).toUInt(&ok);
2495  if (ok)
2496  event.m_originalairdate = QDate(y, 1, 1);
2497  }
2498 }
2499 
2504 {
2505  LOG(VB_EIT, LOG_INFO, QString("Applying html strip to %1").arg(event.m_title));
2506  static const QRegularExpression html { "</?EM>", QRegularExpression::CaseInsensitiveOption };
2507  event.m_title.remove(html);
2508 }
2509 
2510 // Moves the subtitle field into the description since it's just used
2511 // as more description field. All the sort-out will happen in the description
2512 // field. Also, sometimes the description is just a repeat of the title. If so,
2513 // we remove it.
2515 {
2516  if (event.m_title == event.m_description)
2517  {
2518  event.m_description = QString("");
2519  }
2520  if (!event.m_subtitle.isEmpty())
2521  {
2522  if (event.m_subtitle.trimmed().right(1) != ".'" )
2523  event.m_subtitle = event.m_subtitle.trimmed() + ".";
2524  event.m_description = event.m_subtitle.trimmed() + QString(" ") + event.m_description;
2525  event.m_subtitle = QString("");
2526  }
2527 }
2528 
2530 {
2531  // Program ratings
2532  static const QRegularExpression grRating { R"(\[(K|Κ|8|12|16|18)\]\s*)",
2533  QRegularExpression::CaseInsensitiveOption };
2534  auto match = grRating.match(event.m_title);
2535  if (match.hasMatch())
2536  {
2537  EventRating prograting;
2538  prograting.m_system="GR"; prograting.m_rating = match.captured(1);
2539  event.m_ratings.push_back(prograting);
2540  event.m_title.remove(match.capturedStart(), match.capturedLength());
2541  event.m_title = event.m_title.trimmed();
2542  }
2543 
2544  //Live show
2545  int position = event.m_title.indexOf("(Ζ)");
2546  if (position != -1)
2547  {
2548  event.m_title = event.m_title.replace("(Ζ)", "");
2549  event.m_description.prepend("Ζωντανή Μετάδοση. ");
2550  }
2551 
2552  // Greek not previously Shown
2553  static const QRegularExpression grNotPreviouslyShown {
2554  R"(\W?(?:-\s*)*(?:\b[Α1]['΄η]?\s*(?:τηλεοπτικ[ηή]\s*)?(?:μετ[αά]δοση|προβολ[ηή]))\W?)",
2555  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2556  match = grNotPreviouslyShown.match(event.m_title);
2557  if (match.hasMatch())
2558  {
2559  event.m_previouslyshown = false;
2560  event.m_title.remove(match.capturedStart(), match.capturedLength());
2561  }
2562 
2563  // Greek Replay (Ε)
2564  // it might look redundant compared to previous check but at least it helps
2565  // remove the (Ε) From the title.
2566  static const QRegularExpression grReplay { R"(\([ΕE]\))" };
2567  match = grReplay.match(event.m_title);
2568  if (match.hasMatch())
2569  {
2570  event.m_previouslyshown = true;
2571  event.m_title.remove(match.capturedStart(), match.capturedLength());
2572  }
2573 
2574  // Check for (HD) in the decription
2575  position = event.m_description.indexOf("(HD)");
2576  if (position != -1)
2577  {
2578  event.m_description = event.m_description.replace("(HD)", "");
2579  event.m_videoProps |= VID_HDTV;
2580  }
2581 
2582  // Check for (Full HD) in the decription
2583  position = event.m_description.indexOf("(Full HD)");
2584  if (position != -1)
2585  {
2586  event.m_description = event.m_description.replace("(Full HD)", "");
2587  event.m_videoProps |= VID_HDTV;
2588  }
2589 
2590  static const QRegularExpression grFixnofullstopActors { R"(\w\s(Παίζουν:|Πρωταγων))" };
2591  match = grFixnofullstopActors.match(event.m_description);
2592  if (match.hasMatch())
2593  event.m_description.insert(match.capturedStart() + 1, ".");
2594 
2595  // If they forgot the "." at the end of the sentence before the actors/directors begin, let's insert it.
2596  static const QRegularExpression grFixnofullstopDirectors { R"(\w\s(Σκηνοθ[εέ]))" };
2597  match = grFixnofullstopDirectors.match(event.m_description);
2598  if (match.hasMatch())
2599  event.m_description.insert(match.capturedStart() + 1, ".");
2600 
2601  // Find actors and director in description
2602  // I am looking for actors first and then for directors/presenters because
2603  // sometimes punctuation is missing and the "Παίζουν:" label is mistaken
2604  // for a director's/presenter's surname (directors/presenters are shown
2605  // before actors in the description field.). So removing the text after
2606  // adding the actors AND THEN looking for dir/pres helps to clear things up.
2607  static const QRegularExpression grActors { R"((?:[Ππ]α[ιί]ζουν:|[ΜMμ]ε τους:|Πρωταγωνιστο[υύ]ν:|Πρωταγωνιστε[ιί]:?)(?:\s+στο ρόλο(?: του| της)?\s(?:\w+\s[οη]\s))?([-\w\s']+(?:,[-\w\s']+)*)(?:κ\.[αά])?\W?)" };
2608  // cap(1) actors, just names
2609  static const QRegularExpression grPeopleSeparator { R"(([,-]\s+))" };
2610  match = grActors.match(event.m_description);
2611  if (match.hasMatch())
2612  {
2613  QString tmpActorsString = match.captured(1);
2614 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2615  const QStringList actors =
2616  tmpActorsString.split(grPeopleSeparator, QString::SkipEmptyParts);
2617 #else
2618  const QStringList actors =
2619  tmpActorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2620 #endif
2621  for (const auto & actor : qAsConst(actors))
2622  {
2623  tmpActorsString = actor.split(":").last().trimmed().remove(kDotAtEnd);
2624  if (tmpActorsString != "")
2625  event.AddPerson(DBPerson::kActor, tmpActorsString);
2626  }
2627  event.m_description.remove(match.capturedStart(), match.capturedLength());
2628  }
2629 
2630  // Director
2631  static const QRegularExpression grDirector { R"((?:Σκηνοθεσία: |Σκηνοθέτης: |Σκηνοθέτης - Επιμέλεια: )(\w+\s\w+\s?)(?:\W?))" };
2632  match = grDirector.match(event.m_description);
2633  if (match.hasMatch())
2634  {
2635  QString tmpDirectorsString = match.captured(1);
2636 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2637  const QStringList directors =
2638  tmpDirectorsString.split(grPeopleSeparator, QString::SkipEmptyParts);
2639 #else
2640  const QStringList directors =
2641  tmpDirectorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2642 #endif
2643  for (const auto & director : qAsConst(directors))
2644  {
2645  tmpDirectorsString = director.split(":").last().trimmed().
2646  remove(kDotAtEnd);
2647  if (tmpDirectorsString != "")
2648  {
2649  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2650  }
2651  }
2652  event.m_description.remove(match.capturedStart(), match.capturedLength());
2653  }
2654 
2655  //Try to find presenter
2656  static const QRegularExpression grPres { R"((?:Παρουσ[ιί]αση:(?:\b)*|Παρουσι[αά]ζ(?:ουν|ει)(?::|\sο|\sη)|Παρουσι[αά]στ(?:[ηή]ς|ρια|ριες|[εέ]ς)(?::|\sο|\sη)|Με τ(?:ον |ην )(?:[\s|:|ο|η])*(?:\b)*)([-\w\s]+(?:,[-\w\s]+)*)\W?)",
2657  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2658  match = grPres.match(event.m_description);
2659  if (match.hasMatch())
2660  {
2661  QString tmpPresentersString = match.captured(1);
2662 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2663  const QStringList presenters =
2664  tmpPresentersString.split(grPeopleSeparator, QString::SkipEmptyParts);
2665 #else
2666  const QStringList presenters =
2667  tmpPresentersString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2668 #endif
2669  for (const auto & presenter : qAsConst(presenters))
2670  {
2671  tmpPresentersString = presenter.split(":").last().trimmed().
2672  remove(kDotAtEnd);
2673  if (tmpPresentersString != "")
2674  {
2675  event.AddPerson(DBPerson::kPresenter, tmpPresentersString);
2676  }
2677  }
2678  event.m_description.remove(match.capturedStart(), match.capturedLength());
2679  }
2680 
2681  //find year e.g Παραγωγής 1966 ή ΝΤΟΚΙΜΑΝΤΕΡ - 1998 Κατάλληλο για όλους
2682  // Used in Private channels (not 'secret', just not owned by Government!)
2683  static const QRegularExpression grYear { R"(\W?(?:\s?παραγωγ[ηή]ς|\s?-|,)\s*([1-2][0-9]{3})(?:-\d{1,4})?)",
2684  QRegularExpression::CaseInsensitiveOption };
2685  match = grYear.match(event.m_description);
2686  if (match.hasMatch())
2687  {
2688  bool ok = false;
2689  uint y = match.capturedView(1).toUInt(&ok);
2690  if (ok)
2691  {
2692  event.m_originalairdate = QDate(y, 1, 1);
2693  event.m_description.remove(match.capturedStart(), match.capturedLength());
2694  }
2695  }
2696  // Remove " ."
2697  event.m_description = event.m_description.replace(" .",".").trimmed();
2698 
2699  //find country of origin and remove it from description.
2700  static const QRegularExpression grCountry {
2701  R"((?:\W|\b)(?:(ελλην|τουρκ|αμερικ[αά]ν|γαλλ|αγγλ|βρεττ?αν|γερμαν|ρωσσ?|ιταλ|ελβετ|σουηδ|ισπαν|πορτογαλ|μεξικ[αά]ν|κιν[εέ]ζικ|ιαπων|καναδ|βραζιλι[αά]ν)(ικ[ηή][ςσ])))",
2702  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2703  match = grCountry.match(event.m_description);
2704  if (match.hasMatch())
2705  event.m_description.remove(match.capturedStart(), match.capturedLength());
2706 
2707  // Work out the season and episode numbers (if any)
2708  // Matching pattern "Επεισ[όο]διο:?|Επ 3 από 14|3/14" etc
2709  bool series = false;
2710  static const QRegularExpression grSeason {
2711  R"((?:\W-?)*(?:\(-\s*)?\b(([Α-Ω|A|B|E|Z|H|I|K|M|N]{1,2})(?:'|΄)?|(\d{1,2})(?:ος|ου|oς|os)?)(?:\s*[ΚκKk][υύ]κλο(?:[σς]|υ))\s?)",
2712  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2713  // cap(2) is the season for ΑΒΓΔ
2714  // cap(3) is the season for 1234
2715  match = grSeason.match(event.m_title);
2716  if (match.hasMatch())
2717  {
2718  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2719  {
2720  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2721  //must convert them to numbers.
2722  int tmpinteger = match.capturedView(2).toUInt();
2723  if (tmpinteger < 1)
2724  {
2725  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2726  event.m_season = 6;
2727  else
2728  {
2729  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2730  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2731  if (tmpinteger != -1)
2732  event.m_season = tmpinteger;
2733  else
2734  //sometimes they use english letters instead of greek. Compensating:
2735  {
2736  static const QString LettToNumber2 = "0ABΓΔE6ZHΘIKΛMN";
2737  tmpinteger = LettToNumber2.indexOf(match.capturedView(2));
2738  if (tmpinteger != -1)
2739  event.m_season = tmpinteger;
2740  }
2741  }
2742  }
2743  }
2744  else if (!match.capturedView(3).isEmpty()) //number
2745  {
2746  event.m_season = match.capturedView(3).toUInt();
2747  }
2748  series = true;
2749  event.m_title.remove(match.capturedStart(), match.capturedLength());
2750  }
2751 
2752  // I have to search separately for season in title and description because it wouldn't work when in both.
2753  match = grSeason.match(event.m_description);
2754  if (match.hasMatch())
2755  {
2756  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2757  {
2758  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2759  //must convert them to numbers.
2760  int tmpinteger = match.capturedView(2).toUInt();
2761  if (tmpinteger < 1)
2762  {
2763  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2764  event.m_season = 6;
2765  else
2766  {
2767  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2768  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2769  if (tmpinteger != -1)
2770  event.m_season = tmpinteger;
2771  }
2772  }
2773  }
2774  else if (!match.capturedView(3).isEmpty()) //number
2775  {
2776  event.m_season = match.capturedView(3).toUInt();
2777  }
2778  series = true;
2779  event.m_description.remove(match.capturedStart(), match.capturedLength());
2780  }
2781 
2782 
2783  // If Season is in Roman Numerals (I,II,etc)
2784  static const QRegularExpression grSeasonAsRomanNumerals { ",\\s*([MDCLXVIΙΧ]+)$",
2785  QRegularExpression::CaseInsensitiveOption };
2786  match = grSeasonAsRomanNumerals.match(event.m_title);
2787  auto match2 = grSeasonAsRomanNumerals.match(event.m_description);
2788  if (match.hasMatch())
2789  {
2790  if (!match.capturedView(1).isEmpty()) //number
2791  event.m_season = parseRoman(match.captured(1).toUpper());
2792  series = true;
2793  event.m_title.remove(match.capturedStart(), match.capturedLength());
2794  event.m_title = event.m_title.trimmed();
2795  if (event.m_title.right(1) == ",")
2796  event.m_title.chop(1);
2797  }
2798  else if (match2.hasMatch())
2799  {
2800  if (!match2.capturedView(1).isEmpty()) //number
2801  event.m_season = parseRoman(match2.captured(1).toUpper());
2802  series = true;
2803  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2804  event.m_description = event.m_description.trimmed();
2805  if (event.m_description.right(1) == ",")
2806  event.m_description.chop(1);
2807  }
2808 
2809  static const QRegularExpression grlongEp { R"(\b(?:Επ.|επεισ[οό]διο:?)\s*(\d+)\W?)",
2810  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2811  // cap(1) is the Episode No.
2812  match = grlongEp.match(event.m_title);
2813  match2 = grlongEp.match(event.m_description);
2814  if (match.hasMatch() || match2.hasMatch())
2815  {
2816  if (!match.capturedView(1).isEmpty())
2817  {
2818  event.m_episode = match.capturedView(1).toUInt();
2819  series = true;
2820  event.m_title.remove(match.capturedStart(), match.capturedLength());
2821  }
2822  else if (!match2.capturedView(1).isEmpty())
2823  {
2824  event.m_episode = match2.capturedView(1).toUInt();
2825  series = true;
2826  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2827  }
2828  // Sometimes description omits Season if it's 1. We fix this
2829  if (0 == event.m_season)
2830  event.m_season = 1;
2831  }
2832 
2833  // Sometimes, especially on greek national tv, they include comments in the
2834  // title, e.g "connection to ert1", "ert archives".
2835  // Because they obscure the real title, I'll isolate and remove them.
2836 
2837  static const QRegularExpression grCommentsinTitle { R"(\(([Α-Ωα-ω\s\d-]+)\)(?:\s*$)*)" };
2838  // cap1 = real title
2839  // cap0 = real title in parentheses.
2840  match = grCommentsinTitle.match(event.m_title);
2841  if (match.hasMatch()) // found in title instead
2842  event.m_title.remove(match.capturedStart(), match.capturedLength());
2843 
2844  // Sometimes the real (mostly English) title of a movie or series is
2845  // enclosed in parentheses in the event title, subtitle or description.
2846  // Since the subtitle has been moved to the description field by
2847  // EITFixUp::FixGreekSubtitle, I will search for it only in the description.
2848  // It will replace the translated one to get better chances of metadata
2849  // retrieval. The old title will be moved in the description.
2850  static const QRegularExpression grRealTitleInDescription { R"(^\(([A-Za-z\s\d-]+)\)\s*)" };
2851  // cap1 = real title
2852  // cap0 = real title in parentheses.
2853  match = grRealTitleInDescription.match(event.m_description);
2854  if (match.hasMatch())
2855  {
2856  event.m_description.remove(0, match.capturedLength());
2857  if (match.captured(0) != event.m_title.trimmed())
2858  {
2859  event.m_description = "(" + event.m_title.trimmed() + "). " + event.m_description;
2860  }
2861  event.m_title = match.captured(1);
2862  // Remove the real title from the description
2863  }
2864  else // search in title
2865  {
2866  static const QRegularExpression grRealTitleInTitle { R"(\(([A-Za-z\s\d-]+)\)(?:\s*$)?)" };
2867  // cap1 = real title
2868  // cap0 = real title in parentheses.
2869  match = grRealTitleInTitle.match(event.m_title);
2870  if (match.hasMatch()) // found in title instead
2871  {
2872  event.m_title.remove(match.capturedStart(), match.capturedLength());
2873  QString tmpTranslTitle = event.m_title;
2874  //QString tmpTranslTitle = event.m_title.replace(tmptitle.cap(0),"");
2875  event.m_title = match.captured(1);
2876  event.m_description = "(" + tmpTranslTitle.trimmed() + "). " + event.m_description;
2877  }
2878  }
2879 
2880  // Description field: "^Episode: Lion in the cage. (Description follows)"
2881  static const QRegularExpression grEpisodeAsSubtitle { R"(^Επεισ[οό]διο:\s?([\w\s\-,']+)\.\s?)" };
2882  match = grEpisodeAsSubtitle.match(event.m_description);
2883  if (match.hasMatch())
2884  {
2885  event.m_subtitle = match.captured(1).trimmed();
2886  event.m_description.remove(match.capturedStart(), match.capturedLength());
2887  }
2888  static const QRegularExpression grMovie { R"(\bταιν[ιί]α\b)",
2889  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2890  bool isMovie = (event.m_description.indexOf(grMovie) !=-1) ;
2891  if (isMovie)
2892  event.m_categoryType = ProgramInfo::kCategoryMovie;
2893  else if (series)
2894  event.m_categoryType = ProgramInfo::kCategorySeries;
2895  // clear double commas.
2896  event.m_description.replace(",,", ",");
2897 
2898 // TODO: remove the fragments left over in the description, e.g. "παραγωγής χχχχ" (production year).
2899 }
2900 
2902 {
2903  struct grCategoryEntry {
2904  QRegularExpression expr;
2905  QString category;
2906  };
2907  static const QRegularExpression grCategFood { "\\W?(?:εκπομπ[ηή]\\W)?(Γαστρονομ[ιί]α[σς]?|μαγειρικ[ηή][σς]?|chef|συνταγ[εέηή]|διατροφ|wine|μ[αά]γειρα[σς]?)\\W?",
2908  QRegularExpression::CaseInsensitiveOption };
2909  static const QRegularExpression grCategDrama { "\\W?(κοινωνικ[ηήό]|δραματικ[ηή]|δρ[αά]μα)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2910  QRegularExpression::CaseInsensitiveOption};
2911  static const QRegularExpression grCategComedy { "\\W?(κωμικ[ηήοό]|χιουμοριστικ[ηήοό]|κωμωδ[ιί]α)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2912  QRegularExpression::CaseInsensitiveOption};
2913  static const QRegularExpression grCategChildren { "\\W?(παιδικ[ηήοό]|κινο[υύ]μ[εέ]ν(ων|α)\\sσχ[εέ]δ[ιί](ων|α))\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2914  QRegularExpression::CaseInsensitiveOption};
2915  static const QRegularExpression grCategMystery { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(μυστηρ[ιί]ου)\\W?",
2916  QRegularExpression::CaseInsensitiveOption};
2917  static const QRegularExpression grCategFantasy { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(φαντασ[ιί]ας)\\W?",
2918  QRegularExpression::CaseInsensitiveOption};
2919  static const QRegularExpression grCategHistory { "\\W?(ιστορικ[ηήοό])\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2920  QRegularExpression::CaseInsensitiveOption};
2921  static const QRegularExpression grCategTeleMag { "\\W?(ενημερωτικ[ηή]|ψυχαγωγικ[ηή]|τηλεπεριοδικ[οό]|μαγκαζ[ιί]νο)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2922  QRegularExpression::CaseInsensitiveOption};
2923  static const QRegularExpression grCategTeleShop { "\\W?(οδηγ[οό][σς]?\\sαγορ[ωώ]ν|τηλεπ[ωώ]λ[ηή]σ|τηλεαγορ|τηλεμ[αά]ρκετ|telemarket)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2924  QRegularExpression::CaseInsensitiveOption};
2925  static const QRegularExpression grCategGameShow { "\\W?(τηλεπαιχν[ιί]δι|quiz)\\W?",
2926  QRegularExpression::CaseInsensitiveOption};
2927  static const QRegularExpression grCategDocumentary { "\\W?(ντοκ[ιυ]μαντ[εέ]ρ)\\W?",
2928  QRegularExpression::CaseInsensitiveOption};
2929  static const QRegularExpression grCategBiography { "\\W?(βιογραφ[ιί]α|βιογραφικ[οό][σς]?)\\W?",
2930  QRegularExpression::CaseInsensitiveOption};
2931  static const QRegularExpression grCategNews { "\\W?(δελτ[ιί]ο\\W?|ειδ[ηή]σε(ι[σς]|ων))\\W?",
2932  QRegularExpression::CaseInsensitiveOption};
2933  static const QRegularExpression grCategSports { "\\W?(champion|αθλητικ[αάοόηή]|πρωτ[αά]θλημα|ποδ[οό]σφαιρο(ου)?|κολ[υύ]μβηση|πατιν[αά]ζ|formula|μπ[αά]σκετ|β[οό]λε[ιϊ])\\W?",
2934  QRegularExpression::CaseInsensitiveOption};
2935  static const QRegularExpression grCategMusic { "\\W?(μουσικ[οόηή]|eurovision|τραγο[υύ]δι)\\W?",
2936  QRegularExpression::CaseInsensitiveOption};
2937  static const QRegularExpression grCategReality { "\\W?(ρι[αά]λιτι|reality)\\W?",
2938  QRegularExpression::CaseInsensitiveOption};
2939  static const QRegularExpression grCategReligion { "\\W?(θρησκε[ιί]α|θρησκευτικ|να[οό][σς]?|θε[ιί]α λειτουργ[ιί]α)\\W?",
2940  QRegularExpression::CaseInsensitiveOption};
2941  static const QRegularExpression grCategCulture { "\\W?(τ[εέ]χν(η|ε[σς])|πολιτισμ)\\W?",
2942  QRegularExpression::CaseInsensitiveOption};
2943  static const QRegularExpression grCategNature { "\\W?(φ[υύ]ση|περιβ[αά]λλο|κατασκευ|επιστ[ηή]μ(?!ονικ[ηή]ς φαντασ[ιί]ας))\\W?",
2944  QRegularExpression::CaseInsensitiveOption};
2945  static const QRegularExpression grCategSciFi { "\\W?(επιστ(.|ημονικ[ηή]ς)\\s?φαντασ[ιί]ας)\\W?",
2946  QRegularExpression::CaseInsensitiveOption};
2947  static const QRegularExpression grCategHealth { "\\W?(υγε[ιί]α|υγειιν|ιατρικ|διατροφ)\\W?",
2948  QRegularExpression::CaseInsensitiveOption};
2949  static const QRegularExpression grCategSpecial { "\\W?(αφι[εέ]ρωμα)\\W?",
2950  QRegularExpression::CaseInsensitiveOption};
2951  static const QList<grCategoryEntry> grCategoryDescData = {
2952  { grCategComedy, "Κωμωδία" },
2953  { grCategTeleMag, "Τηλεπεριοδικό" },
2954  { grCategNature, "Επιστήμη/Φύση" },
2955  { grCategHealth, "Υγεία" },
2956  { grCategReality, "Ριάλιτι" },
2957  { grCategDrama, "Κοινωνικό" },
2958  { grCategChildren, "Παιδικό" },
2959  { grCategSciFi, "Επιστ.Φαντασίας" },
2960  { grCategMystery, "Μυστηρίου" },
2961  { grCategFantasy, "Φαντασίας" },
2962  { grCategHistory, "Ιστορικό" },
2963  { grCategTeleShop, "Τηλεπωλήσεις" },
2964  { grCategFood, "Γαστρονομία" },
2965  { grCategGameShow, "Τηλεπαιχνίδι" },
2966  { grCategBiography, "Βιογραφία" },
2967  { grCategSports, "Αθλητικά" },
2968  { grCategMusic, "Μουσική" },
2969  { grCategDocumentary, "Ντοκιμαντέρ" },
2970  { grCategReligion, "Θρησκεία" },
2971  { grCategCulture, "Τέχνες/Πολιτισμός" },
2972  { grCategSpecial, "Αφιέρωμα" },
2973  };
2974  static const QList<grCategoryEntry> grCategoryTitleData = {
2975  { grCategTeleShop, "Τηλεπωλήσεις" },
2976  { grCategGameShow, "Τηλεπαιχνίδι" },
2977  { grCategMusic, "Μουσική" },
2978  { grCategNews, "Ειδήσεις" },
2979  };
2980 
2981  // Handle special cases
2982  if ((event.m_description.indexOf(grCategFantasy) != -1)
2983  && (event.m_description.indexOf(grCategMystery) != -1))
2984  {
2985  event.m_category = "Φαντασίας/Μυστηρίου";
2986  return;
2987  }
2988 
2989  // Find categories in the description
2990  for (const auto& [expression, category] : grCategoryDescData)
2991  {
2992  if (event.m_description.indexOf(expression) != -1) {
2993  event.m_category = category;
2994  return;
2995  }
2996  }
2997 
2998  // Find categories in the title
2999  for (const auto& [expression, category] : grCategoryTitleData)
3000  {
3001  if (event.m_title.indexOf(expression) != -1) {
3002  event.m_category = category;
3003  return;
3004  }
3005  }
3006 }
3007 
3009 {
3010  // TODO handle scraping the category and category_type from localized text in the short/long description
3011  // TODO remove short description (stored as episode title) which is just the beginning of the long description (actual description)
3012 
3013  // drop the short description if its copy the start of the long description
3014  if (event.m_description.startsWith (event.m_subtitle))
3015  {
3016  event.m_subtitle = "";
3017  }
3018 
3019  // handle cast and crew in items in the DVB Extended Event Descriptor
3020  // remove handled items from the map, so the left overs can be reported
3021  auto i = event.m_items.begin();
3022  while (i != event.m_items.end())
3023  {
3024  /* Possible TODO: if EIT inlcude the priority and/or character
3025  * names for the actors, include them in AddPerson call. */
3026  if ((QString::compare (i.key(), "Role Player") == 0) ||
3027  (QString::compare (i.key(), "Performing Artist") == 0))
3028  {
3029  event.AddPerson (DBPerson::kActor, i.value());
3030  i = event.m_items.erase (i);
3031  }
3032  else if (QString::compare (i.key(), "Director") == 0)
3033  {
3034  event.AddPerson (DBPerson::kDirector, i.value());
3035  i = event.m_items.erase (i);
3036  }
3037  else if (QString::compare (i.key(), "Commentary or Commentator") == 0)
3038  {
3039  event.AddPerson (DBPerson::kCommentator, i.value());
3040  i = event.m_items.erase (i);
3041  }
3042  else if (QString::compare (i.key(), "Presenter") == 0)
3043  {
3044  event.AddPerson (DBPerson::kPresenter, i.value());
3045  i = event.m_items.erase (i);
3046  }
3047  else if (QString::compare (i.key(), "Producer") == 0)
3048  {
3049  event.AddPerson (DBPerson::kProducer, i.value());
3050  i = event.m_items.erase (i);
3051  }
3052  else if (QString::compare (i.key(), "Scriptwriter") == 0)
3053  {
3054  event.AddPerson (DBPerson::kWriter, i.value());
3055  i = event.m_items.erase (i);
3056  }
3057  else
3058  {
3059  ++i;
3060  }
3061  }
3062 
3063  // handle star rating in the description
3064  static const QRegularExpression unitymediaImdbrating { R"(\s*IMDb Rating: (\d\.\d)\s?/10$)" };
3065  auto match = unitymediaImdbrating.match(event.m_description);
3066  if (match.hasMatch())
3067  {
3068  float stars = match.captured(1).toFloat();
3069  event.m_stars = stars / 10.0F;
3070  event.m_description.remove(match.capturedStart(0),
3071  match.capturedLength(0));
3072  }
3073 }
EITFixUp::FixGreekEIT
static void FixGreekEIT(DBEventEIT &event)
Definition: eitfixup.cpp:2529
DBEvent::m_season
uint m_season
Definition: programdata.h:172
EITFixUp::FixDK
static void FixDK(DBEventEIT &event)
Use this to clean YouSee's DVB-C guide in Denmark.
Definition: eitfixup.cpp:2292
EITFixUp::kFixUK
@ kFixUK
Definition: eitfixup.h:35
EITFixUp::kFixBell
@ kFixBell
Definition: eitfixup.h:34
NLMapResult::type
ProgramInfo::CategoryType type
Definition: eitfixup.cpp:1978
EITFixUp::kFixAUDescription
@ kFixAUDescription
Definition: eitfixup.h:52
EITFixUp::FixAUNine
static void FixAUNine(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1333
EventRating::m_system
QString m_system
Definition: programdata.h:78
kUKSpaceColonStart
static const QRegularExpression kUKSpaceColonStart
Definition: eitfixup.cpp:18
EITFixUp::kFixNO
@ kFixNO
Definition: eitfixup.h:47
DBEvent::m_totalepisodes
uint m_totalepisodes
Definition: programdata.h:174
EITFixUp::kFixNRK_DVBT
@ kFixNRK_DVBT
Definition: eitfixup.h:48
EITFixUp::FixBellExpressVu
static void FixBellExpressVu(DBEventEIT &event)
Use this for the Canadian BellExpressVu to standardize DVB-S guide.
Definition: eitfixup.cpp:230
EITFixUp::Fix
static void Fix(DBEventEIT &event)
Definition: eitfixup.cpp:47
EITFixUp::FixFI
static void FixFI(DBEventEIT &event)
Use this to clean DVB-T guide in Finland.
Definition: eitfixup.cpp:1856
EITFixUp::kFixAUStar
@ kFixAUStar
Definition: eitfixup.h:39
EITFixUp::kFixPremiere
@ kFixPremiere
Definition: eitfixup.h:43
EITFixUp::kFixATV
@ kFixATV
Definition: eitfixup.h:58
DBEventEIT::m_fixup
FixupValue m_fixup
Definition: programdata.h:222
EITFixUp::kFixHTML
@ kFixHTML
Definition: eitfixup.h:56
EventRating
Definition: programdata.h:75
EITFixUp::FixATV
static void FixATV(DBEventEIT &event)
Use this to standardise the ATV/ATV2 guide in Germany.
Definition: eitfixup.cpp:1846
DBEvent::m_starttime
QDateTime m_starttime
Definition: programdata.h:152
EITFixUp::kDotToTitle
static const uint kDotToTitle
Definition: eitfixup.h:19
EITFixUp::FixComHem
static void FixComHem(DBEventEIT &event, bool process_subtitle)
Use this to standardize ComHem DVB-C service in Sweden.
Definition: eitfixup.cpp:1042
DBEvent::m_partnumber
uint16_t m_partnumber
Definition: programdata.h:157
LOG
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:39
EITFixUp::kFixSubtitle
@ kFixSubtitle
Definition: eitfixup.h:38
EITFixUp::kMaxToTitle
static const uint kMaxToTitle
Definition: eitfixup.h:17
dish_theme_type_to_string
QString dish_theme_type_to_string(uint theme_type)
Definition: dishdescriptors.cpp:301
EITFixUp::FixGreekCategories
static void FixGreekCategories(DBEventEIT &event)
Definition: eitfixup.cpp:2901
DBPerson::kPresenter
@ kPresenter
Definition: programdata.h:39
EITFixUp::FixAUStar
static void FixAUStar(DBEventEIT &event)
Use this to standardize DVB-S guide in Australia.
Definition: eitfixup.cpp:1291
DBEvent::m_category
QString m_category
Definition: programdata.h:151
NLMapResult::name
QString name
Definition: eitfixup.cpp:1977
kStereo
static const QRegularExpression kStereo
Definition: eitfixup.cpp:17
EITFixUp::parseRoman
static int parseRoman(QString roman)
Definition: eitfixup.cpp:31
DBPerson::kDirector
@ kDirector
Definition: programdata.h:32
EITFixUp::FixNRK_DVBT
static void FixNRK_DVBT(DBEventEIT &event)
Use this to clean DVB-T guide in Norway (NRK)
Definition: eitfixup.cpp:2230
tmp
static guint32 * tmp
Definition: goom_core.cpp:26
r2v
static const QMap< QChar, quint16 > r2v
Definition: eitfixup.cpp:25
EITFixUp::FixCategory
static void FixCategory(DBEventEIT &event)
Definition: eitfixup.cpp:2186
DBEvent::m_seriesId
QString m_seriesId
Definition: programdata.h:165
ProgramInfo::kCategoryMovie
@ kCategoryMovie
Definition: programinfo.h:76
DBEvent::m_parttotal
uint16_t m_parttotal
Definition: programdata.h:158
EITFixUp::kFixCategory
@ kFixCategory
Definition: eitfixup.h:46
DBEvent::m_programId
QString m_programId
Definition: programdata.h:166
DBPerson::kUnknown
@ kUnknown
Definition: programdata.h:30
programinfo.h
DBEventEIT::m_chanid
uint32_t m_chanid
Definition: programdata.h:221
ProgramInfo::kCategoryTVShow
@ kCategoryTVShow
Definition: programinfo.h:77
mythlogging.h
DBEvent::m_categoryType
ProgramInfo::CategoryType m_categoryType
Definition: programdata.h:164
EITFixUp::SetUKSubtitle
static void SetUKSubtitle(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:518
deCrewTitle
static const QMap< QString, DBPerson::Role > deCrewTitle
Definition: eitfixup.cpp:1734
DBEvent::m_title
QString m_title
Definition: programdata.h:148
EITFixUp::FixAUSeven
static void FixAUSeven(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1366
ProgramInfo::kCategorySports
@ kCategorySports
Definition: programinfo.h:77
EITFixUp::kMinMovieDuration
static const int kMinMovieDuration
Definition: eitfixup.h:25
DBPerson::kProducer
@ kProducer
Definition: programdata.h:33
DBEvent::m_subtitle
QString m_subtitle
Definition: programdata.h:149
EITFixUp::kFixNL
@ kFixNL
Definition: eitfixup.h:45
EITFixUp::kFixGreekEIT
@ kFixGreekEIT
Definition: eitfixup.h:69
EITFixUp::kFixDisneyChannel
@ kFixDisneyChannel
Definition: eitfixup.h:59
hardwareprofile.scan.rating
def rating(profile, smoonURL, gate)
Definition: scan.py:39
EITFixUp::kSubtitleMaxLen
static const uint kSubtitleMaxLen
Definition: eitfixup.h:15
EITFixUp::FixNO
static void FixNO(DBEventEIT &event)
Use this to clean DVB-S guide in Norway.
Definition: eitfixup.cpp:2200
EITFixUp::FixNL
static void FixNL(DBEventEIT &event)
Use this to standardize @Home DVB-C guide in the Netherlands.
Definition: eitfixup.cpp:2003
EITFixUp::kFixDK
@ kFixDK
Definition: eitfixup.h:50
EITFixUp::kFixGreekCategories
@ kFixGreekCategories
Definition: eitfixup.h:70
EITFixUp::FixPremiere
static void FixPremiere(DBEventEIT &event)
Use this to standardize DVB-C guide in Germany for the providers Kabel Deutschland and Premiere.
Definition: eitfixup.cpp:1908
categoryTrans
static const QMap< QString, NLMapResult > categoryTrans
Definition: eitfixup.cpp:1980
uint
unsigned int uint
Definition: compat.h:79
DBPerson::kHost
@ kHost
Definition: programdata.h:37
ProgramInfo::CategoryType
CategoryType
Definition: programinfo.h:76
DBEvent::m_episode
uint m_episode
Definition: programdata.h:173
EITFixUp::kFixAUFreeview
@ kFixAUFreeview
Definition: eitfixup.h:51
channelutil.h
EITFixUp::kFixFI
@ kFixFI
Definition: eitfixup.h:42
EITFixUp::kFixComHem
@ kFixComHem
Definition: eitfixup.h:37
EITFixUp::kFixRTL
@ kFixRTL
Definition: eitfixup.h:41
EITFixUp::kFixDish
@ kFixDish
Definition: eitfixup.h:49
EITFixUp::FixAUDescription
static void FixAUDescription(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1307
EITFixUp::kFixGenericDVB
@ kFixGenericDVB
Definition: eitfixup.h:33
eitfixup.h
DBEventEIT
Definition: programdata.h:177
EITFixUp::kFixPBS
@ kFixPBS
Definition: eitfixup.h:36
DBEvent::m_description
QString m_description
Definition: programdata.h:150
EITFixUp::kFixAUSeven
@ kFixAUSeven
Definition: eitfixup.h:54
DBEventEIT::m_items
QMultiMap< QString, QString > m_items
Definition: programdata.h:223
EITFixUp::FixAUFreeview
static void FixAUFreeview(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1408
DBPerson::Role
Role
Definition: programdata.h:28
EITFixUp::AddDVBEITAuthority
static QString AddDVBEITAuthority(uint chanid, const QString &id)
This adds a DVB EIT default authority to series id or program id if one exists in the DB for that cha...
Definition: eitfixup.cpp:203
DBPerson::kCommentator
@ kCommentator
Definition: programdata.h:40
EITFixUp::FixPBS
static void FixPBS(DBEventEIT &event)
Use this to standardize PBS ATSC guide in the USA.
Definition: eitfixup.cpp:1027
EITFixUp::FixDisneyChannel
static void FixDisneyChannel(DBEventEIT &event)
Use this to standardise the Disney Channel guide in Germany.
Definition: eitfixup.cpp:1819
EITFixUp::kMaxDotToColon
static const uint kMaxDotToColon
Definition: eitfixup.h:23
EITFixUp::kFixHDTV
@ kFixHDTV
Definition: eitfixup.h:44
EITFixUp::kFixGreekSubtitle
@ kFixGreekSubtitle
Definition: eitfixup.h:68
DBEvent::m_endtime
QDateTime m_endtime
Definition: programdata.h:153
EITFixUp::kFixMCA
@ kFixMCA
Definition: eitfixup.h:40
NLMapResult
Definition: eitfixup.cpp:1976
EITFixUp::kFixP7S1
@ kFixP7S1
Definition: eitfixup.h:55
EITFixUp::FixUK
static void FixUK(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:656
dishdescriptors.h
EITFixUp::kFixUnitymedia
@ kFixUnitymedia
Definition: eitfixup.h:57
EITFixUp::FixStripHTML
static void FixStripHTML(DBEventEIT &event)
Use this to clean HTML Tags from EIT Data.
Definition: eitfixup.cpp:2503
ProgramInfo::kCategoryNone
@ kCategoryNone
Definition: programinfo.h:76
kDotAtEnd
static const QRegularExpression kDotAtEnd
Definition: eitfixup.cpp:19
ProgramInfo::kCategorySeries
@ kCategorySeries
Definition: programinfo.h:76
EITFixUp::FixGreekSubtitle
static void FixGreekSubtitle(DBEventEIT &event)
Definition: eitfixup.cpp:2514
EITFixUp::kFixAUNine
@ kFixAUNine
Definition: eitfixup.h:53
DBEvent::m_airdate
uint16_t m_airdate
movie year / production year
Definition: programdata.h:154
DBPerson::kWriter
@ kWriter
Definition: programdata.h:35
EventRating::m_rating
QString m_rating
Definition: programdata.h:79
EITFixUp::FixRTL
static void FixRTL(DBEventEIT &event)
Use this to standardise the RTL group guide in Germany.
Definition: eitfixup.cpp:1606
ChannelUtil::GetDefaultAuthority
static QString GetDefaultAuthority(uint chanid)
Returns the DVB default authority for the chanid given.
Definition: channelutil.cpp:1177
EITFixUp::FixMCA
static void FixMCA(DBEventEIT &event)
Use this to standardise the MultiChoice Africa DVB-S guide.
Definition: eitfixup.cpp:1466
DBPerson::kActor
@ kActor
Definition: programdata.h:31
EITFixUp::kMaxQuestionExclamation
static const uint kMaxQuestionExclamation
Definition: eitfixup.h:21
EITFixUp::FixUnitymedia
static void FixUnitymedia(DBEventEIT &event)
Definition: eitfixup.cpp:3008
EITFixUp::FixPRO7
static void FixPRO7(DBEventEIT &event)
Use this to standardise the PRO7/Sat1 group guide in Germany.
Definition: eitfixup.cpp:1743