MythTV  master
eitfixup.cpp
Go to the documentation of this file.
1 // C++ headers
2 #include <algorithm>
3 #include <array>
4 
5 // MythTV headers
6 #include "eitfixup.h"
7 #include "programinfo.h" // for CategoryType
8 #include "channelutil.h" // for GetDefaultAuthority()
9 
10 #include "programinfo.h" // for subtitle types and audio and video properties
11 #include "dishdescriptors.h" // for dish_theme_type_to_string
12 #include "mythlogging.h"
13 
14 /*------------------------------------------------------------------------
15  * Event Fix Up Scripts - Turned on by entry in dtv_privatetype table
16  *------------------------------------------------------------------------*/
17 
18 static const QRegularExpression kStereo { R"(\b\(?[sS]tereo\)?\b)" };
19 static const QRegularExpression kUKSpaceColonStart { R"(^[ |:]*)" };
20 
21 #if QT_VERSION < QT_VERSION_CHECK(5,15,2)
22 #define capturedView capturedRef
23 #endif
24 
25 static const QMap<QChar,quint16> r2v = {
26  {'I' , 1}, {'V' , 5}, {'X' , 10}, {'L' , 50},
27  {'C' , 100}, {'D' , 500}, {'M' , 1000},
28  {QChar(0x399), 1}, // Greek Ι
29 };
30 
31 int EITFixUp::parseRoman (QString roman)
32 {
33  if (roman.isEmpty())
34  return 0;
35 
36  uint result = 0;
37  for (int i = 0; i < roman.size() - 1; i++)
38  {
39  int v1 = r2v[roman.at(i)];
40  int v2 = r2v[roman.at(i+1)];
41  result += (v1 >= v2) ? v1 : -v1;
42  }
43 #if QT_VERSION < QT_VERSION_CHECK(5,10,0)
44  return result + r2v[roman.at(roman.size() - 1)];
45 #else
46  return result + r2v[roman.back()];
47 #endif
48 }
49 
50 
52 {
  // Dispatcher for all EIT fix-ups: event.m_fixup is tested as a bit mask
  // and every kFix* bit that is set runs the corresponding routine, in the
  // order written below.
  // NOTE(review): the signature line is not visible in this view; the
  // body only uses a single object named "event".

  // Generic tidy-up applied whenever at least one fix-up bit is set.
53  if (event.m_fixup)
54  {
  // A subtitle identical to the title adds nothing; blank it.
55  if (event.m_subtitle == event.m_title)
56  event.m_subtitle = QString("");
57 
  // No description but a subtitle: move the subtitle into the description.
58  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
59  {
60  event.m_description = event.m_subtitle;
61  event.m_subtitle = QString("");
62  }
63  }
64 
65  if (kFixHTML & event.m_fixup)
66  FixStripHTML(event);
67 
  // This bit simply flags the event as HD; no text is parsed.
68  if (kFixHDTV & event.m_fixup)
69  event.m_videoProps |= VID_HDTV;
70 
71  if (kFixBell & event.m_fixup)
72  FixBellExpressVu(event);
73 
  // Dish events go through the same routine as Bell ExpressVu. An event
  // with both kFixBell and kFixDish set therefore runs it twice.
74  if (kFixDish & event.m_fixup)
75  FixBellExpressVu(event);
76 
77  if (kFixUK & event.m_fixup)
78  FixUK(event);
79 
80  if (kFixPBS & event.m_fixup)
81  FixPBS(event);
82 
  // The second argument tells FixComHem whether kFixSubtitle is also set.
83  if (kFixComHem & event.m_fixup)
84  FixComHem(event, (kFixSubtitle & event.m_fixup) != 0U);
85 
86  if (kFixAUStar & event.m_fixup)
87  FixAUStar(event);
88 
89  if (kFixAUDescription & event.m_fixup)
90  FixAUDescription(event);
91 
92  if (kFixAUFreeview & event.m_fixup)
93  FixAUFreeview(event);
94 
95  if (kFixAUNine & event.m_fixup)
96  FixAUNine(event);
97 
98  if (kFixAUSeven & event.m_fixup)
99  FixAUSeven(event);
100 
101  if (kFixMCA & event.m_fixup)
102  FixMCA(event);
103 
104  if (kFixRTL & event.m_fixup)
105  FixRTL(event);
106 
107  if (kFixP7S1 & event.m_fixup)
108  FixPRO7(event);
109 
110  if (kFixATV & event.m_fixup)
111  FixATV(event);
112 
113  if (kFixDisneyChannel & event.m_fixup)
114  FixDisneyChannel(event);
115 
116  if (kFixFI & event.m_fixup)
117  FixFI(event);
118 
119  if (kFixPremiere & event.m_fixup)
120  FixPremiere(event);
121 
122  if (kFixNL & event.m_fixup)
123  FixNL(event);
124 
125  if (kFixNO & event.m_fixup)
126  FixNO(event);
127 
128  if (kFixNRK_DVBT & event.m_fixup)
129  FixNRK_DVBT(event);
130 
131  if (kFixDK & event.m_fixup)
132  FixDK(event);
133 
134  if (kFixCategory & event.m_fixup)
135  FixCategory(event);
136 
137  if (kFixGreekSubtitle & event.m_fixup)
138  FixGreekSubtitle(event);
139 
140  if (kFixGreekEIT & event.m_fixup)
141  FixGreekEIT(event);
142 
143  if (kFixGreekCategories & event.m_fixup)
144  FixGreekCategories(event);
145 
146  if (kFixUnitymedia & event.m_fixup)
147  FixUnitymedia(event);
148 
149  // Clean up text strings after all fixups have been applied.
  // For title, subtitle and description alike: drop embedded NUL
  // characters, drop parentheses that contain only whitespace, then
  // collapse whitespace with simplified().
150  if (event.m_fixup)
151  {
152  static const QRegularExpression emptyParens { R"(\(\s*\))" };
153  if (!event.m_title.isEmpty())
154  {
155  event.m_title.remove(QChar('\0')).remove(emptyParens);
156  event.m_title = event.m_title.simplified();
157  }
158 
159  if (!event.m_subtitle.isEmpty())
160  {
161  event.m_subtitle.remove(QChar('\0'));
162  event.m_subtitle.remove(emptyParens);
163  event.m_subtitle = event.m_subtitle.simplified();
164  }
165 
166  if (!event.m_description.isEmpty())
167  {
168  event.m_description.remove(QChar('\0'));
169  event.m_description.remove(emptyParens);
170  event.m_description = event.m_description.simplified();
171  }
172  }
173 
  // Pass the program and series ids through AddDVBEITAuthority, which
  // lower-cases them and adds the channel's default authority when the
  // id has none.
174  if (kFixGenericDVB & event.m_fixup)
175  {
176  event.m_programId = AddDVBEITAuthority(event.m_chanid, event.m_programId);
177  event.m_seriesId = AddDVBEITAuthority(event.m_chanid, event.m_seriesId);
178  }
179 
180  // Are any items left unhandled? Report them to allow fixup improvements.
  // Each remaining key/value pair is logged at debug level under VB_EIT.
181  if (!event.m_items.empty())
182  {
183  for (auto i = event.m_items.begin(); i != event.m_items.end(); ++i)
184  {
185  LOG(VB_EIT, LOG_DEBUG, QString("Unhandled item in EIT for"
186  " channel id \"%1\", \"%2\": %3").arg(event.m_chanid)
187  .arg(i.key(), i.value()));
188  }
189  }
190 }
191 
207 QString EITFixUp::AddDVBEITAuthority(uint chanid, const QString &id)
208 {
209  if (id.isEmpty())
210  return id;
211 
212  // CRIDs are not case sensitive, so change all to lower case
213  QString crid = id.toLower();
214 
215  // remove "crid://"
216  if (crid.startsWith("crid://"))
217  crid.remove(0,7);
218 
219  // if id is a CRID with authority, return it
220  if (crid.length() >= 1 && crid[0] != '/')
221  return crid;
222 
223  QString authority = ChannelUtil::GetDefaultAuthority(chanid);
224  if (authority.isEmpty())
225  return ""; // no authority, not a valid CRID, return empty
226 
227  return authority + crid;
228 }
229 
235 {
  // Fix-up for Bell ExpressVu / Dish style events: pulls subtitle,
  // category, year, actors and audio/video/subtitle flags out of the
  // free-text title and description, removing each marker once handled.
  // NOTE(review): the signature line is not visible in this view;
  // presumably this is the routine the dispatcher calls for kFixBell and
  // kFixDish - confirm.

236  // A 0x0D character is present between the content
237  // and the subtitle if it is present
238  int position = event.m_description.indexOf('\r');
239 
240  if (position != -1)
241  {
242  // Subtitle present in the title, so get
243  // it and adjust the description
  // The description keeps everything after the '\r' and the one
  // character that follows it (position + 2).
244  event.m_subtitle = event.m_description.left(position);
245  event.m_description = event.m_description.right(
246  event.m_description.length() - position - 2);
247  }
248 
249  // Take out the content description which is
250  // always next with a period after it
251  position = event.m_description.indexOf(".");
252  // Make sure they didn't leave it out and
253  // you come up with an odd category
  // NOTE(review): the first branch is intentionally or accidentally empty.
  // A period within the first 10 characters, or no period at all (-1),
  // leaves m_category untouched; any other position forces "Unknown".
254  if (position < 10)
255  {
256  }
257  else
258  {
259  event.m_category = "Unknown";
260  }
261 
262  // If the content descriptor didn't come up with anything, try parsing the category
263  // out of the description.
264  if (event.m_category.isEmpty())
265  {
266  // Take out the content description which is
267  // always next with a period after it
268  position = event.m_description.indexOf(".");
  // When the first period is not the last character, look for a
  // period followed by a space instead.
269  if ((position + 1) < event.m_description.length())
270  position = event.m_description.indexOf(". ");
271  // Make sure they didn't leave it out and
272  // you come up with an odd category
273  if ((position > -1) && position < 20)
274  {
  // Text before the separator becomes the category; text after
  // the separator and one more character becomes the description.
275  const QString stmp = event.m_description;
276  event.m_description = stmp.right(stmp.length() - position - 2);
277  event.m_category = stmp.left(position);
278 
  // A category containing '(' is rejected as "Unknown".
  // NOTE(review): the assignment in the first branch repeats the one
  // made just above, so the description is shortened in both cases.
279  int position_p = event.m_category.indexOf("(");
280  if (position_p == -1)
281  event.m_description = stmp.right(stmp.length() - position - 2);
282  else
283  event.m_category = "Unknown";
284  }
285  else
286  {
287  event.m_category = "Unknown";
288  }
289 
290  // When a channel is off air the category is "-"
291  // so record the category as "OffAir"
292  if (event.m_category == "-")
293  event.m_category = "OffAir";
294 
  // NOTE(review): every value assigned above is shorter than 20
  // characters, so this check looks unreachable - confirm.
295  if (event.m_category.length() > 20)
296  event.m_category = "Unknown";
297  }
298  else if (event.m_categoryType)
299  {
  // A category was already present: remove the theme name from the
  // description, then strip one leading '.' and one leading space.
300  QString theme = dish_theme_type_to_string(event.m_categoryType);
301  event.m_description = event.m_description.replace(theme, "");
302  if (event.m_description.startsWith("."))
303  event.m_description = event.m_description.right(event.m_description.length() - 1);
304  if (event.m_description.startsWith(" "))
305  event.m_description = event.m_description.right(event.m_description.length() - 1);
306  }
307 
308  // See if a year is present as (xxxx)
309  static const QRegularExpression bellYear { R"(\([0-9]{4}\))" };
310  position = event.m_description.indexOf(bellYear);
311  if (position != -1 && !event.m_category.isEmpty())
312  {
313  // Parse out the year
  // The air date is set to 1 January of that year and the event is
  // marked as previously shown.
314  bool ok = false;
315  uint y = event.m_description.mid(position + 1, 4).toUInt(&ok);
316  if (ok)
317  {
318  event.m_originalairdate = QDate(y, 1, 1);
319  event.m_airdate = y;
320  event.m_previouslyshown = true;
321  }
322 
323  // Get the actors if they exist
  // The text up to three characters before the year is split on
  // commas and on "et" surrounded by whitespace.
324  if (position > 3)
325  {
326  static const QRegularExpression bellActors { R"(\set\s|,)" };
327  QString tmp = event.m_description.left(position-3);
328 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
329  QStringList actors =
330  tmp.split(bellActors, QString::SkipEmptyParts);
331 #else
332  QStringList actors =
333  tmp.split(bellActors, Qt::SkipEmptyParts);
334 #endif
335 
336  /* Possible TODO: if EIT include the priority and/or character
337  * names for the actors, include them in AddPerson call. */
338  for (const auto & actor : qAsConst(actors))
339  event.AddPerson(DBPerson::kActor, actor);
340  }
341  // Remove the year and actors from the description
  // (keeps what follows the six-character "(xxxx)" plus one character)
342  event.m_description = event.m_description.right(
343  event.m_description.length() - position - 7);
344  }
345 
346  // Check for (CC) in the description and
347  // set the <subtitles type="teletext"> flag
348  position = event.m_description.indexOf("(CC)");
349  if (position != -1)
350  {
351  event.m_subtitleType |= SUB_HARDHEAR;
352  event.m_description = event.m_description.replace("(CC)", "");
353  }
354 
355  // Check for (Stereo) in the description and set the <audio> tags
356  auto match = kStereo.match(event.m_description);
357  if (match.hasMatch())
358  {
359  event.m_audioProps |= AUD_STEREO;
360  event.m_description.remove(match.capturedStart(0),
361  match.capturedLength(0));
362  }
363 
364  // Check for "title (All Day, HD)" in the title
365  static const QRegularExpression bellPPVTitleAllDayHD { R"(\s*\(All Day\, HD\)\s*$)" };
366  match = bellPPVTitleAllDayHD.match(event.m_title);
367  if (match.hasMatch())
368  {
369  event.m_title.remove(match.capturedStart(), match.capturedLength());
370  event.m_videoProps |= VID_HDTV;
371  }
372 
373  // Check for "title (All Day)" in the title
374  static const QRegularExpression bellPPVTitleAllDay { R"(\s*\(All Day.*\)\s*$)" };
375  match = bellPPVTitleAllDay.match(event.m_title);
376  if (match.hasMatch())
377  event.m_title.remove(match.capturedStart(), match.capturedLength());
378 
379  // Check for "HD - title" in the title
380  static const QRegularExpression bellPPVTitleHD { R"(^HD\s?-\s?)" };
381  match = bellPPVTitleHD.match(event.m_title);
382  if (match.hasMatch())
383  {
384  event.m_title.remove(match.capturedStart(), match.capturedLength());
385  event.m_videoProps |= VID_HDTV;
386  }
387 
388  // Check for (HD) in the description
389  position = event.m_description.indexOf("(HD)");
390  if (position != -1)
391  {
392  event.m_description = event.m_description.replace("(HD)", "");
393  event.m_videoProps |= VID_HDTV;
394  }
395 
396  // Check for (HD) in the title
397  position = event.m_title.indexOf("(HD)");
398  if (position != -1)
399  {
400  event.m_title = event.m_title.replace("(HD)", "");
401  event.m_videoProps |= VID_HDTV;
402  }
403 
404  // Check for HD at the end of the title
405  static const QRegularExpression dishPPVTitleHD { R"(\sHD\s*$)" };
406  match = dishPPVTitleHD.match(event.m_title);
407  if (match.hasMatch())
408  {
409  event.m_title.remove(match.capturedStart(), match.capturedLength());
410  event.m_videoProps |= VID_HDTV;
411  }
412 
413  // Check for (DD) in the description; it sets both the Dolby and
  // the stereo audio flags
414  position = event.m_description.indexOf("(DD)");
415  if (position != -1)
416  {
417  event.m_description = event.m_description.replace("(DD)", "");
418  event.m_audioProps |= AUD_DOLBY;
419  event.m_audioProps |= AUD_STEREO;
420  }
421 
422  // Remove SAP from Dish descriptions
  // NOTE(review): the search is for "(SAP)" but the replacement removes
  // only "(SAP", leaving the ")" behind. The trailing-garbage pattern
  // further down strips it only when " )" ends the description. This
  // looks like a typo - confirm before changing.
423  position = event.m_description.indexOf("(SAP)");
424  if (position != -1)
425  {
426  event.m_description = event.m_description.replace("(SAP", "");
427  event.m_subtitleType |= SUB_HARDHEAR;
428  }
429 
430  // Remove any trailing colon in title
431  static const QRegularExpression dishPPVTitleColon { R"(\:\s*$)" };
432  match = dishPPVTitleColon.match(event.m_title);
433  if (match.hasMatch())
434  event.m_title.remove(match.capturedStart(), match.capturedLength());
435 
436  // Remove "New." from the description and mark the event as not
  // previously shown.
  // NOTE(review): this pattern and the Finale/Premiere patterns below
  // are not anchored, so the first occurrence anywhere is removed.
437  static const QRegularExpression dishDescriptionNew { R"(\s*New\.\s*)" };
438  match = dishDescriptionNew.match(event.m_description);
439  if (match.hasMatch())
440  {
441  event.m_previouslyshown = false;
442  event.m_description.remove(match.capturedStart(), match.capturedLength());
443  }
444 
445  // Remove "Series Finale." / "Season Finale." from the description
446  static const QRegularExpression dishDescriptionFinale { R"(\s*(Series|Season)\sFinale\.\s*)" };
447  match = dishDescriptionFinale.match(event.m_description);
448  if (match.hasMatch())
449  {
450  event.m_previouslyshown = false;
451  event.m_description.remove(match.capturedStart(), match.capturedLength());
452  }
453 
454  // Remove a bare "Finale." from the description
455  static const QRegularExpression dishDescriptionFinale2 { R"(\s*Finale\.\s*)" };
456  match = dishDescriptionFinale2.match(event.m_description);
457  if (match.hasMatch())
458  {
459  event.m_previouslyshown = false;
460  event.m_description.remove(match.capturedStart(), match.capturedLength());
461  }
462 
463  // Remove "Series Premiere." / "Season Premiere." from the description
464  static const QRegularExpression dishDescriptionPremiere { R"(\s*(Series|Season)\s(Premier|Premiere)\.\s*)" };
465  match = dishDescriptionPremiere.match(event.m_description);
466  if (match.hasMatch())
467  {
468  event.m_previouslyshown = false;
469  event.m_description.remove(match.capturedStart(), match.capturedLength());
470  }
471 
472  // Remove a bare "Premier." / "Premiere." from the description
473  static const QRegularExpression dishDescriptionPremiere2 { R"(\s*(Premier|Premiere)\.\s*)" };
474  match = dishDescriptionPremiere2.match(event.m_description);
475  if (match.hasMatch())
476  {
477  event.m_previouslyshown = false;
478  event.m_description.remove(match.capturedStart(), match.capturedLength());
479  }
480 
481  // Remove Dish's PPV code at the end of the description
  // (five letters or digits in parentheses, matched case-insensitively)
482  static const QRegularExpression ppvcode { R"(\s*\(([A-Z]|[0-9]){5}\)\s*$)",
483  QRegularExpression::CaseInsensitiveOption };
484  match = ppvcode.match(event.m_description);
485  if (match.hasMatch())
486  event.m_description.remove(match.capturedStart(), match.capturedLength());
487 
488  // Remove trailing garbage: whitespace followed by ")" at the end
489  static const QRegularExpression dishPPVSpacePerenEnd { R"(\s\)\s*$)" };
490  match = dishPPVSpacePerenEnd.match(event.m_description);
491  if (match.hasMatch())
492  event.m_description.remove(match.capturedStart(), match.capturedLength());
493 
494  // Check for subtitle "All Day (... Eastern)" in the subtitle
495  static const QRegularExpression bellPPVSubtitleAllDay { R"(^All Day \(.*\sEastern\)\s*$)" };
496  match = bellPPVSubtitleAllDay.match(event.m_subtitle);
497  if (match.hasMatch())
498  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
499 
500  // Check for description "(... Eastern)" at the start of the description
501  static const QRegularExpression bellPPVDescriptionAllDay { R"(^\(.*\sEastern\))" };
502  match = bellPPVDescriptionAllDay.match(event.m_description);
503  if (match.hasMatch())
504  event.m_description.remove(match.capturedStart(), match.capturedLength());
505 
506  // Check for description "(... ET)" at the start of the description
507  static const QRegularExpression bellPPVDescriptionAllDay2 { R"(^\([0-9].*am-[0-9].*am\sET\))" };
508  match = bellPPVDescriptionAllDay2.match(event.m_description);
509  if (match.hasMatch())
510  event.m_description.remove(match.capturedStart(), match.capturedLength());
511 
512  // Check for description "(nnnnn)" in the description
513  static const QRegularExpression bellPPVDescriptionEventId { R"(\([0-9]{5}\))" };
514  match = bellPPVDescriptionEventId.match(event.m_description);
515  if (match.hasMatch())
516  event.m_description.remove(match.capturedStart(), match.capturedLength());
517 }
518 
523 {
  // Tries to split the leading part of event.m_description off into
  // event.m_subtitle. Candidate separators are a colon, a quoted leading
  // phrase, a period, a question mark or an exclamation mark; word-count
  // limits (kMaxDotToColon, kMaxToTitle, kDotToTitle,
  // kMaxQuestionExclamation - declared outside this view) decide whether
  // a candidate is accepted.
  // NOTE(review): the signature line is not visible in this view;
  // presumably this is the routine called as SetUKSubtitle(event) - confirm.
524  QStringList strListColon = event.m_description.split(":");
525  QStringList strListEnd;
526 
527  bool fColon = false;
528  bool fQuotedSubtitle = false;
529  QString strEnd;
  // More than one piece means the description contains a colon.
530  if (strListColon.count()>1)
531  {
532  bool fDoubleDot = false;
533  bool fSingleDot = true;
  // Length of the text before the first colon.
534  int nLength = strListColon[0].length();
535 
  // fDoubleDot: a ".." occurs before the first colon.
536  int nPosition1 = event.m_description.indexOf("..");
537  if ((nPosition1 < nLength) && (nPosition1 >= 0))
538  fDoubleDot = true;
  // fSingleDot stays true only if a period precedes the first colon and
  // at least kMaxDotToColon space-separated pieces lie between them.
  // Note that the else branch below also runs when no period was found.
539  nPosition1 = event.m_description.indexOf(".");
540  if (nPosition1==-1)
541  fSingleDot = false;
542  if (nPosition1 > nLength)
543  fSingleDot = false;
544  else
545  {
546  QString strTmp = event.m_description.mid(nPosition1+1,
547  nLength-nPosition1);
548 
549  QStringList tmp = strTmp.split(" ");
550  if (((uint) tmp.size()) < kMaxDotToColon)
551  fSingleDot = false;
552  }
553 
554  if (fDoubleDot)
555  {
  // Use the colon-separated pieces as they are.
556  strListEnd = strListColon;
557  fColon = true;
558  }
559  else if (!fSingleDot)
560  {
  // Collect colon pieces while the running count of space-separated
  // pieces stays below kMaxToTitle. nTitleMax is the index of the
  // piece that reached the limit, or -1 if none did.
561  QStringList strListTmp;
562  uint nTitle=0;
563  int nTitleMax=-1;
564  for (int i =0; (i<strListColon.count()) && (nTitleMax==-1);i++)
565  {
566  const QStringList tmp = strListColon[i].split(" ");
567 
568  nTitle += tmp.size();
569 
570  if (nTitle < kMaxToTitle)
571  strListTmp.push_back(strListColon[i]);
572  else
573  nTitleMax=i;
574  }
  // Re-join the collected pieces with ':' as the first entry of
  // strListEnd, then append the pieces after index nTitleMax. The
  // piece at index nTitleMax itself is not copied.
575  QString strPartial;
576  for (int i=0;i<(nTitleMax-1);i++)
577  strPartial+=strListTmp[i]+":";
578  if (nTitleMax>0)
579  {
580  strPartial+=strListTmp[nTitleMax-1];
581  strListEnd.push_back(strPartial);
582  }
583  for (int i=nTitleMax+1;i<strListColon.count();i++)
584  strListEnd.push_back(strListColon[i]);
585  fColon = true;
586  }
587  }
  // A description starting with a phrase in single quotes that ends in a
  // period before the closing quote: the phrase (without the period)
  // becomes the subtitle and the quoted part is removed.
588  static const QRegularExpression ukQuotedSubtitle { R"(^'([\w\s\-,]+?)\.' )" };
589  auto match = ukQuotedSubtitle.match(event.m_description);
590  if (match.hasMatch())
591  {
592  event.m_subtitle = match.captured(1);
593  event.m_description.remove(match.capturedStart(0),
594  match.capturedLength(0));
595  fQuotedSubtitle = true;
596  }
597  QStringList strListPeriod;
598  QStringList strListQuestion;
599  QStringList strListExcl;
  // Neither colon nor quoted subtitle applied: try '.', then '?' or '!'.
600  if (!(fColon || fQuotedSubtitle))
601  {
602  strListPeriod = event.m_description.split(".");
603  if (strListPeriod.count() >1)
604  {
  // Accept the period split unless the description's first
  // period is the start of a "..".
605  int nPosition1 = event.m_description.indexOf(".");
606  int nPosition2 = event.m_description.indexOf("..");
607  if ((nPosition1 < nPosition2) || (nPosition2==-1))
608  strListEnd = strListPeriod;
609  }
610 
  // A '?' or '!' split replaces the period split when it yields more
  // than one piece and at most kMaxQuestionExclamation pieces;
  // strEnd remembers the mark so it can be put back on the subtitle.
611  strListQuestion = event.m_description.split("?");
612  strListExcl = event.m_description.split("!");
613  if ((strListQuestion.size() > 1) &&
614  ((uint)strListQuestion.size() <= kMaxQuestionExclamation))
615  {
616  strListEnd = strListQuestion;
617  strEnd = "?";
618  }
619  else if ((strListExcl.size() > 1) &&
620  ((uint)strListExcl.size() <= kMaxQuestionExclamation))
621  {
622  strListEnd = strListExcl;
623  strEnd = "!";
624  }
625  else
626  strEnd.clear();
627  }
628 
629  if (!strListEnd.empty())
630  {
  // Words of the candidate subtitle (first piece of the split).
631 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
632  QStringList strListSpace = strListEnd[0].split(
633  " ", QString::SkipEmptyParts);
634 #else
635  QStringList strListSpace = strListEnd[0].split(
636  " ", Qt::SkipEmptyParts);
637 #endif
  // Give up when the candidate has too many words.
638  if (fColon && ((uint)strListSpace.size() > kMaxToTitle))
639  return;
640  if ((uint)strListSpace.size() > kDotToTitle)
641  return;
  // Candidates containing any of these words are not used as a subtitle.
  // NOTE(review): "seres" is presumably a typo for "series"; it is
  // part of the runtime pattern, so confirm before changing it.
642  static const QRegularExpression ukExclusionFromSubtitle {
643  "(starring|stars\\s|drama|seres|sitcom)",
644  QRegularExpression::CaseInsensitiveOption };
645  if (strListSpace.filter(ukExclusionFromSubtitle).empty())
646  {
  // Subtitle is the first piece plus the remembered '?' or '!'.
  // The description drops that piece and one separator character.
  // Both then lose any leading spaces, '|' and ':' characters.
647  event.m_subtitle = strListEnd[0]+strEnd;
648  event.m_subtitle.remove(kUKSpaceColonStart);
649  event.m_description=
650  event.m_description.mid(strListEnd[0].length()+1);
651  event.m_description.remove(kUKSpaceColonStart);
652  }
653  }
654 }
655 
656 
661 {
662  static const QRegularExpression uk24ep { R"(^\d{1,2}:00[ap]m to \d{1,2}:00[ap]m: )" };
663  static const QRegularExpression ukTime { R"(\d{1,2}[\.:]\d{1,2}\s*(am|pm|))" };
664  QString strFull;
665 
666  bool isMovie = event.m_category.startsWith("Movie",Qt::CaseInsensitive) ||
667  event.m_category.startsWith("Film",Qt::CaseInsensitive);
668  // BBC three case (could add another record here ?)
669  static const QRegularExpression ukThen { R"(\s*?(Then|Followed by) 60 Seconds\.)",
670  QRegularExpression::CaseInsensitiveOption };
671  static const QRegularExpression ukNew { R"((New\.|\s*?(Brand New|New)\s*?(Series|Episode)\s*?[:\.\-]))",
672  QRegularExpression::CaseInsensitiveOption };
673  static const QRegularExpression ukNewTitle { R"(^(Brand New|New:)\s*)",
674  QRegularExpression::CaseInsensitiveOption };
675  event.m_description = event.m_description.remove(ukThen);
676  event.m_description = event.m_description.remove(ukNew);
677  event.m_title = event.m_title.remove(ukNewTitle);
678 
679  // Removal of Class TV, CBBC and CBeebies etc..
680  static const QRegularExpression ukTitleRemove { "^(?:[tT]4:|Schools\\s*?:)" };
681  static const QRegularExpression ukDescriptionRemove { R"(^(?:CBBC\s*?\.|CBeebies\s*?\.|Class TV\s*?:|BBC Switch\.))" };
682  event.m_title = event.m_title.remove(ukTitleRemove);
683  event.m_description = event.m_description.remove(ukDescriptionRemove);
684 
685  // Removal of BBC FOUR and BBC THREE
686  static const QRegularExpression ukBBC34 { R"(BBC (?:THREE|FOUR) on BBC (?:ONE|TWO)\.)",
687  QRegularExpression::CaseInsensitiveOption };
688  event.m_description = event.m_description.remove(ukBBC34);
689 
690  // BBC 7 [Rpt of ...] case.
691  static const QRegularExpression ukBBC7rpt { R"(\[Rptd?[^]]+?\d{1,2}\.\d{1,2}[ap]m\]\.)" };
692  event.m_description = event.m_description.remove(ukBBC7rpt);
693 
694  // "All New To 4Music!
695  static const QRegularExpression ukAllNew { R"(All New To 4Music!\s?)" };
696  event.m_description = event.m_description.remove(ukAllNew);
697 
698  // Removal of 'Also in HD' text
699  static const QRegularExpression ukAlsoInHD { R"(\s*Also in HD\.)",
700  QRegularExpression::CaseInsensitiveOption };
701  event.m_description = event.m_description.remove(ukAlsoInHD);
702 
703  // Remove [AD,S] etc.
704  static const QRegularExpression ukCC { R"(\[(?:(AD|SL|S|W|HD),?)+\])" };
705  auto match = ukCC.match(event.m_description);
706  while (match.hasMatch())
707  {
708  QStringList tmpCCitems = match.captured(0).remove("[").remove("]").split(",");
709  if (tmpCCitems.contains("AD"))
710  event.m_audioProps |= AUD_VISUALIMPAIR;
711  if (tmpCCitems.contains("HD"))
712  event.m_videoProps |= VID_HDTV;
713  if (tmpCCitems.contains("S"))
714  event.m_subtitleType |= SUB_NORMAL;
715  if (tmpCCitems.contains("SL"))
716  event.m_subtitleType |= SUB_SIGNED;
717  if (tmpCCitems.contains("W"))
718  event.m_videoProps |= VID_WIDESCREEN;
719  event.m_description.remove(match.capturedStart(0),
720  match.capturedLength(0));
721  match = ukCC.match(event.m_description, match.capturedStart(0));
722  }
723 
724  event.m_title = event.m_title.trimmed();
725  event.m_description = event.m_description.trimmed();
726 
727  // Constituents of UK season regexp, decomposed for clarity
728 
729  // Matches Season 2, S 2 and "Series 2," etc but not "hits 2"
730  // cap1 = season
731  static const QString seasonStr = R"(\b(?:Season|Series|S)\s*(\d+)\s*,?)";
732 
733  // Work out the season and episode numbers (if any)
734  // Matching pattern "Season 2 Episode|Ep 3 of 14|3/14" etc
735 
736  // Matches Episode 3, Ep 3/4, Ep 3 of 4 etc but not "step 1"
737  // cap1 = ep, cap2 = total
738  static const QString longEp = R"(\b(?:Ep|Episode)\s*(\d+)\s*(?:(?:/|of)\s*(\d*))?)";
739 
740  // Matches S2 Ep 3/4, "Season 2, Ep 3 of 4", Episode 3 etc
741  // cap1 = season, cap2 = ep, cap3 = total
742  static const QString longSeasEp = QString("\\(?(?:%1)?\\s*%2").arg(seasonStr, longEp);
743 
744  // Matches long seas/ep with surrounding parenthesis & trailing period
745  // cap1 = season, cap2 = ep, cap3 = total
746  static const QString longContext = QString(R"(\(*%1\s*\)?\s*\.?)").arg(longSeasEp);
747 
748  // Matches 3/4, 3 of 4
749  // cap1 = ep, cap2 = total
750  static const QString shortEp = R"((\d+)\s*(?:/|of)\s*(\d+))";
751 
752  // Matches short ep/total, ignoring Parts and idioms such as 9/11, 24/7 etc.
753  // ie. x/y in parenthesis or has no leading or trailing text in the sentence.
754  // cap0 may include previous/anchoring period
755  // cap1 = shortEp with surrounding parenthesis & trailing period (to remove)
756  // cap2 = ep, cap3 = total,
757  static const QString shortContext =
758  QString(R"((?:^|\.)(\s*\(*\s*%1[\s)]*(?:[).:]|$)))").arg(shortEp);
759 
760  // Prefer long format resorting to short format
761  // cap0 = long match to remove, cap1 = long season, cap2 = long ep, cap3 = long total,
762  // cap4 = short match to remove, cap5 = short ep, cap6 = short total
763  static const QRegularExpression ukSeries { "(?:" + longContext + "|" + shortContext + ")",
764  QRegularExpression::CaseInsensitiveOption };
765 
766  bool series = false;
767  bool fromTitle = true;
768  match = ukSeries.match(event.m_title);
769  if (!match.hasMatch())
770  {
771  fromTitle = false;
772  match = ukSeries.match(event.m_description);
773  }
774  if (match.hasMatch())
775  {
776  if (!match.captured(1).isEmpty())
777  {
778  event.m_season = match.captured(1).toUInt();
779  series = true;
780  }
781 
782  if (!match.captured(2).isEmpty())
783  {
784  event.m_episode = match.captured(2).toUInt();
785  series = true;
786  }
787  else if (!match.captured(5).isEmpty())
788  {
789  event.m_episode = match.captured(5).toUInt();
790  series = true;
791  }
792 
793  if (!match.captured(3).isEmpty())
794  {
795  event.m_totalepisodes = match.captured(3).toUInt();
796  series = true;
797  }
798  else if (!match.captured(6).isEmpty())
799  {
800  event.m_totalepisodes = match.captured(6).toUInt();
801  series = true;
802  }
803 
804  // Remove long or short match. Short text doesn't start at position2
805  int form = match.captured(4).isEmpty() ? 0 : 4;
806 
807  if (fromTitle)
808  {
809  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from title (%4) \"%5\"")
810  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
811  .arg(event.m_title, event.m_description));
812 
813  event.m_title.remove(match.capturedStart(form),
814  match.capturedLength(form));
815  }
816  else
817  {
818  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from description (%4) \"%5\"")
819  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
820  .arg(event.m_title, event.m_description));
821 
822  if (match.capturedStart(form) == 0)
823  {
824  // Remove from the start of the description.
825  // Otherwise it ends up in the subtitle.
826  event.m_description.remove(match.capturedStart(form),
827  match.capturedLength(form));
828  }
829  }
830  }
831 
832  if (isMovie)
833  event.m_categoryType = ProgramInfo::kCategoryMovie;
834  else if (series)
835  event.m_categoryType = ProgramInfo::kCategorySeries;
836 
837  // Multi-part episodes, or films (e.g. ITV film split by news)
838  // Matches Part 1, Pt 1/2, Part 1 of 2 etc.
839  static const QRegularExpression ukPart { R"([-(\:,.]\s*(?:Part|Pt)\s*(\d+)\s*(?:(?:of|/)\s*(\d+))?\s*[-):,.])",
840  QRegularExpression::CaseInsensitiveOption };
841  match = ukPart.match(event.m_title);
842  auto match2 = ukPart.match(event.m_description);
843  if (match.hasMatch())
844  {
845  event.m_partnumber = match.captured(1).toUInt();
846  event.m_parttotal = match.captured(2).toUInt();
847 
848  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from title (%3)")
849  .arg(event.m_partnumber).arg(event.m_parttotal).arg(event.m_title));
850 
851  // Remove from the title
852  event.m_title.remove(match.capturedStart(0),
853  match.capturedLength(0));
854  }
855  else if (match2.hasMatch())
856  {
857  event.m_partnumber = match2.captured(1).toUInt();
858  event.m_parttotal = match2.captured(2).toUInt();
859 
860  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from description (%3) \"%4\"")
861  .arg(event.m_partnumber).arg(event.m_parttotal)
862  .arg(event.m_title, event.m_description));
863 
864  // Remove from the start of the description.
865  // Otherwise it ends up in the subtitle.
866  if (match2.capturedStart(0) == 0)
867  {
868  // Retain a single colon (subtitle separator) if we remove any
869  QString sub = match2.captured(0).contains(":") ? ":" : "";
870  event.m_description = event.m_description.replace(match2.captured(0), sub);
871  }
872  }
873 
874  static const QRegularExpression ukStarring { R"((?:Western\s)?[Ss]tarring ([\w\s\-']+?)[Aa]nd\s([\w\s\-']+?)[\.|,]\s*(\d{4})?(?:\.\s)?)" };
875  match = ukStarring.match(event.m_description);
876  if (match.hasMatch())
877  {
878  // if we match this we've captured 2 actors and an (optional) airdate
879  /* Possible TODO: if EIT inlcude the priority and/or character
880  * names for the actors, include them in AddPerson call. */
881  event.AddPerson(DBPerson::kActor, match.captured(1));
882  event.AddPerson(DBPerson::kActor, match.captured(2));
883  if (match.captured(3).length() > 0)
884  {
885  bool ok = false;
886  uint y = match.captured(3).toUInt(&ok);
887  if (ok)
888  {
889  event.m_airdate = y;
890  event.m_originalairdate = QDate(y, 1, 1);
891  }
892  }
893  }
894 
895  static const QRegularExpression ukLaONoSplit { "^Law & Order: (?:Criminal Intent|LA|"
896  "Special Victims Unit|Trial by Jury|UK|You the Jury)" };
897  if (!event.m_title.startsWith("CSI:") && !event.m_title.startsWith("CD:") &&
898  !event.m_title.contains(ukLaONoSplit) &&
899  !event.m_title.startsWith("Mission: Impossible"))
900  {
901  static const QRegularExpression ukDoubleDotStart { R"(^\.\.+)" };
902  static const QRegularExpression ukDoubleDotEnd { R"(\.\.+$)" };
903  if ((event.m_title.indexOf(ukDoubleDotEnd) != -1) &&
904  (event.m_description.indexOf(ukDoubleDotStart) != -1))
905  {
906  QString strPart=event.m_title.remove(ukDoubleDotEnd)+" ";
907  strFull = strPart + event.m_description.remove(ukDoubleDotStart);
908  int position1 = -1;
909  static const QRegularExpression ukCEPQ { R"([:\!\.\?]\s)" };
910  static const QRegularExpression ukSpaceStart { "^ " };
911  if (isMovie &&
912  ((position1 = strFull.indexOf(ukCEPQ,strPart.length())) != -1))
913  {
914  if (strFull[position1] == '!' || strFull[position1] == '?'
915  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
916  position1++;
917  event.m_title = strFull.left(position1);
918  event.m_description = strFull.mid(position1 + 1);
919  event.m_description.remove(ukSpaceStart);
920  }
921  else if ((position1 = strFull.indexOf(ukCEPQ)) != -1)
922  {
923  if (strFull[position1] == '!' || strFull[position1] == '?'
924  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
925  position1++;
926  event.m_title = strFull.left(position1);
927  event.m_description = strFull.mid(position1 + 1);
928  event.m_description.remove(ukSpaceStart);
929  SetUKSubtitle(event);
930  }
931  }
932  else if (event.m_description.indexOf(uk24ep) != -1)
933  {
934  auto match24 = uk24ep.match(event.m_description);
935  if (match24.hasMatch())
936  {
937  // Special case for episodes of 24.
938  // -2 from the length cause we don't want ": " on the end
939  event.m_subtitle = event.m_description.mid(match24.capturedStart(0),
940  match24.captured(0).length() - 2);
941  event.m_description = event.m_description.remove(match24.captured(0));
942  }
943  }
944  else if (event.m_description.indexOf(ukTime) == -1)
945  {
946  static const QRegularExpression ukYearColon { R"(^[\d]{4}:)" };
947  if (!isMovie && (event.m_title.indexOf(ukYearColon) < 0))
948  {
949  int position1 = -1;
950  if (((position1 = event.m_title.indexOf(":")) != -1) &&
951  (event.m_description.indexOf(":") < 0 ))
952  {
953  static const QRegularExpression ukCompleteDots { R"(^\.\.+$)" };
954  if (event.m_title.mid(position1+1).indexOf(ukCompleteDots)==0)
955  {
956  SetUKSubtitle(event);
957  QString strTmp = event.m_title.mid(position1+1);
958  event.m_title.resize(position1);
959  event.m_subtitle = strTmp+event.m_subtitle;
960  }
961  else if ((uint)position1 < kSubtitleMaxLen)
962  {
963  event.m_subtitle = event.m_title.mid(position1 + 1);
964  event.m_title = event.m_title.left(position1);
965  }
966  }
967  else
968  SetUKSubtitle(event);
969  }
970  }
971  }
972 
973  if (!isMovie && event.m_subtitle.isEmpty() &&
974  !event.m_title.startsWith("The X-Files"))
975  {
976  int position1 = -1;
977  if ((position1=event.m_description.indexOf(ukTime)) != -1)
978  {
979  static const QRegularExpression ukColonPeriod { R"([:\.])" };
980  int position2 = event.m_description.indexOf(ukColonPeriod);
981  if ((position2>=0) && (position2 < (position1-2)))
982  SetUKSubtitle(event);
983  }
984  else if ((position1=event.m_title.indexOf("-")) != -1)
985  {
986  if ((uint)position1 < kSubtitleMaxLen)
987  {
988  event.m_subtitle = event.m_title.mid(position1 + 1);
989  event.m_subtitle.remove(kUKSpaceColonStart);
990  event.m_title = event.m_title.left(position1);
991  }
992  }
993  else
994  SetUKSubtitle(event);
995  }
996 
997  // Work out the year (if any)
998  static const QRegularExpression ukYear { R"([\[\(]([\d]{4})[\)\]])" };
999  match = ukYear.match(event.m_description);
1000  if (match.hasMatch())
1001  {
1002  event.m_description.remove(match.capturedStart(0),
1003  match.capturedLength(0));
1004  bool ok = false;
1005  uint y = match.captured(1).toUInt(&ok);
1006  if (ok)
1007  {
1008  event.m_airdate = y;
1009  event.m_originalairdate = QDate(y, 1, 1);
1010  }
1011  }
1012 
1013  // Trim leading/trailing '.'
1014  static const QRegularExpression ukDotSpaceStart { R"(^\. )" };
1015  static const QRegularExpression ukDotEnd { R"(\.$)" };
1016  event.m_subtitle.remove(ukDotSpaceStart);
1017  if (event.m_subtitle.lastIndexOf("..") != (event.m_subtitle.length()-2))
1018  event.m_subtitle.remove(ukDotEnd);
1019 
1020  // Reverse the subtitle and empty description
1021  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
1022  {
1023  event.m_description=event.m_subtitle;
1024  event.m_subtitle.clear();
1025  }
1026 }
1027 
1032 {
1033  /* Used for PBS ATSC Subtitles are separated by a colon */
1034  int position = event.m_description.indexOf(':');
1035  if (position != -1)
1036  {
1037  const QString stmp = event.m_description;
1038  event.m_subtitle = stmp.left(position);
1039  event.m_description = stmp.right(stmp.length() - position - 2);
1040  }
1041 }
1042 
1046 void EITFixUp::FixComHem(DBEventEIT &event, bool process_subtitle)
1047 {
1048  static const QRegularExpression comHemPersSeparator { R"((, |\soch\s))" };
1049 
1050  // Reverse what EITFixUp::Fix() did
1051  if (event.m_subtitle.isEmpty() && !event.m_description.isEmpty())
1052  {
1053  event.m_subtitle = event.m_description;
1054  event.m_description = "";
1055  }
1056 
1057  // Remove subtitle, it contains the category and we already know that
1058  event.m_subtitle = "";
1059 
1060  bool isSeries = false;
1061  // Try to find episode numbers
1062  static const QRegularExpression comHemSeries1
1063  { R"(\s?(?:[dD]el|[eE]pisode)\s([0-9]+)(?:\s?(?:/|:|av)\s?([0-9]+))?\.)" };
1064  static const QRegularExpression comHemSeries2 { R"(\s?-?\s?([Dd]el\s+([0-9]+)))" };
1065  auto match = comHemSeries1.match(event.m_description);
1066  auto match2 = comHemSeries2.match(event.m_title);
1067  if (match2.hasMatch())
1068  {
1069  event.m_partnumber = match2.capturedView(2).toUInt();
1070  event.m_title.remove(match2.capturedStart(), match2.capturedLength());
1071  }
1072  else if (match.hasMatch())
1073  {
1074  if (match.capturedStart(1) != -1)
1075  event.m_partnumber = match.capturedView(1).toUInt();
1076  if (match.capturedStart(2) != -1)
1077  event.m_parttotal = match.capturedView(2).toUInt();
1078 
1079  // Remove the episode numbers, but only if it's not at the begining
1080  // of the description (subtitle code might use it)
1081  if (match.capturedStart() > 0)
1082  event.m_description.remove(match.capturedStart(),
1083  match.capturedLength());
1084  isSeries = true;
1085  }
1086 
1087  // Add partnumber/parttotal to subtitle
1088  // This will be overwritten if we find a better subtitle
1089  if (event.m_partnumber > 0)
1090  {
1091  event.m_subtitle = QString("Del %1").arg(event.m_partnumber);
1092  if (event.m_parttotal > 0)
1093  event.m_subtitle += QString(" av %1").arg(event.m_parttotal);
1094  }
1095 
1096  // Move subtitle info from title to subtitle
1097  static const QRegularExpression comHemTSub { R"(\s+-\s+([^\-]+))" };
1098  match = comHemTSub.match(event.m_title);
1099  if (match.hasMatch())
1100  {
1101  event.m_subtitle = match.captured(1);
1102  event.m_title.remove(match.capturedStart(), match.capturedLength());
1103  }
1104 
1105  // No need to continue without a description.
1106  if (event.m_description.length() <= 0)
1107  return;
1108 
1109  // Try to find country category, year and possibly other information
1110  // from the begining of the description
1111  static const QRegularExpression comHemCountry
1112  { R"(^(\(.+\))?\s?([^ ]+)\s([^\.0-9]+)\sfrån\s([0-9]{4})(?:\smed\s([^\.]+))?\.?)" };
1113  match = comHemCountry.match(event.m_description);
1114  if (match.hasMatch())
1115  {
1116  QString replacement;
1117 
1118  // Original title, usually english title
1119  // note: list[1] contains extra () around the text that needs removing
1120  if (!match.capturedView(1).isEmpty())
1121  {
1122  replacement = match.captured(1) + " ";
1123  //store it somewhere?
1124  }
1125 
1126  // Countr(y|ies)
1127  if (!match.capturedView(2).isEmpty())
1128  {
1129  replacement += match.captured(2) + " ";
1130  //store it somewhere?
1131  }
1132 
1133  // Category
1134  if (!match.capturedView(3).isEmpty())
1135  {
1136  replacement += match.captured(3) + ".";
1137  if(event.m_category.isEmpty())
1138  {
1139  event.m_category = match.captured(3);
1140  }
1141 
1142  if(match.captured(3).indexOf("serie")!=-1)
1143  {
1144  isSeries = true;
1145  }
1146  }
1147 
1148  // Year
1149  if (!match.capturedView(4).isEmpty())
1150  {
1151  bool ok = false;
1152  uint y = match.capturedView(4).trimmed().toUInt(&ok);
1153  if (ok)
1154  event.m_airdate = y;
1155  }
1156 
1157  // Actors
1158  if (!match.capturedView(5).isEmpty())
1159  {
1160 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1161  const QStringList actors =
1162  match.captured(5).split(comHemPersSeparator, QString::SkipEmptyParts);
1163 #else
1164  const QStringList actors =
1165  match.captured(5).split(comHemPersSeparator, Qt::SkipEmptyParts);
1166 #endif
1167  /* Possible TODO: if EIT inlcude the priority and/or character
1168  * names for the actors, include them in AddPerson call. */
1169  for (const auto & actor : qAsConst(actors))
1170  event.AddPerson(DBPerson::kActor, actor);
1171  }
1172 
1173  // Remove year and actors.
1174  // The reason category is left in the description is because otherwise
1175  // the country would look wierd like "Amerikansk. Rest of description."
1176  event.m_description = event.m_description.replace(match.captured(0),replacement);
1177  }
1178 
1179  if (isSeries)
1180  event.m_categoryType = ProgramInfo::kCategorySeries;
1181 
1182  // Look for additional persons in the description
1183  static const QRegularExpression comHemPersons
1184  { R"(\s?([Rr]egi|[Ss]kådespelare|[Pp]rogramledare|[Ii] rollerna):\s([^\.]+)\.)" };
1185  auto iter = comHemPersons.globalMatch(event.m_description);
1186  while (iter.hasNext())
1187  {
1188  auto pmatch = iter.next();
1190 
1191  static const QRegularExpression comHemDirector { "[Rr]egi" };
1192  static const QRegularExpression comHemActor { "[Ss]kådespelare|[Ii] rollerna" };
1193  static const QRegularExpression comHemHost { "[Pp]rogramledare" };
1194  auto dmatch = comHemDirector.match(pmatch.capturedView(1));
1195  auto amatch = comHemActor.match(pmatch.capturedView(1));
1196  auto hmatch = comHemHost.match(pmatch.capturedView(1));
1197  if (dmatch.hasMatch())
1198  role = DBPerson::kDirector;
1199  else if (amatch.hasMatch())
1200  role = DBPerson::kActor;
1201  else if (hmatch.hasMatch())
1202  role = DBPerson::kHost;
1203  else
1204  {
1205  event.m_description.remove(pmatch.capturedStart(), pmatch.capturedLength());
1206  continue;
1207  }
1208 
1209 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1210  const QStringList actors =
1211  pmatch.captured(2).split(comHemPersSeparator, QString::SkipEmptyParts);
1212 #else
1213  const QStringList actors =
1214  pmatch.captured(2).split(comHemPersSeparator, Qt::SkipEmptyParts);
1215 #endif
1216  /* Possible TODO: if EIT inlcude the priority and/or character
1217  * names for the actors, include them in AddPerson call. */
1218  for (const auto & actor : qAsConst(actors))
1219  event.AddPerson(role, actor);
1220 
1221  // Remove it
1222  event.m_description=event.m_description.replace(pmatch.captured(0),"");
1223  }
1224 
1225  // Is this event on a channel we shoud look for a subtitle?
1226  // The subtitle is the first sentence in the description, but the
1227  // subtitle can't be the only thing in the description and it must be
1228  // shorter than 55 characters or we risk picking up the wrong thing.
1229  if (process_subtitle)
1230  {
1231  static const QRegularExpression comHemSub { R"([.\?\!] )" };
1232  int pos2 = event.m_description.indexOf(comHemSub);
1233  bool pvalid = pos2 != -1 && pos2 <= 55;
1234  if (pvalid && (event.m_description.length() - (pos2 + 2)) > 0)
1235  {
1236  event.m_subtitle = event.m_description.left(
1237  pos2 + (event.m_description[pos2] == '?' ? 1 : 0));
1238  event.m_description = event.m_description.mid(pos2 + 2);
1239  }
1240  }
1241 
1242  // Teletext subtitles?
1243  static const QRegularExpression comHemTT { "[Tt]ext-[Tt][Vv]" };
1244  if (event.m_description.indexOf(comHemTT) != -1)
1245  event.m_subtitleType |= SUB_NORMAL;
1246 
1247  // Try to findout if this is a rerun and if so the date.
1248  static const QRegularExpression comHemRerun1 { R"([Rr]epris\sfrån\s([^\.]+)(?:\.|$))" };
1249  static const QRegularExpression comHemRerun2 { R"(([0-9]+)/([0-9]+)(?:\s-\s([0-9]{4}))?)" };
1250  match = comHemRerun1.match(event.m_description);
1251  if (!match.hasMatch())
1252  return;
1253 
1254  // Rerun from today
1255  if (match.captured(1) == "i dag")
1256  {
1257  event.m_originalairdate = event.m_starttime.date();
1258  return;
1259  }
1260 
1261  // Rerun from yesterday afternoon
1262  if (match.captured(1) == "eftermiddagen")
1263  {
1264  event.m_originalairdate = event.m_starttime.date().addDays(-1);
1265  return;
1266  }
1267 
1268  // Rerun with day, month and possibly year specified
1269  match2 = comHemRerun2.match(match.capturedView(1));
1270  if (match2.hasMatch())
1271  {
1272  int day = match2.capturedView(1).toInt();
1273  int month = match2.capturedView(2).toInt();
1274  //int year;
1275  //if (match2.capturedLength(3) > 0)
1276  // year = match2.capturedView(3).toInt();
1277  //else
1278  // year = event.m_starttime.date().year();
1279 
1280  if (day > 0 && month > 0)
1281  {
1282  QDate date(event.m_starttime.date().year(), month, day);
1283  // it's a rerun so it must be in the past
1284  if (date > event.m_starttime.date())
1285  date = date.addYears(-1);
1286  event.m_originalairdate = date;
1287  }
1288  return;
1289  }
1290 }
1291 
1296 {
1297  event.m_category = event.m_subtitle;
1298  /* Used for DVB-S Subtitles are separated by a colon */
1299  int position = event.m_description.indexOf(':');
1300  if (position != -1)
1301  {
1302  const QString stmp = event.m_description;
1303  event.m_subtitle = stmp.left(position);
1304  event.m_description = stmp.right(stmp.length() - position - 2);
1305  }
1306 }
1307 
1312 {
1313  if (event.m_description.startsWith("[Program data ") || event.m_description.startsWith("[Program info "))//TEN
1314  {
1315  event.m_description = "";//event.m_subtitle;
1316  }
1317  if (event.m_description.endsWith("Copyright West TV Ltd. 2011)"))
1318  event.m_description.resize(event.m_description.length()-40);
1319 
1320  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())//due to ten's copyright info, this won't be caught before
1321  {
1322  event.m_description = event.m_subtitle;
1323  event.m_subtitle.clear();
1324  }
1325  if (event.m_description.startsWith(event.m_title+" - "))
1326  event.m_description.remove(0,event.m_title.length()+3);
1327  if (event.m_title.startsWith("LIVE: ", Qt::CaseInsensitive))
1328  {
1329  event.m_title.remove(0, 6);
1330  event.m_description.prepend("(Live) ");
1331  }
1332 }
1333 
1338 {
1339  static const QRegularExpression rating { "\\((G|PG|M|MA)\\)" };
1340  auto match = rating.match(event.m_description);
1341  if (match.hasMatch())
1342  {
1343  EventRating prograting;
1344  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1345  event.m_ratings.push_back(prograting);
1346  event.m_description.remove(0,match.capturedLength()+1);
1347  }
1348  if (event.m_description.startsWith("[HD]"))
1349  {
1350  event.m_videoProps |= VID_HDTV;
1351  event.m_description.remove(0,5);
1352  }
1353  if (event.m_description.startsWith("[CC]"))
1354  {
1355  event.m_subtitleType |= SUB_NORMAL;
1356  event.m_description.remove(0,5);
1357  }
1358  if (event.m_subtitle == "Movie")
1359  {
1360  event.m_subtitle.clear();
1361  event.m_categoryType = ProgramInfo::kCategoryMovie;
1362  }
1363  if (event.m_description.startsWith(event.m_title))
1364  event.m_description.remove(0,event.m_title.length()+1);
1365 }
1366 
1371 {
1372  if (event.m_description.endsWith(" Rpt"))
1373  {
1374  event.m_previouslyshown = true;
1375  event.m_description.resize(event.m_description.size()-4);
1376  }
1377  static const QRegularExpression year { "(\\d{4})$" };
1378  auto match = year.match(event.m_description);
1379  if (match.hasMatch())
1380  {
1381  event.m_airdate = match.capturedView(1).toUInt();
1382  event.m_description.resize(event.m_description.size()-5);
1383  }
1384  if (event.m_description.endsWith(" CC"))
1385  {
1386  event.m_subtitleType |= SUB_NORMAL;
1387  event.m_description.resize(event.m_description.size()-3);
1388  }
1389  QString advisories;//store the advisories to append later
1390  static const QRegularExpression adv { "(\\([A-Z,]+\\))$" };
1391  match = adv.match(event.m_description);
1392  if (match.hasMatch())
1393  {
1394  advisories = match.captured(1);
1395  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1396  }
1397  static const QRegularExpression rating { "(C|G|PG|M|MA)$" };
1398  match = rating.match(event.m_description);
1399  if (match.hasMatch())
1400  {
1401  EventRating prograting;
1402  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1403  if (!advisories.isEmpty())
1404  prograting.m_rating.append(" ").append(advisories);
1405  event.m_ratings.push_back(prograting);
1406  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1407  }
1408 }
1413 {
1414  // If the description has been truncated to fit within the
1415  // 'subtitle' eit field, none of the following will work (ABC)
1416  if (event.m_description.endsWith(".."))
1417  return;
1418  event.m_description = event.m_description.trimmed();
1419 
1420  static const QRegularExpression auFreeviewSY { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\)$)" };
1421  auto match = auFreeviewSY.match(event.m_description);
1422  if (match.hasMatch())
1423  {
1424  if (event.m_subtitle.isEmpty())//nine sometimes has an actual subtitle field and the brackets thingo)
1425  event.m_subtitle = match.captured(2);
1426  event.m_airdate = match.capturedView(3).toUInt();
1427  event.m_description = match.captured(1);
1428  return;
1429  }
1430  static const QRegularExpression auFreeviewY { "(.*) \\(([12][0-9][0-9][0-9])\\)$" };
1431  match = auFreeviewY.match(event.m_description);
1432  if (match.hasMatch())
1433  {
1434  event.m_airdate = match.capturedView(2).toUInt();
1435  event.m_description = match.captured(1);
1436  return;
1437  }
1438  static const QRegularExpression auFreeviewSYC { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1439  match = auFreeviewSYC.match(event.m_description);
1440  if (match.hasMatch())
1441  {
1442  if (event.m_subtitle.isEmpty())
1443  event.m_subtitle = match.captured(2);
1444  event.m_airdate = match.capturedView(3).toUInt();
1445  QStringList actors = match.captured(4).split("/");
1446  /* Possible TODO: if EIT inlcude the priority and/or character
1447  * names for the actors, include them in AddPerson call. */
1448  for (const QString& actor : qAsConst(actors))
1449  event.AddPerson(DBPerson::kActor, actor);
1450  event.m_description = match.captured(1);
1451  return;
1452  }
1453  static const QRegularExpression auFreeviewYC { R"((.*) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1454  match = auFreeviewYC.match(event.m_description);
1455  if (match.hasMatch())
1456  {
1457  event.m_airdate = match.capturedView(2).toUInt();
1458  QStringList actors = match.captured(3).split("/");
1459  /* Possible TODO: if EIT inlcude the priority and/or character
1460  * names for the actors, include them in AddPerson call. */
1461  for (const QString& actor : qAsConst(actors))
1462  event.AddPerson(DBPerson::kActor, actor);
1463  event.m_description = match.captured(1);
1464  }
1465 }
1466 
1471 {
1472  const uint SUBTITLE_PCT = 60; // % of description to allow subtitle to
1473  const uint lSUBTITLE_MAX_LEN = 128;// max length of subtitle field in db.
1474 
1475  // Remove subtitle, it contains category information too specific to use
1476  event.m_subtitle = QString("");
1477 
1478  // No need to continue without a description.
1479  if (event.m_description.length() <= 0)
1480  return;
1481 
1482  // Replace incomplete title if the full one is in the description
1483  static const QRegularExpression mcaIncompleteTitle { R"((.*).\.\.\.$)" };
1484  auto match = mcaIncompleteTitle.match(event.m_title);
1485  if (match.hasMatch())
1486  {
1487  static const QString mcaCompleteTitlea { "^'?(" };
1488  static const QString mcaCompleteTitleb { R"([^\.\?]+[^\'])'?[\.\?]\s+(.+))" };
1489  static const QRegularExpression mcaCompleteTitle
1490  { mcaCompleteTitlea + match.captured(1) + mcaCompleteTitleb,
1491  QRegularExpression::CaseInsensitiveOption};
1492  match = mcaCompleteTitle.match(event.m_description);
1493  if (match.hasMatch())
1494  {
1495  event.m_title = match.captured(1).trimmed();
1496  event.m_description = match.captured(2).trimmed();
1497  }
1498  }
1499 
1500  // Try to find subtitle in description
1501  static const QRegularExpression mcaSubtitle { R"(^'([^\.]+)'\.\s+(.+))" };
1502  match = mcaSubtitle.match(event.m_description);
1503  if (match.hasMatch())
1504  {
1505  uint matchLen = match.capturedLength(1);
1506  uint evDescLen = std::max(static_cast<int>(event.m_description.length()), 1);
1507 
1508  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1509  ((matchLen * 100 / evDescLen) < SUBTITLE_PCT))
1510  {
1511  event.m_subtitle = match.captured(1);
1512  event.m_description = match.captured(2);
1513  }
1514  }
1515 
1516  // Try to find episode numbers in subtitle
1517  static const QRegularExpression mcaSeries { R"(^S?(\d+)\/E?(\d+)\s-\s(.*)$)" };
1518  match = mcaSeries.match(event.m_subtitle);
1519  if (match.hasMatch())
1520  {
1521  uint season = match.capturedView(1).toUInt();
1522  uint episode = match.capturedView(2).toUInt();
1523  event.m_subtitle = match.captured(3).trimmed();
1524  event.m_syndicatedepisodenumber =
1525  QString("S%1E%2").arg(season).arg(episode);
1526  event.m_season = season;
1527  event.m_episode = episode;
1528  event.m_categoryType = ProgramInfo::kCategorySeries;
1529  }
1530 
1531  // Closed captioned?
1532  static const QRegularExpression mcaCC { R"(,?\s(HI|English) Subtitles\.?)" };
1533  int position = event.m_description.indexOf(mcaCC);
1534  if (position > 0)
1535  {
1536  event.m_subtitleType |= SUB_HARDHEAR;
1537  event.m_description.remove(mcaCC);
1538  }
1539 
1540  // Dolby Digital 5.1?
1541  static const QRegularExpression mcaDD { R"(,?\sDD\.?)" };
1542  position = event.m_description.indexOf(mcaDD);
1543  if ((position > 0) && (position > event.m_description.length() - 7))
1544  {
1545  event.m_audioProps |= AUD_DOLBY;
1546  event.m_description.remove(mcaDD);
1547  }
1548 
1549  // Remove bouquet tags
1550  static const QRegularExpression mcaAvail { R"(\s(Only available on [^\.]*bouquet|Not available in RSA [^\.]*)\.?)" };
1551  event.m_description.remove(mcaAvail);
1552 
1553  // Try to find year and director from the end of the description
1554  bool isMovie = false;
1555  static const QRegularExpression mcaCredits { R"((.*)\s\((\d{4})\)\s*([^\.]+)\.?\s*$)" };
1556  match = mcaCredits.match(event.m_description);
1557  if (match.hasMatch())
1558  {
1559  isMovie = true;
1560  event.m_description = match.captured(1).trimmed();
1561  bool ok = false;
1562  uint y = match.captured(2).trimmed().toUInt(&ok);
1563  if (ok)
1564  event.m_airdate = y;
1565  event.AddPerson(DBPerson::kDirector, match.captured(3).trimmed());
1566  }
1567  else
1568  {
1569  // Try to find year only from the end of the description
1570  static const QRegularExpression mcaYear { R"((.*)\s\((\d{4})\)\s*$)" };
1571  match = mcaYear.match(event.m_description);
1572  if (match.hasMatch())
1573  {
1574  isMovie = true;
1575  event.m_description = match.captured(1).trimmed();
1576  bool ok = false;
1577  uint y = match.captured(2).trimmed().toUInt(&ok);
1578  if (ok)
1579  event.m_airdate = y;
1580  }
1581  }
1582 
1583  if (isMovie)
1584  {
1585  static const QRegularExpression mcaActors { R"((.*\.)\s+([^\.]+\s[A-Z][^\.]+)\.\s*)" };
1586  match = mcaActors.match(event.m_description);
1587  if (match.hasMatch())
1588  {
1589  static const QRegularExpression mcaActorsSeparator { "(,\\s+)" };
1590 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1591  const QStringList actors = match.captured(2).split(
1592  mcaActorsSeparator, QString::SkipEmptyParts);
1593 #else
1594  const QStringList actors = match.captured(2).split(
1595  mcaActorsSeparator, Qt::SkipEmptyParts);
1596 #endif
1597  /* Possible TODO: if EIT inlcude the priority and/or character
1598  * names for the actors, include them in AddPerson call. */
1599  for (const auto & actor : qAsConst(actors))
1600  event.AddPerson(DBPerson::kActor, actor.trimmed());
1601  event.m_description = match.captured(1).trimmed();
1602  }
1603  event.m_categoryType = ProgramInfo::kCategoryMovie;
1604  }
1605 }
1606 
1611 {
1612  // subtitle with episode number: "Folge *: 'subtitle'
1613  static const QRegularExpression superRTLSubtitle { R"(^Folge\s(\d{1,3}):\s'(.*)')" };
1614  auto match = superRTLSubtitle.match(event.m_subtitle);
1615  if (match.hasMatch())
1616  {
1617  event.m_season = 0;
1618  event.m_episode = match.capturedView(1).toUInt();
1619  event.m_subtitle = match.captured(2);
1620  }
1621 
1622  // No need to continue without a description or with an subtitle.
1623  if (event.m_description.length() <= 0 || event.m_subtitle.length() > 0)
1624  return;
1625 
1626  // Repeat
1627  static const QRegularExpression rtlRepeat
1628  { R"([\s\(]?Wiederholung.+vo[m|n].+(\d{2}\.\d{2}\.\d{4}|\d{2}[:\.]\d{2}\sUhr)\)?)" };
1629  match = rtlRepeat.match(event.m_description);
1630  if (match.hasMatch())
1631  {
1632  // remove '.' if it matches at the beginning of the description
1633  int pos = match.capturedStart(0);
1634  int length = match.capturedLength(0) + (pos ? 0 : 1);
1635  event.m_description = event.m_description.remove(pos, length).trimmed();
1636  }
1637 
1638  // should be (?:\x{8a}|\\.\\s*|$) but 0x8A gets replaced with 0x20
1639  static const QRegularExpression rtlSubtitle1 { R"(^Folge\s(\d{1,4})\s*:\s+'(.*)'(?:\s|\.\s*|$))" };
1640  static const QRegularExpression rtlSubtitle2 { R"(^Folge\s(\d{1,4})\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1641  static const QRegularExpression rtlSubtitle3 { R"(^(?:Folge\s)?(\d{1,4}(?:\/[IVX]+)?)\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1642  static const QRegularExpression rtlSubtitle4 { R"(^Thema.{0,5}:\s([^\.]+)\.\s*)" };
1643  static const QRegularExpression rtlSubtitle5 { "^'(.+)'\\.\\s*" };
1644  static const QRegularExpression rtlEpisodeNo1 { R"(^(Folge\s\d{1,4})\.*\s*)" };
1645  static const QRegularExpression rtlEpisodeNo2 { R"(^(\d{1,2}\/[IVX]+)\.*\s*)" };
1646 
1647  auto match1 = rtlSubtitle1.match(event.m_description);
1648  auto match2 = rtlSubtitle2.match(event.m_description);
1649  auto match3 = rtlSubtitle3.match(event.m_description);
1650  auto match4 = rtlSubtitle4.match(event.m_description);
1651  auto match5 = rtlSubtitle5.match(event.m_description);
1652  auto match6 = rtlEpisodeNo1.match(event.m_description);
1653  auto match7 = rtlEpisodeNo2.match(event.m_description);
1654 
1655  // subtitle with episode number: "Folge *: 'subtitle'. description
1656  if (match1.hasMatch())
1657  {
1658  event.m_syndicatedepisodenumber = match1.captured(1);
1659  event.m_subtitle = match1.captured(2);
1660  event.m_description =
1661  event.m_description.remove(0, match1.capturedLength());
1662  }
1663  // episode number subtitle
1664  else if (match2.hasMatch())
1665  {
1666  event.m_syndicatedepisodenumber = match2.captured(1);
1667  event.m_subtitle = match2.captured(2);
1668  event.m_description =
1669  event.m_description.remove(0, match2.capturedLength());
1670  }
1671  // episode number subtitle
1672  else if (match3.hasMatch())
1673  {
1674  event.m_syndicatedepisodenumber = match3.captured(1);
1675  event.m_subtitle = match3.captured(2);
1676  event.m_description =
1677  event.m_description.remove(0, match3.capturedLength());
1678  }
1679  // "Thema..."
1680  else if (match4.hasMatch())
1681  {
1682  event.m_subtitle = match4.captured(1);
1683  event.m_description =
1684  event.m_description.remove(0, match4.capturedLength());
1685  }
1686  // "'...'"
1687  else if (match5.hasMatch())
1688  {
1689  event.m_subtitle = match5.captured(1);
1690  event.m_description =
1691  event.m_description.remove(0, match5.capturedLength());
1692  }
1693  // episode number
1694  else if (match6.hasMatch())
1695  {
1696  event.m_syndicatedepisodenumber = match6.captured(2);
1697  event.m_subtitle = match6.captured(1);
1698  event.m_description =
1699  event.m_description.remove(0, match6.capturedLength());
1700  }
1701  // episode number
1702  else if (match7.hasMatch())
1703  {
1704  event.m_syndicatedepisodenumber = match7.captured(2);
1705  event.m_subtitle = match7.captured(1);
1706  event.m_description =
1707  event.m_description.remove(0, match7.capturedLength());
1708  }
1709 
1710  /* got an episode title now? (we did not have one at the start of this function) */
1711  if (!event.m_subtitle.isEmpty())
1713 
1714  /* if we do not have an episode title by now try some guessing as last resort */
1715  if (event.m_subtitle.length() == 0)
1716  {
1717  const uint SUBTITLE_PCT = 35; // % of description to allow subtitle up to
1718  const uint lSUBTITLE_MAX_LEN = 50; // max length of subtitle field in db
1719 
1720  static const QRegularExpression rtlSubtitle { R"(^([^\.]{3,})\.\s+(.+))" };
1721  match = rtlSubtitle.match(event.m_description);
1722  if (match.hasMatch())
1723  {
1724  uint matchLen = match.capturedLength(1);
1725  uint evDescLen = std::max(static_cast<int>(event.m_description.length()), 1);
1726 
1727  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1728  (matchLen * 100 / evDescLen < SUBTITLE_PCT))
1729  {
1730  event.m_subtitle = match.captured(1);
1731  event.m_description = match.captured(2);
1732  }
1733  }
1734  }
1735 }
1736 
1737 // FIXME add more jobs
1738 static const QMap<QString,DBPerson::Role> deCrewTitle {
1739  { "Regie", DBPerson::kDirector },
1740  { "Drehbuch", DBPerson::kWriter },
1741  { "Autor", DBPerson::kWriter },
1742 };
1743 
1748 {
1749  static const QRegularExpression pro7Subtitle { R"(,{0,1}([^,]*?),([^,]+?)\s{0,1}(\d{4})$)" };
1750  auto match = pro7Subtitle.match(event.m_subtitle);
1751  if (match.hasMatch())
1752  {
1753  if (event.m_airdate == 0)
1754  {
1755  event.m_airdate = match.captured(3).toUInt();
1756  }
1757  event.m_subtitle.remove(match.capturedStart(0),
1758  match.capturedLength(0));
1759  }
1760 
1761  /* handle cast, the very last in description */
1762  static const QRegularExpression pro7Cast { "\n\nDarsteller:\n(.*)$",
1763  QRegularExpression::DotMatchesEverythingOption };
1764  match = pro7Cast.match(event.m_description);
1765  if (match.hasMatch())
1766  {
1767  QStringList cast = match.captured(1).split("\n");
1768  for (const auto& line : qAsConst(cast))
1769  {
1770  static const QRegularExpression pro7CastOne { R"(^([^\(]*?)\((.*)\)$)" };
1771  auto match2 = pro7CastOne.match(line);
1772  if (match2.hasMatch())
1773  {
1774  /* Possible TODO: if EIT inlcude the priority and/or character
1775  * names for the actors, include them in AddPerson call. */
1776  event.AddPerson (DBPerson::kActor, match2.captured(1).simplified());
1777  }
1778  }
1779  event.m_description.remove(match.capturedStart(0),
1780  match.capturedLength(0));
1781  }
1782 
1783  /* handle crew, the new very last in description
1784  * format: "Role: Name" or "Role: Name1, Name2"
1785  */
1786  static const QRegularExpression pro7Crew { "\n\n(Regie:.*)$",
1787  QRegularExpression::DotMatchesEverythingOption };
1788  match = pro7Crew.match(event.m_description);
1789  if (match.hasMatch())
1790  {
1791  QStringList crew = match.captured(1).split("\n");
1792  for (const auto& line : qAsConst(crew))
1793  {
1794  static const QRegularExpression pro7CrewOne { R"(^(.*?):\s+(.*)$)" };
1795  auto match2 = pro7CrewOne.match(line);
1796  if (match2.hasMatch())
1797  {
1799  if (deCrewTitle.contains(match2.captured(1)))
1800  role = deCrewTitle[match2.captured(1)];
1801  QStringList names = match2.captured(2).simplified().split(R"(\s*,\s*)");
1802  for (const auto & name : qAsConst(names))
1803  {
1804  /* Possible TODO: if EIT inlcude the priority
1805  * and/or character names for the actors, include
1806  * them in AddPerson call. */
1807  event.AddPerson (role, name);
1808  }
1809  }
1810  }
1811  event.m_description.remove(match.capturedStart(0),
1812  match.capturedLength(0));
1813  }
1814 
1815  /* FIXME unless its Jamie Oliver, then there is neither Crew nor Cast only
1816  * \n\nKoch: Jamie Oliver
1817  */
1818 }
1819 
1824 {
1825  static const QRegularExpression deDisneyChannelSubtitle { R"(,([^,]+?)\s{0,1}(\d{4})$)" };
1826  auto match = deDisneyChannelSubtitle.match(event.m_subtitle);
1827  if (match.hasMatch())
1828  {
1829  if (event.m_airdate == 0)
1830  {
1831  event.m_airdate = match.captured(3).toUInt();
1832  }
1833  event.m_subtitle.remove(match.capturedStart(0),
1834  match.capturedLength(0));
1835  }
1836  static const QRegularExpression tmp { R"(\s[^\s]+?-(Serie))" };
1837  match = tmp.match(event.m_subtitle);
1838  if (match.hasMatch())
1839  {
1840  event.m_categoryType = ProgramInfo::kCategorySeries;
1841  event.m_category=match.captured(0).trimmed();
1842  event.m_subtitle.remove(match.capturedStart(0),
1843  match.capturedLength(0));
1844  }
1845 }
1846 
1851 {
1852  static const QRegularExpression atvSubtitle { R"(,{0,1}\sFolge\s(\d{1,3})$)" };
1853  event.m_subtitle.replace(atvSubtitle, "");
1854 }
1855 
1856 
1861 {
1862  static const QRegularExpression fiRerun { R"(\s?Uusinta[a-zA-Z\s]*\.?)" };
1863  auto match = fiRerun.match(event.m_description);
1864  if (match.hasMatch())
1865  {
1866  event.m_previouslyshown = true;
1867  event.m_description.remove(match.capturedStart(), match.capturedLength());
1868  }
1869 
1870  static const QRegularExpression fiRerun2 { R"(\([Uu]\))" };
1871  match = fiRerun2.match(event.m_description);
1872  if (match.hasMatch())
1873  {
1874  event.m_previouslyshown = true;
1875  event.m_description.remove(match.capturedStart(), match.capturedLength());
1876  }
1877 
1878  // Check for (Stereo) in the decription and set the <audio> tags
1879  match = kStereo.match(event.m_description);
1880  if (match.hasMatch())
1881  {
1882  event.m_audioProps |= AUD_STEREO;
1883  event.m_description.remove(match.capturedStart(), match.capturedLength());
1884  }
1885 
1886  // Remove age limit in parenthesis at end of title
1887  static const QRegularExpression fiAgeLimit { R"(\((\d{1,2}|[ST])\)$)" };
1888  match = fiAgeLimit.match(event.m_title);
1889  if (match.hasMatch())
1890  {
1891  EventRating prograting;
1892  prograting.m_system="FI"; prograting.m_rating = match.captured(1);
1893  event.m_ratings.push_back(prograting);
1894  event.m_title.remove(match.capturedStart(), match.capturedLength());
1895  }
1896 
1897  // Remove Film or Elokuva at start of title
1898  static const QRegularExpression fiFilm { "^(Film|Elokuva): " };
1899  match = fiFilm.match(event.m_title);
1900  if (match.hasMatch())
1901  {
1902  event.m_category = "Film";
1903  event.m_categoryType = ProgramInfo::kCategoryMovie;
1904  event.m_title.remove(match.capturedStart(), match.capturedLength());
1905  }
1906 }
1907 
1913 {
1914  QString country = "";
1915 
1916  static const QRegularExpression dePremiereLength { R"(\s?[0-9]+\sMin\.)" };
1917  event.m_description = event.m_description.replace(dePremiereLength, "");
1918 
1919  static const QRegularExpression dePremiereAirdate { R"(\s?([^\s^\.]+)\s((?:1|2)[0-9]{3})\.)" };
1920  auto match = dePremiereAirdate.match(event.m_description);
1921  if ( match.hasMatch())
1922  {
1923  country = match.captured(1).trimmed();
1924  bool ok = false;
1925  uint y = match.captured(2).toUInt(&ok);
1926  if (ok)
1927  event.m_airdate = y;
1928  event.m_description.remove(match.capturedStart(0),
1929  match.capturedLength(0));
1930  }
1931 
1932  static const QRegularExpression dePremiereCredits { R"(\sVon\s([^,]+)(?:,|\su\.\sa\.)\smit\s([^\.]*)\.)" };
1933  match = dePremiereCredits.match(event.m_description);
1934  if (match.hasMatch())
1935  {
1936  event.AddPerson(DBPerson::kDirector, match.captured(1));
1937 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1938  const QStringList actors = match.captured(2).split(
1939  ", ", QString::SkipEmptyParts);
1940 #else
1941  const QStringList actors = match.captured(2).split(
1942  ", ", Qt::SkipEmptyParts);
1943 #endif
1944  /* Possible TODO: if EIT inlcude the priority and/or character
1945  * names for the actors, include them in AddPerson call. */
1946  for (const auto & actor : qAsConst(actors))
1947  event.AddPerson(DBPerson::kActor, actor);
1948  event.m_description.remove(match.capturedStart(0),
1949  match.capturedLength(0));
1950  }
1951 
1952  event.m_description = event.m_description.replace("\u000A$", "");
1953  event.m_description = event.m_description.replace("\u000A", " ");
1954 
1955  // move the original titel from the title to subtitle
1956  static const QRegularExpression dePremiereOTitle { R"(\s*\(([^\)]*)\)$)" };
1957  match = dePremiereOTitle.match(event.m_title);
1958  if (match.hasMatch())
1959  {
1960  event.m_subtitle = QString("%1, %2").arg(match.captured(1), country);
1961  event.m_title.remove(match.capturedStart(0),
1962  match.capturedLength(0));
1963  }
1964 
1965  // Find infos about season and episode number
1966  static const QRegularExpression deSkyDescriptionSeasonEpisode { R"(^(\d{1,2}).\sStaffel,\sFolge\s(\d{1,2}):\s)" };
1967  match = deSkyDescriptionSeasonEpisode.match(event.m_description);
1968  if (match.hasMatch())
1969  {
1970  event.m_season = match.captured(1).trimmed().toUInt();
1971  event.m_episode = match.captured(2).trimmed().toUInt();
1972  event.m_description.remove(match.capturedStart(0),
1973  match.capturedLength(0));
1974  }
1975 }
1976 
1977 /*
1978  * Mapping table from English category names to Dutch names and types
1979  */
1980 struct NLMapResult {
1981  QString name;
1983 };
1984 static const QMap<QString, NLMapResult> categoryTrans = {
1985  { "Documentary", { "Documentaire", ProgramInfo::kCategoryNone } },
1986  { "News", { "Nieuws/actualiteiten", ProgramInfo::kCategoryNone } },
1987  { "Kids", { "Jeugd", ProgramInfo::kCategoryNone } },
1988  { "Show/game Show", { "Amusement", ProgramInfo::kCategoryTVShow } },
1989  { "Music/Ballet/Dance", { "Muziek", ProgramInfo::kCategoryNone } },
1990  { "News magazine", { "Informatief", ProgramInfo::kCategoryNone } },
1991  { "Movie", { "Film", ProgramInfo::kCategoryMovie } },
1992  { "Nature/animals/Environment", { "Natuur", ProgramInfo::kCategoryNone } },
1993  { "Movie - Adult", { "Erotiek", ProgramInfo::kCategoryNone } },
1994  { "Movie - Soap/melodrama/folkloric",
1995  { "Serie/soap", ProgramInfo::kCategorySeries } },
1996  { "Arts/Culture", { "Kunst/Cultuur", ProgramInfo::kCategoryNone } },
1997  { "Sports", { "Sport", ProgramInfo::kCategorySports } },
1998  { "Cartoons/Puppets", { "Animatie", ProgramInfo::kCategoryNone } },
1999  { "Movie - Comedy", { "Comedy", ProgramInfo::kCategorySeries } },
2000  { "Movie - Detective/Thriller", { "Misdaad", ProgramInfo::kCategoryNone } },
2001  { "Social/Spiritual Sciences", { "Religieus", ProgramInfo::kCategoryNone } },
2002 };
2003 
2008 {
2009  QString fullinfo = event.m_subtitle + event.m_description;
2010  event.m_subtitle = "";
2011 
2012  // Convert categories to Dutch categories Myth knows.
2013  // nog invoegen: comedy, sport, misdaad
2014 
2015  if (categoryTrans.contains(event.m_category))
2016  {
2017  auto [name, type] = categoryTrans[event.m_category];
2018  event.m_category = name;
2019  event.m_categoryType = type;
2020  }
2021 
2022  // Film - categories are usually not Films
2023  if (event.m_category.startsWith("Film -"))
2024  event.m_categoryType = ProgramInfo::kCategorySeries;
2025 
2026  // Get stereo info
2027  auto match = kStereo.match(fullinfo);
2028  if (match.hasMatch())
2029  {
2030  event.m_audioProps |= AUD_STEREO;
2031  fullinfo.remove(match.capturedStart(), match.capturedLength());
2032  }
2033 
2034  //Get widescreen info
2035  static const QRegularExpression nlWide { "breedbeeld" };
2036  match = nlWide.match(fullinfo);
2037  if (match.hasMatch())
2038  {
2039  event.m_videoProps |= VID_WIDESCREEN;
2040  fullinfo = fullinfo.replace("breedbeeld", ".");
2041  }
2042 
2043  // Get repeat info
2044  static const QRegularExpression nlRepeat { "herh." };
2045  match = nlRepeat.match(fullinfo);
2046  if (match.hasMatch())
2047  fullinfo = fullinfo.replace("herh.", ".");
2048 
2049  // Get teletext subtitle info
2050  static const QRegularExpression nlTxt { "txt" };
2051  match = nlTxt.match(fullinfo);
2052  if (match.hasMatch())
2053  {
2054  event.m_subtitleType |= SUB_NORMAL;
2055  fullinfo = fullinfo.replace("txt", ".");
2056  }
2057 
2058  // Get HDTV information
2059  static const QRegularExpression nlHD { R"(\sHD$)" };
2060  match = nlHD.match(event.m_title);
2061  if (match.hasMatch())
2062  {
2063  event.m_videoProps |= VID_HDTV;
2064  event.m_title.remove(match.capturedStart(), match.capturedLength());
2065  }
2066 
2067  // Try to make subtitle from Afl.:
2068  static const QRegularExpression nlSub { R"(\sAfl\.:\s([^\.]+)\.)" };
2069  match = nlSub.match(fullinfo);
2070  if (match.hasMatch())
2071  {
2072  QString tmpSubString = match.captured(0);
2073  tmpSubString = tmpSubString.right(match.capturedLength() - 7);
2074  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2075  fullinfo.remove(match.capturedStart(), match.capturedLength());
2076  }
2077 
2078  // Try to make subtitle from " "
2079  static const QRegularExpression nlSub2 { R"(\s\"([^\"]+)\")" };
2080  match = nlSub2.match(fullinfo);
2081  if (match.hasMatch())
2082  {
2083  QString tmpSubString = match.captured(0);
2084  tmpSubString = tmpSubString.right(match.capturedLength() - 2);
2085  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2086  fullinfo.remove(match.capturedStart(), match.capturedLength());
2087  }
2088 
2089 
2090  // This is trying to catch the case where the subtitle is in the main title
2091  // but avoid cases where it isn't a subtitle e.g cd:uk
2092  int position = 0;
2093  if (((position = event.m_title.indexOf(":")) != -1) &&
2094  (event.m_title[position + 1].toUpper() == event.m_title[position + 1]) &&
2095  (event.m_subtitle.isEmpty()))
2096  {
2097  event.m_subtitle = event.m_title.mid(position + 1);
2098  event.m_title = event.m_title.left(position);
2099  }
2100 
2101 
2102  // Get the actors
2103  static const QRegularExpression nlActors { R"(\sMet:\s.+e\.a\.)" };
2104  static const QRegularExpression nlPersSeparator { R"((, |\sen\s))" };
2105  match = nlActors.match(fullinfo);
2106  if (match.hasMatch())
2107  {
2108  QString tmpActorsString = match.captured(0);
2109  tmpActorsString = tmpActorsString.right(tmpActorsString.length() - 6);
2110  tmpActorsString = tmpActorsString.left(tmpActorsString.length() - 5);
2111 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2112  const QStringList actors =
2113  tmpActorsString.split(nlPersSeparator, QString::SkipEmptyParts);
2114 #else
2115  const QStringList actors =
2116  tmpActorsString.split(nlPersSeparator, Qt::SkipEmptyParts);
2117 #endif
2118  /* Possible TODO: if EIT inlcude the priority and/or character
2119  * names for the actors, include them in AddPerson call. */
2120  for (const auto & actor : qAsConst(actors))
2121  event.AddPerson(DBPerson::kActor, actor);
2122  fullinfo.remove(match.capturedStart(), match.capturedLength());
2123  }
2124 
2125  // Try to find presenter
2126  static const QRegularExpression nlPres { R"(\sPresentatie:\s([^\.]+)\.)" };
2127  match = nlPres.match(fullinfo);
2128  if (match.hasMatch())
2129  {
2130  QString tmpPresString = match.captured(0);
2131  tmpPresString = tmpPresString.right(tmpPresString.length() - 14);
2132  tmpPresString = tmpPresString.left(tmpPresString.length() -1);
2133 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2134  const QStringList presenters =
2135  tmpPresString.split(nlPersSeparator, QString::SkipEmptyParts);
2136 #else
2137  const QStringList presenters =
2138  tmpPresString.split(nlPersSeparator, Qt::SkipEmptyParts);
2139 #endif
2140  for (const auto & presenter : qAsConst(presenters))
2141  event.AddPerson(DBPerson::kPresenter, presenter);
2142  fullinfo.remove(match.capturedStart(), match.capturedLength());
2143  }
2144 
2145  // Try to find year
2146  static const QRegularExpression nlYear1 { R"(\suit\s([1-2][0-9]{3}))" };
2147  static const QRegularExpression nlYear2 { R"((\s\([A-Z]{0,3}/?)([1-2][0-9]{3})\))",
2148  QRegularExpression::CaseInsensitiveOption };
2149  match = nlYear1.match(fullinfo);
2150  if (match.hasMatch())
2151  {
2152  bool ok = false;
2153  uint y = match.capturedView(1).toUInt(&ok);
2154  if (ok)
2155  event.m_originalairdate = QDate(y, 1, 1);
2156  }
2157 
2158  match = nlYear2.match(fullinfo);
2159  if (match.hasMatch())
2160  {
2161  bool ok = false;
2162  uint y = match.capturedView(2).toUInt(&ok);
2163  if (ok)
2164  event.m_originalairdate = QDate(y, 1, 1);
2165  }
2166 
2167  // Try to find director
2168  static const QRegularExpression nlDirector { R"(\svan\s(([A-Z][a-z]+\s)|([A-Z]\.\s)))" };
2169  match = nlDirector.match(fullinfo);
2170  if (match.hasMatch())
2171  event.AddPerson(DBPerson::kDirector, match.captured(1));
2172 
2173  // Strip leftovers
2174  static const QRegularExpression nlRub { R"(\s?\(\W+\)\s?)" };
2175  fullinfo.remove(nlRub);
2176 
2177  // Strip category info from description
2178  static const QRegularExpression nlCat { "^(Amusement|Muziek|Informatief|Nieuws/actualiteiten|Jeugd|Animatie|Sport|Serie/soap|Kunst/Cultuur|Documentaire|Film|Natuur|Erotiek|Comedy|Misdaad|Religieus)\\.\\s" };
2179  fullinfo.remove(nlCat);
2180 
2181  // Remove omroep from title
2182  static const QRegularExpression nlOmroep { R"(\s\(([A-Z]+/?)+\)$)" };
2183  event.m_title.remove(nlOmroep);
2184 
2185  // Put information back in description
2186 
2187  event.m_description = fullinfo;
2188 }
2189 
2191 {
2192  // remove category movie from short events
2194  event.m_starttime.secsTo(event.m_endtime) < kMinMovieDuration)
2195  {
2196  /* default taken from ContentDescriptor::GetMythCategory */
2197  event.m_categoryType = ProgramInfo::kCategoryTVShow;
2198  }
2199 }
2200 
2205 {
2206  // Check for "title (R)" in the title
2207  static const QRegularExpression noRerun { "\\(R\\)" };
2208  auto match = noRerun.match(event.m_title);
2209  if (match.hasMatch())
2210  {
2211  event.m_previouslyshown = true;
2212  event.m_title.remove(match.capturedStart(), match.capturedLength());
2213  }
2214  // Check for "subtitle (HD)" in the subtitle
2215  static const QRegularExpression noHD { R"([\(\[]HD[\)\]])" };
2216  match = noHD.match(event.m_subtitle);
2217  if (match.hasMatch())
2218  {
2219  event.m_videoProps |= VID_HDTV;
2220  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
2221  }
2222  // Check for "description (HD)" in the description
2223  match = noHD.match(event.m_description);
2224  if (match.hasMatch())
2225  {
2226  event.m_videoProps |= VID_HDTV;
2227  event.m_description.remove(match.capturedStart(), match.capturedLength());
2228  }
2229 }
2230 
2235 {
2236  // Check for "title (R)" in the title
2237  static const QRegularExpression noRerun { "\\(R\\)" };
2238  auto match = noRerun.match(event.m_title);
2239  if (match.hasMatch())
2240  {
2241  event.m_previouslyshown = true;
2242  event.m_title.remove(match.capturedStart(), match.capturedLength());
2243  }
2244  // Check for "(R)" in the description
2245  match = noRerun.match(event.m_description);
2246  if (match.hasMatch())
2247  {
2248  event.m_previouslyshown = true;
2249  }
2250 
2251  // Move colon separated category from program-titles into description
2252  // Have seen "NRK2s historiekveld: Film: bla-bla"
2253  static const QRegularExpression noNRKCategories
2254  { "^(Superstrek[ea]r|Supersomm[ea]r|Superjul|Barne-tv|Fantorangen|Kuraffen|Supermorg[eo]n|Julemorg[eo]n|Sommermorg[eo]n|"
2255  "Kuraffen-TV|Sport i dag|NRKs sportsl.rdag|NRKs sportss.ndag|Dagens dokumentar|"
2256  "NRK2s historiekveld|Detektimen|Nattkino|Filmklassiker|Film|Kortfilm|P.skemorg[eo]n|"
2257  "Radioteatret|Opera|P2-Akademiet|Nyhetsmorg[eo]n i P2 og Alltid Nyheter:): (.+)" };
2258  match = noNRKCategories.match(event.m_title);
2259  if (match.hasMatch() && (match.capturedLength(2) > 1))
2260  {
2261  event.m_title = match.captured(2);
2262  event.m_description = "(" + match.captured(1) + ") " + event.m_description;
2263  }
2264 
2265  // Remove season premiere markings
2266  static const QRegularExpression noPremiere { "\\s+-\\s+(Sesongpremiere|Premiere|premiere)!?$" };
2267  match = noPremiere.match(event.m_title);
2268  if (match.hasMatch() && (match.capturedStart() >= 3))
2269  event.m_title.remove(match.capturedStart(), match.capturedLength());
2270 
2271  // Try to find colon-delimited subtitle in title, only tested for NRK channels
2272  if (!event.m_title.startsWith("CSI:") &&
2273  !event.m_title.startsWith("CD:") &&
2274  !event.m_title.startsWith("Distriktsnyheter: fra"))
2275  {
2276  static const QRegularExpression noColonSubtitle { "^([^:]+): (.+)" };
2277  match = noColonSubtitle.match(event.m_title);
2278  if (match.hasMatch())
2279  {
2280  if (event.m_subtitle.length() <= 0)
2281  {
2282  event.m_title = match.captured(1);
2283  event.m_subtitle = match.captured(2);
2284  }
2285  else if (event.m_subtitle == match.captured(2))
2286  {
2287  event.m_title = match.captured(1);
2288  }
2289  }
2290  }
2291 }
2292 
2297 {
2298  // Source: YouSee Rules of Operation v1.16
2299  // url: http://yousee.dk/~/media/pdf/CPE/Rules_Operation.ashx
2300  int episode = -1;
2301  int season = -1;
2302 
2303  // Title search
2304  // episode and part/part total
2305  static const QRegularExpression dkEpisode { R"(\(([0-9]+)\))" };
2306  auto match = dkEpisode.match(event.m_title);
2307  if (match.hasMatch())
2308  {
2309  episode = match.capturedView(1).toInt();
2310  event.m_partnumber = match.capturedView(1).toInt();
2311  event.m_title.remove(match.capturedStart(), match.capturedLength());
2312  }
2313 
2314  static const QRegularExpression dkPart { R"(\(([0-9]+):([0-9]+)\))" };
2315  match = dkPart.match(event.m_title);
2316  if (match.hasMatch())
2317  {
2318  episode = match.capturedView(1).toInt();
2319  event.m_partnumber = match.capturedView(1).toInt();
2320  event.m_parttotal = match.capturedView(2).toInt();
2321  event.m_title.remove(match.capturedStart(), match.capturedLength());
2322  }
2323 
2324  // subtitle delimiters
2325  static const QRegularExpression dkSubtitle1 { "^([^:]+): (.+)" };
2326  match = dkSubtitle1.match(event.m_title);
2327  if (match.hasMatch())
2328  {
2329  event.m_title = match.captured(1);
2330  event.m_subtitle = match.captured(2);
2331  }
2332  else
2333  {
2334  static const QRegularExpression dkSubtitle2 { "^([^:]+) - (.+)" };
2335  match = dkSubtitle2.match(event.m_title);
2336  if (match.hasMatch())
2337  {
2338  event.m_title = match.captured(1);
2339  event.m_subtitle = match.captured(2);
2340  }
2341  }
2342 
2343  // Description search
2344  // Season (Sæson [:digit:]+.) => episode = season episode number
2345  // or year (- år [:digit:]+(\\)|:) ) => episode = total episode number
2346  static const QRegularExpression dkSeason1 { "Sæson ([0-9]+)\\." };
2347  match = dkSeason1.match(event.m_description);
2348  if (match.hasMatch())
2349  {
2350  season = match.capturedView(1).toInt();
2351  }
2352  else
2353  {
2354  static const QRegularExpression dkSeason2 { "- år ([0-9]+) :" };
2355  match = dkSeason2.match(event.m_description);
2356  if (match.hasMatch())
2357  {
2358  season = match.capturedView(1).toInt();
2359  }
2360  }
2361 
2362  if (episode > 0)
2363  event.m_episode = episode;
2364 
2365  if (season > 0)
2366  event.m_season = season;
2367 
2368  //Feature:
2369  static const QRegularExpression dkFeatures { "Features:(.+)" };
2370  match = dkFeatures.match(event.m_description);
2371  if (match.hasMatch())
2372  {
2373  QString features = match.captured(1);
2374  event.m_description.remove(match.capturedStart(),
2375  match.capturedLength());
2376  // 16:9
2377  static const QRegularExpression dkWidescreen { " 16:9" };
2378  if (features.indexOf(dkWidescreen) != -1)
2379  event.m_videoProps |= VID_WIDESCREEN;
2380  // HDTV
2381  static const QRegularExpression dkHD { " HD" };
2382  if (features.indexOf(dkHD) != -1)
2383  event.m_videoProps |= VID_HDTV;
2384  // Dolby Digital surround
2385  static const QRegularExpression dkDolby { " 5:1" };
2386  if (features.indexOf(dkDolby) != -1)
2387  event.m_audioProps |= AUD_DOLBY;
2388  // surround
2389  static const QRegularExpression dkSurround { R"( \(\(S\)\))" };
2390  if (features.indexOf(dkSurround) != -1)
2391  event.m_audioProps |= AUD_SURROUND;
2392  // stereo
2393  static const QRegularExpression dkStereo { " S" };
2394  if (features.indexOf(dkStereo) != -1)
2395  event.m_audioProps |= AUD_STEREO;
2396  // (G)
2397  static const QRegularExpression dkReplay { " \\(G\\)" };
2398  if (features.indexOf(dkReplay) != -1)
2399  event.m_previouslyshown = true;
2400  // TTV
2401  static const QRegularExpression dkTxt { " TTV" };
2402  if (features.indexOf(dkTxt) != -1)
2403  event.m_subtitleType |= SUB_NORMAL;
2404  }
2405 
2406  // Series and program id
2407  // programid is currently not transmitted
2408  // YouSee doesn't use a default authority but uses the first byte after
2409  // the / to indicate if the seriesid is global unique or unique on the
2410  // service id
2411  if (event.m_seriesId.length() >= 1 && event.m_seriesId[0] == '/')
2412  {
2413  QString newid;
2414  if (event.m_seriesId[1] == '1')
2415  {
2416  newid = QString("%1%2").arg(event.m_chanid).
2417  arg(event.m_seriesId.mid(2,8));
2418  }
2419  else
2420  {
2421  newid = event.m_seriesId.mid(2,8);
2422  }
2423  event.m_seriesId = newid;
2424  }
2425 
2426  if (event.m_programId.length() >= 1 && event.m_programId[0] == '/')
2427  event.m_programId[0]='_';
2428 
2429  // Add season and episode number to subtitle
2430  if (episode > 0)
2431  {
2432  event.m_subtitle = QString("%1 (%2").arg(event.m_subtitle).arg(episode);
2433  if (event.m_parttotal >0)
2434  event.m_subtitle = QString("%1:%2").arg(event.m_subtitle).
2435  arg(event.m_parttotal);
2436  if (season > 0)
2437  {
2438  event.m_season = season;
2439  event.m_episode = episode;
2440  event.m_syndicatedepisodenumber =
2441  QString("S%1E%2").arg(season).arg(episode);
2442  event.m_subtitle = QString("%1 Sæson %2").arg(event.m_subtitle).
2443  arg(season);
2444  }
2445  event.m_subtitle = QString("%1)").arg(event.m_subtitle);
2446  }
2447 
2448  // Find actors and director in description
2449  static const QRegularExpression dkDirector { "(?:Instr.: |Instrukt.r: )(.+)$" };
2450  static const QRegularExpression dkPersonsSeparator { "(, )|(og )" };
2451  QStringList directors {};
2452  match = dkDirector.match(event.m_description);
2453  if (match.hasMatch())
2454  {
2455  QString tmpDirectorsString = match.captured(1);
2456 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2457  directors = tmpDirectorsString.split(dkPersonsSeparator, QString::SkipEmptyParts);
2458 #else
2459  directors = tmpDirectorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2460 #endif
2461  for (const auto & director : qAsConst(directors))
2462  {
2463  tmpDirectorsString = director.split(":").last().trimmed().
2464  remove(QRegularExpression("\\.$"));
2465  if (tmpDirectorsString != "")
2466  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2467  }
2468  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2469  }
2470 
2471  static const QRegularExpression dkActors { "(?:Medvirkende: |Medv\\.: )(.+)" };
2472  match = dkActors.match(event.m_description);
2473  if (match.hasMatch())
2474  {
2475  QString tmpActorsString = match.captured(1);
2476 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2477  const QStringList actors =
2478  tmpActorsString.split(dkPersonsSeparator, QString::SkipEmptyParts);
2479 #else
2480  const QStringList actors =
2481  tmpActorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2482 #endif
2483  for (const auto & actor : qAsConst(actors))
2484  {
2485  tmpActorsString = actor.split(":").last().trimmed().
2486  remove(QRegularExpression("\\.$"));
2487  if (!tmpActorsString.isEmpty() && !directors.contains(tmpActorsString))
2488  event.AddPerson(DBPerson::kActor, tmpActorsString);
2489  }
2490  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2491  }
2492 
2493  //find year
2494  static const QRegularExpression dkYear { " fra ([0-9]{4})[ \\.]" };
2495  match = dkYear.match(event.m_description);
2496  if (match.hasMatch())
2497  {
2498  bool ok = false;
2499  uint y = match.capturedView(1).toUInt(&ok);
2500  if (ok)
2501  event.m_originalairdate = QDate(y, 1, 1);
2502  }
2503 }
2504 
2509 {
2510  LOG(VB_EIT, LOG_INFO, QString("Applying html strip to %1").arg(event.m_title));
2511  static const QRegularExpression html { "</?EM>", QRegularExpression::CaseInsensitiveOption };
2512  event.m_title.remove(html);
2513 }
2514 
2515 // Moves the subtitle field into the description since it's just used
2516 // as more description field. All the sort-out will happen in the description
2517 // field. Also, sometimes the description is just a repeat of the title. If so,
2518 // we remove it.
2520 {
2521  if (event.m_title == event.m_description)
2522  {
2523  event.m_description = QString("");
2524  }
2525  if (!event.m_subtitle.isEmpty())
2526  {
2527  if (event.m_subtitle.trimmed().right(1) != ".'" )
2528  event.m_subtitle = event.m_subtitle.trimmed() + ".";
2529  event.m_description = event.m_subtitle.trimmed() + QString(" ") + event.m_description;
2530  event.m_subtitle = QString("");
2531  }
2532 }
2533 
2535 {
2536  // Program ratings
2537  static const QRegularExpression grRating { R"(\[(K|Κ|8|12|16|18)\]\s*)",
2538  QRegularExpression::CaseInsensitiveOption };
2539  auto match = grRating.match(event.m_title);
2540  if (match.hasMatch())
2541  {
2542  EventRating prograting;
2543  prograting.m_system="GR"; prograting.m_rating = match.captured(1);
2544  event.m_ratings.push_back(prograting);
2545  event.m_title.remove(match.capturedStart(), match.capturedLength());
2546  event.m_title = event.m_title.trimmed();
2547  }
2548 
2549  //Live show
2550  int position = event.m_title.indexOf("(Ζ)");
2551  if (position != -1)
2552  {
2553  event.m_title = event.m_title.replace("(Ζ)", "");
2554  event.m_description.prepend("Ζωντανή Μετάδοση. ");
2555  }
2556 
2557  // Greek not previously Shown
2558  static const QRegularExpression grNotPreviouslyShown {
2559  R"(\W?(?:-\s*)*(?:\b[Α1]['΄η]?\s*(?:τηλεοπτικ[ηή]\s*)?(?:μετ[αά]δοση|προβολ[ηή]))\W?)",
2560  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2561  match = grNotPreviouslyShown.match(event.m_title);
2562  if (match.hasMatch())
2563  {
2564  event.m_previouslyshown = false;
2565  event.m_title.remove(match.capturedStart(), match.capturedLength());
2566  }
2567 
2568  // Greek Replay (Ε)
2569  // it might look redundant compared to previous check but at least it helps
2570  // remove the (Ε) From the title.
2571  static const QRegularExpression grReplay { R"(\([ΕE]\))" };
2572  match = grReplay.match(event.m_title);
2573  if (match.hasMatch())
2574  {
2575  event.m_previouslyshown = true;
2576  event.m_title.remove(match.capturedStart(), match.capturedLength());
2577  }
2578 
2579  // Check for (HD) in the decription
2580  position = event.m_description.indexOf("(HD)");
2581  if (position != -1)
2582  {
2583  event.m_description = event.m_description.replace("(HD)", "");
2584  event.m_videoProps |= VID_HDTV;
2585  }
2586 
2587  // Check for (Full HD) in the decription
2588  position = event.m_description.indexOf("(Full HD)");
2589  if (position != -1)
2590  {
2591  event.m_description = event.m_description.replace("(Full HD)", "");
2592  event.m_videoProps |= VID_HDTV;
2593  }
2594 
2595  static const QRegularExpression grFixnofullstopActors { R"(\w\s(Παίζουν:|Πρωταγων))" };
2596  match = grFixnofullstopActors.match(event.m_description);
2597  if (match.hasMatch())
2598  event.m_description.insert(match.capturedStart() + 1, ".");
2599 
2600  // If they forgot the "." at the end of the sentence before the actors/directors begin, let's insert it.
2601  static const QRegularExpression grFixnofullstopDirectors { R"(\w\s(Σκηνοθ[εέ]))" };
2602  match = grFixnofullstopDirectors.match(event.m_description);
2603  if (match.hasMatch())
2604  event.m_description.insert(match.capturedStart() + 1, ".");
2605 
2606  // Find actors and director in description
2607  // I am looking for actors first and then for directors/presenters because
2608  // sometimes punctuation is missing and the "Παίζουν:" label is mistaken
2609  // for a director's/presenter's surname (directors/presenters are shown
2610  // before actors in the description field.). So removing the text after
2611  // adding the actors AND THEN looking for dir/pres helps to clear things up.
2612  static const QRegularExpression grActors { R"((?:[Ππ]α[ιί]ζουν:|[ΜMμ]ε τους:|Πρωταγωνιστο[υύ]ν:|Πρωταγωνιστε[ιί]:?)(?:\s+στο ρόλο(?: του| της)?\s(?:\w+\s[οη]\s))?([-\w\s']+(?:,[-\w\s']+)*)(?:κ\.[αά])?\W?)" };
2613  // cap(1) actors, just names
2614  static const QRegularExpression grPeopleSeparator { R"(([,-]\s+))" };
2615  match = grActors.match(event.m_description);
2616  if (match.hasMatch())
2617  {
2618  QString tmpActorsString = match.captured(1);
2619 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2620  const QStringList actors =
2621  tmpActorsString.split(grPeopleSeparator, QString::SkipEmptyParts);
2622 #else
2623  const QStringList actors =
2624  tmpActorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2625 #endif
2626  for (const auto & actor : qAsConst(actors))
2627  {
2628  tmpActorsString = actor.split(":").last().trimmed().
2629  remove(QRegularExpression("\\.$"));
2630  if (tmpActorsString != "")
2631  event.AddPerson(DBPerson::kActor, tmpActorsString);
2632  }
2633  event.m_description.remove(match.capturedStart(), match.capturedLength());
2634  }
2635 
2636  // Director
2637  static const QRegularExpression grDirector { R"((?:Σκηνοθεσία: |Σκηνοθέτης: |Σκηνοθέτης - Επιμέλεια: )(\w+\s\w+\s?)(?:\W?))" };
2638  match = grDirector.match(event.m_description);
2639  if (match.hasMatch())
2640  {
2641  QString tmpDirectorsString = match.captured(1);
2642 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2643  const QStringList directors =
2644  tmpDirectorsString.split(grPeopleSeparator, QString::SkipEmptyParts);
2645 #else
2646  const QStringList directors =
2647  tmpDirectorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2648 #endif
2649  for (const auto & director : qAsConst(directors))
2650  {
2651  tmpDirectorsString = director.split(":").last().trimmed().
2652  remove(QRegularExpression("\\.$"));
2653  if (tmpDirectorsString != "")
2654  {
2655  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2656  }
2657  }
2658  event.m_description.remove(match.capturedStart(), match.capturedLength());
2659  }
2660 
2661  //Try to find presenter
2662  static const QRegularExpression grPres { R"((?:Παρουσ[ιί]αση:(?:\b)*|Παρουσι[αά]ζ(?:ουν|ει)(?::|\sο|\sη)|Παρουσι[αά]στ(?:[ηή]ς|ρια|ριες|[εέ]ς)(?::|\sο|\sη)|Με τ(?:ον |ην )(?:[\s|:|ο|η])*(?:\b)*)([-\w\s]+(?:,[-\w\s]+)*)\W?)",
2663  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2664  match = grPres.match(event.m_description);
2665  if (match.hasMatch())
2666  {
2667  QString tmpPresentersString = match.captured(1);
2668 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2669  const QStringList presenters =
2670  tmpPresentersString.split(grPeopleSeparator, QString::SkipEmptyParts);
2671 #else
2672  const QStringList presenters =
2673  tmpPresentersString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2674 #endif
2675  for (const auto & presenter : qAsConst(presenters))
2676  {
2677  tmpPresentersString = presenter.split(":").last().trimmed().
2678  remove(QRegularExpression("\\.$"));
2679  if (tmpPresentersString != "")
2680  {
2681  event.AddPerson(DBPerson::kPresenter, tmpPresentersString);
2682  }
2683  }
2684  event.m_description.remove(match.capturedStart(), match.capturedLength());
2685  }
2686 
2687  //find year e.g Παραγωγής 1966 ή ΝΤΟΚΙΜΑΝΤΕΡ - 1998 Κατάλληλο για όλους
2688  // Used in Private channels (not 'secret', just not owned by Government!)
2689  static const QRegularExpression grYear { R"(\W?(?:\s?παραγωγ[ηή]ς|\s?-|,)\s*([1-2][0-9]{3})(?:-\d{1,4})?)",
2690  QRegularExpression::CaseInsensitiveOption };
2691  match = grYear.match(event.m_description);
2692  if (match.hasMatch())
2693  {
2694  bool ok = false;
2695  uint y = match.capturedView(1).toUInt(&ok);
2696  if (ok)
2697  {
2698  event.m_originalairdate = QDate(y, 1, 1);
2699  event.m_description.remove(match.capturedStart(), match.capturedLength());
2700  }
2701  }
2702  // Remove " ."
2703  event.m_description = event.m_description.replace(" .",".").trimmed();
2704 
2705  //find country of origin and remove it from description.
2706  static const QRegularExpression grCountry {
2707  R"((?:\W|\b)(?:(ελλην|τουρκ|αμερικ[αά]ν|γαλλ|αγγλ|βρεττ?αν|γερμαν|ρωσσ?|ιταλ|ελβετ|σουηδ|ισπαν|πορτογαλ|μεξικ[αά]ν|κιν[εέ]ζικ|ιαπων|καναδ|βραζιλι[αά]ν)(ικ[ηή][ςσ])))",
2708  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2709  match = grCountry.match(event.m_description);
2710  if (match.hasMatch())
2711  event.m_description.remove(match.capturedStart(), match.capturedLength());
2712 
2713  // Work out the season and episode numbers (if any)
2714  // Matching pattern "Επεισ[όο]διο:?|Επ 3 από 14|3/14" etc
2715  bool series = false;
2716  static const QRegularExpression grSeason {
2717  R"((?:\W-?)*(?:\(-\s*)?\b(([Α-Ω|A|B|E|Z|H|I|K|M|N]{1,2})(?:'|΄)?|(\d{1,2})(?:ος|ου|oς|os)?)(?:\s*[ΚκKk][υύ]κλο(?:[σς]|υ))\s?)",
2718  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2719  // cap(2) is the season for ΑΒΓΔ
2720  // cap(3) is the season for 1234
2721  match = grSeason.match(event.m_title);
2722  if (match.hasMatch())
2723  {
2724  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2725  {
2726  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2727  //must convert them to numbers.
2728  int tmpinteger = match.capturedView(2).toUInt();
2729  if (tmpinteger < 1)
2730  {
2731  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2732  event.m_season = 6;
2733  else
2734  {
2735  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2736  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2737  if (tmpinteger != -1)
2738  event.m_season = tmpinteger;
2739  else
2740  //sometimes they use english letters instead of greek. Compensating:
2741  {
2742  static const QString LettToNumber2 = "0ABΓΔE6ZHΘIKΛMN";
2743  tmpinteger = LettToNumber2.indexOf(match.capturedView(2));
2744  if (tmpinteger != -1)
2745  event.m_season = tmpinteger;
2746  }
2747  }
2748  }
2749  }
2750  else if (!match.capturedView(3).isEmpty()) //number
2751  {
2752  event.m_season = match.capturedView(3).toUInt();
2753  }
2754  series = true;
2755  event.m_title.remove(match.capturedStart(), match.capturedLength());
2756  }
2757 
2758  // I have to search separately for season in title and description because it wouldn't work when in both.
2759  match = grSeason.match(event.m_description);
2760  if (match.hasMatch())
2761  {
2762  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2763  {
2764  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2765  //must convert them to numbers.
2766  int tmpinteger = match.capturedView(2).toUInt();
2767  if (tmpinteger < 1)
2768  {
2769  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2770  event.m_season = 6;
2771  else
2772  {
2773  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2774  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2775  if (tmpinteger != -1)
2776  event.m_season = tmpinteger;
2777  }
2778  }
2779  }
2780  else if (!match.capturedView(3).isEmpty()) //number
2781  {
2782  event.m_season = match.capturedView(3).toUInt();
2783  }
2784  series = true;
2785  event.m_description.remove(match.capturedStart(), match.capturedLength());
2786  }
2787 
2788 
2789  // If Season is in Roman Numerals (I,II,etc)
2790  static const QRegularExpression grSeasonAsRomanNumerals { ",\\s*([MDCLXVIΙΧ]+)$",
2791  QRegularExpression::CaseInsensitiveOption };
2792  match = grSeasonAsRomanNumerals.match(event.m_title);
2793  auto match2 = grSeasonAsRomanNumerals.match(event.m_description);
2794  if (match.hasMatch())
2795  {
2796  if (!match.capturedView(1).isEmpty()) //number
2797  event.m_season = parseRoman(match.captured(1).toUpper());
2798  series = true;
2799  event.m_title.remove(match.capturedStart(), match.capturedLength());
2800  event.m_title = event.m_title.trimmed();
2801  if (event.m_title.right(1) == ",")
2802  event.m_title.chop(1);
2803  }
2804  else if (match2.hasMatch())
2805  {
2806  if (!match2.capturedView(1).isEmpty()) //number
2807  event.m_season = parseRoman(match2.captured(1).toUpper());
2808  series = true;
2809  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2810  event.m_description = event.m_description.trimmed();
2811  if (event.m_description.right(1) == ",")
2812  event.m_description.chop(1);
2813  }
2814 
2815  static const QRegularExpression grlongEp { R"(\b(?:Επ.|επεισ[οό]διο:?)\s*(\d+)\W?)",
2816  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2817  // cap(1) is the Episode No.
2818  match = grlongEp.match(event.m_title);
2819  match2 = grlongEp.match(event.m_description);
2820  if (match.hasMatch() || match2.hasMatch())
2821  {
2822  if (!match.capturedView(1).isEmpty())
2823  {
2824  event.m_episode = match.capturedView(1).toUInt();
2825  series = true;
2826  event.m_title.remove(match.capturedStart(), match.capturedLength());
2827  }
2828  else if (!match2.capturedView(1).isEmpty())
2829  {
2830  event.m_episode = match2.capturedView(1).toUInt();
2831  series = true;
2832  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2833  }
2834  // Sometimes description omits Season if it's 1. We fix this
2835  if (0 == event.m_season)
2836  event.m_season = 1;
2837  }
2838 
2839  // Sometimes, especially on greek national tv, they include comments in the
2840  // title, e.g "connection to ert1", "ert archives".
2841  // Because they obscure the real title, I'll isolate and remove them.
2842 
2843  static const QRegularExpression grCommentsinTitle { R"(\(([Α-Ωα-ω\s\d-]+)\)(?:\s*$)*)" };
2844  // cap1 = real title
2845  // cap0 = real title in parentheses.
2846  match = grCommentsinTitle.match(event.m_title);
2847  if (match.hasMatch()) // found in title instead
2848  event.m_title.remove(match.capturedStart(), match.capturedLength());
2849 
2850  // Sometimes the real (mostly English) title of a movie or series is
2851  // enclosed in parentheses in the event title, subtitle or description.
2852  // Since the subtitle has been moved to the description field by
2853  // EITFixUp::FixGreekSubtitle, I will search for it only in the description.
2854  // It will replace the translated one to get better chances of metadata
2855  // retrieval. The old title will be moved in the description.
2856  static const QRegularExpression grRealTitleInDescription { R"(^\(([A-Za-z\s\d-]+)\)\s*)" };
2857  // cap1 = real title
2858  // cap0 = real title in parentheses.
2859  match = grRealTitleInDescription.match(event.m_description);
2860  if (match.hasMatch())
2861  {
2862  event.m_description.remove(0, match.capturedLength());
2863  if (match.captured(0) != event.m_title.trimmed())
2864  {
2865  event.m_description = "(" + event.m_title.trimmed() + "). " + event.m_description;
2866  }
2867  event.m_title = match.captured(1);
2868  // Remove the real title from the description
2869  }
2870  else // search in title
2871  {
2872  static const QRegularExpression grRealTitleInTitle { R"(\(([A-Za-z\s\d-]+)\)(?:\s*$)?)" };
2873  // cap1 = real title
2874  // cap0 = real title in parentheses.
2875  match = grRealTitleInTitle.match(event.m_title);
2876  if (match.hasMatch()) // found in title instead
2877  {
2878  event.m_title.remove(match.capturedStart(), match.capturedLength());
2879  QString tmpTranslTitle = event.m_title;
2880  //QString tmpTranslTitle = event.m_title.replace(tmptitle.cap(0),"");
2881  event.m_title = match.captured(1);
2882  event.m_description = "(" + tmpTranslTitle.trimmed() + "). " + event.m_description;
2883  }
2884  }
2885 
2886  // Description field: "^Episode: Lion in the cage. (Description follows)"
2887  static const QRegularExpression grEpisodeAsSubtitle { R"(^Επεισ[οό]διο:\s?([\w\s\-,']+)\.\s?)" };
2888  match = grEpisodeAsSubtitle.match(event.m_description);
2889  if (match.hasMatch())
2890  {
2891  event.m_subtitle = match.captured(1).trimmed();
2892  event.m_description.remove(match.capturedStart(), match.capturedLength());
2893  }
2894  static const QRegularExpression grMovie { R"(\bταιν[ιί]α\b)",
2895  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2896  bool isMovie = (event.m_description.indexOf(grMovie) !=-1) ;
2897  if (isMovie)
2898  event.m_categoryType = ProgramInfo::kCategoryMovie;
2899  else if (series)
2900  event.m_categoryType = ProgramInfo::kCategorySeries;
2901  // clear double commas.
2902  event.m_description.replace(",,", ",");
2903 
2904 // να σβήσω τα κομμάτια που περισσεύουν από την περιγραφή πχ παραγωγής χχχχ
2905 }
2906 
// EITFixUp::FixGreekCategories
// Derives event.m_category (a Greek category label) by matching keyword
// patterns first against event.m_description and, if none match, against
// event.m_title.
//
// Outcome, as implemented below:
//  - description matches both the "fantasy" and the "mystery" pattern
//    -> category "Φαντασίας/Μυστηρίου";
//  - otherwise the first entry of grCategoryDescData whose pattern is found in
//    the description decides the category;
//  - otherwise the first entry of grCategoryTitleData whose pattern is found in
//    the title decides the category;
//  - if nothing matches, event.m_category is left untouched.
//
// NOTE(review): this listing omits source line 2907, which presumably holds the
// function signature; the cross-reference index at the end of the listing gives
// it as `static void FixGreekCategories(DBEventEIT &event)`.
2908 {
      // Pairs a keyword pattern with the category label assigned when it matches.
2909  struct grCategoryEntry {
2910  QRegularExpression expr;
2911  QString category;
2912  };
      // Keyword patterns. All are case-insensitive and built once (function-local
      // statics). Most allow an optional neighbouring non-word character and some
      // an optional adjacent "εκπομπή / σειρά / ταινία" (show / series / movie) word.
      // The Drama, Comedy and Children patterns require (not merely allow) a
      // non-word character directly after the keyword.
      // NOTE(review): unlike the patterns used in FixGreekEIT, these are compiled
      // without UseUnicodePropertiesOption, so the non-word class presumably uses
      // the ASCII-only definition and also matches Greek letters - confirm that
      // this is intended.
2913  static const QRegularExpression grCategFood { "\\W?(?:εκπομπ[ηή]\\W)?(Γαστρονομ[ιί]α[σς]?|μαγειρικ[ηή][σς]?|chef|συνταγ[εέηή]|διατροφ|wine|μ[αά]γειρα[σς]?)\\W?",
2914  QRegularExpression::CaseInsensitiveOption };
2915  static const QRegularExpression grCategDrama { "\\W?(κοινωνικ[ηήό]|δραματικ[ηή]|δρ[αά]μα)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2916  QRegularExpression::CaseInsensitiveOption};
2917  static const QRegularExpression grCategComedy { "\\W?(κωμικ[ηήοό]|χιουμοριστικ[ηήοό]|κωμωδ[ιί]α)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2918  QRegularExpression::CaseInsensitiveOption};
2919  static const QRegularExpression grCategChildren { "\\W?(παιδικ[ηήοό]|κινο[υύ]μ[εέ]ν(ων|α)\\sσχ[εέ]δ[ιί](ων|α))\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2920  QRegularExpression::CaseInsensitiveOption};
2921  static const QRegularExpression grCategMystery { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(μυστηρ[ιί]ου)\\W?",
2922  QRegularExpression::CaseInsensitiveOption};
2923  static const QRegularExpression grCategFantasy { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(φαντασ[ιί]ας)\\W?",
2924  QRegularExpression::CaseInsensitiveOption};
2925  static const QRegularExpression grCategHistory { "\\W?(ιστορικ[ηήοό])\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2926  QRegularExpression::CaseInsensitiveOption};
2927  static const QRegularExpression grCategTeleMag { "\\W?(ενημερωτικ[ηή]|ψυχαγωγικ[ηή]|τηλεπεριοδικ[οό]|μαγκαζ[ιί]νο)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2928  QRegularExpression::CaseInsensitiveOption};
2929  static const QRegularExpression grCategTeleShop { "\\W?(οδηγ[οό][σς]?\\sαγορ[ωώ]ν|τηλεπ[ωώ]λ[ηή]σ|τηλεαγορ|τηλεμ[αά]ρκετ|telemarket)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2930  QRegularExpression::CaseInsensitiveOption};
2931  static const QRegularExpression grCategGameShow { "\\W?(τηλεπαιχν[ιί]δι|quiz)\\W?",
2932  QRegularExpression::CaseInsensitiveOption};
2933  static const QRegularExpression grCategDocumentary { "\\W?(ντοκ[ιυ]μαντ[εέ]ρ)\\W?",
2934  QRegularExpression::CaseInsensitiveOption};
2935  static const QRegularExpression grCategBiography { "\\W?(βιογραφ[ιί]α|βιογραφικ[οό][σς]?)\\W?",
2936  QRegularExpression::CaseInsensitiveOption};
      // grCategNews is only referenced by the title table further down.
2937  static const QRegularExpression grCategNews { "\\W?(δελτ[ιί]ο\\W?|ειδ[ηή]σε(ι[σς]|ων))\\W?",
2938  QRegularExpression::CaseInsensitiveOption};
2939  static const QRegularExpression grCategSports { "\\W?(champion|αθλητικ[αάοόηή]|πρωτ[αά]θλημα|ποδ[οό]σφαιρο(ου)?|κολ[υύ]μβηση|πατιν[αά]ζ|formula|μπ[αά]σκετ|β[οό]λε[ιϊ])\\W?",
2940  QRegularExpression::CaseInsensitiveOption};
2941  static const QRegularExpression grCategMusic { "\\W?(μουσικ[οόηή]|eurovision|τραγο[υύ]δι)\\W?",
2942  QRegularExpression::CaseInsensitiveOption};
2943  static const QRegularExpression grCategReality { "\\W?(ρι[αά]λιτι|reality)\\W?",
2944  QRegularExpression::CaseInsensitiveOption};
2945  static const QRegularExpression grCategReligion { "\\W?(θρησκε[ιί]α|θρησκευτικ|να[οό][σς]?|θε[ιί]α λειτουργ[ιί]α)\\W?",
2946  QRegularExpression::CaseInsensitiveOption};
2947  static const QRegularExpression grCategCulture { "\\W?(τ[εέ]χν(η|ε[σς])|πολιτισμ)\\W?",
2948  QRegularExpression::CaseInsensitiveOption};
      // The negative lookahead keeps "επιστημονικής φαντασίας" (science fiction,
      // covered by grCategSciFi) from matching the "επιστήμ" (science) keyword.
2949  static const QRegularExpression grCategNature { "\\W?(φ[υύ]ση|περιβ[αά]λλο|κατασκευ|επιστ[ηή]μ(?!ονικ[ηή]ς φαντασ[ιί]ας))\\W?",
2950  QRegularExpression::CaseInsensitiveOption};
      // NOTE(review): the '.' in "επιστ(.|...)" is unescaped, so it matches any
      // single character rather than only a literal dot - confirm intent.
2951  static const QRegularExpression grCategSciFi { "\\W?(επιστ(.|ημονικ[ηή]ς)\\s?φαντασ[ιί]ας)\\W?",
2952  QRegularExpression::CaseInsensitiveOption};
2953  static const QRegularExpression grCategHealth { "\\W?(υγε[ιί]α|υγειιν|ιατρικ|διατροφ)\\W?",
2954  QRegularExpression::CaseInsensitiveOption};
2955  static const QRegularExpression grCategSpecial { "\\W?(αφι[εέ]ρωμα)\\W?",
2956  QRegularExpression::CaseInsensitiveOption};
      // Patterns tried against the description, in priority order: the lookup
      // loop stops at the first hit, so earlier entries shadow later ones. For
      // example grCategHealth and grCategFood both contain the keyword "διατροφ"
      // and grCategHealth is listed first; grCategSciFi precedes grCategFantasy.
2957  static const QList<grCategoryEntry> grCategoryDescData = {
2958  { grCategComedy, "Κωμωδία" },
2959  { grCategTeleMag, "Τηλεπεριοδικό" },
2960  { grCategNature, "Επιστήμη/Φύση" },
2961  { grCategHealth, "Υγεία" },
2962  { grCategReality, "Ριάλιτι" },
2963  { grCategDrama, "Κοινωνικό" },
2964  { grCategChildren, "Παιδικό" },
2965  { grCategSciFi, "Επιστ.Φαντασίας" },
2966  { grCategMystery, "Μυστηρίου" },
2967  { grCategFantasy, "Φαντασίας" },
2968  { grCategHistory, "Ιστορικό" },
2969  { grCategTeleShop, "Τηλεπωλήσεις" },
2970  { grCategFood, "Γαστρονομία" },
2971  { grCategGameShow, "Τηλεπαιχνίδι" },
2972  { grCategBiography, "Βιογραφία" },
2973  { grCategSports, "Αθλητικά" },
2974  { grCategMusic, "Μουσική" },
2975  { grCategDocumentary, "Ντοκιμαντέρ" },
2976  { grCategReligion, "Θρησκεία" },
2977  { grCategCulture, "Τέχνες/Πολιτισμός" },
2978  { grCategSpecial, "Αφιέρωμα" },
2979  };
      // Patterns tried against the title, consulted only when no description
      // pattern matched. Same first-hit-wins rule.
2980  static const QList<grCategoryEntry> grCategoryTitleData = {
2981  { grCategTeleShop, "Τηλεπωλήσεις" },
2982  { grCategGameShow, "Τηλεπαιχνίδι" },
2983  { grCategMusic, "Μουσική" },
2984  { grCategNews, "Ειδήσεις" },
2985  };
2986 
      // A description mentioning both fantasy and mystery gets a combined label;
      // this check runs before the table lookup, so it takes precedence over
      // every entry in grCategoryDescData.
2987  // Handle special cases
2988  if ((event.m_description.indexOf(grCategFantasy) != -1)
2989  && (event.m_description.indexOf(grCategMystery) != -1))
2990  {
2991  event.m_category = "Φαντασίας/Μυστηρίου";
2992  return;
2993  }
2994 
2995  // Find categories in the description
2996  for (const auto& [expression, category] : grCategoryDescData)
2997  {
2998  if (event.m_description.indexOf(expression) != -1) {
2999  event.m_category = category;
3000  return;
3001  }
3002  }
3003 
3004  // Find categories in the title
3005  for (const auto& [expression, category] : grCategoryTitleData)
3006  {
3007  if (event.m_title.indexOf(expression) != -1) {
3008  event.m_category = category;
3009  return;
3010  }
3011  }
      // Falling through here means no pattern matched; m_category is unchanged.
3012 }
3013 
// EITFixUp::FixUnitymedia
// Clean-up applied to an EIT event for the Unitymedia guide. It performs three
// steps, all visible below:
//  1. clears event.m_subtitle when the description starts with it;
//  2. moves cast/crew entries from event.m_items into the event's person list
//     via AddPerson(), erasing each handled entry from the map;
//  3. extracts a trailing "IMDb Rating: d.d/10" from the description into
//     event.m_stars and removes that text from the description.
//
// NOTE(review): this listing omits source line 3014, which presumably holds the
// function signature; the cross-reference index at the end of the listing gives
// it as `static void FixUnitymedia(DBEventEIT &event)`.
3015 {
3016  // TODO handle scraping the category and category_type from localized text in the short/long description
3017  // TODO remove short description (stored as episode title) which is just the beginning of the long description (actual description)
3018 
3019  // drop the short description if it is a copy of the start of the long description
      // NOTE(review): an empty subtitle presumably also satisfies startsWith();
      // clearing it in that case changes nothing.
3020  if (event.m_description.startsWith (event.m_subtitle))
3021  {
3022  event.m_subtitle = "";
3023  }
3024 
3025  // handle cast and crew in items in the DVB Extended Event Descriptor
3026  // remove handled items from the map, so the left overs can be reported
      // The iterator is advanced either by erase() (which returns the element
      // following the erased one) or by ++i, never both, so no entry is skipped.
3027  auto i = event.m_items.begin();
3028  while (i != event.m_items.end())
3029  {
3030  /* Possible TODO: if EIT includes the priority and/or character
3031  * names for the actors, include them in AddPerson call. */
      // Both "Role Player" and "Performing Artist" are recorded as actors.
3032  if ((QString::compare (i.key(), "Role Player") == 0) ||
3033  (QString::compare (i.key(), "Performing Artist") == 0))
3034  {
3035  event.AddPerson (DBPerson::kActor, i.value());
3036  i = event.m_items.erase (i);
3037  }
3038  else if (QString::compare (i.key(), "Director") == 0)
3039  {
3040  event.AddPerson (DBPerson::kDirector, i.value());
3041  i = event.m_items.erase (i);
3042  }
3043  else if (QString::compare (i.key(), "Commentary or Commentator") == 0)
3044  {
3045  event.AddPerson (DBPerson::kCommentator, i.value());
3046  i = event.m_items.erase (i);
3047  }
3048  else if (QString::compare (i.key(), "Presenter") == 0)
3049  {
3050  event.AddPerson (DBPerson::kPresenter, i.value());
3051  i = event.m_items.erase (i);
3052  }
3053  else if (QString::compare (i.key(), "Producer") == 0)
3054  {
3055  event.AddPerson (DBPerson::kProducer, i.value());
3056  i = event.m_items.erase (i);
3057  }
3058  else if (QString::compare (i.key(), "Scriptwriter") == 0)
3059  {
3060  event.AddPerson (DBPerson::kWriter, i.value());
3061  i = event.m_items.erase (i);
3062  }
3063  else
3064  {
      // Unrecognised key: leave the entry in the map and move on.
3065  ++i;
3066  }
3067  }
3068 
3069  // handle star rating in the description
      // The pattern is anchored to the end of the description and accepts exactly
      // one digit before and one after the decimal point, so a rating written as
      // "10.0" is not recognised. The captured value is divided by 10 before it
      // is stored in m_stars.
3070  static const QRegularExpression unitymediaImdbrating { R"(\s*IMDb Rating: (\d\.\d)\s?/10$)" };
3071  auto match = unitymediaImdbrating.match(event.m_description);
3072  if (match.hasMatch())
3073  {
3074  float stars = match.captured(1).toFloat();
3075  event.m_stars = stars / 10.0F;
      // Strip the whole matched rating text (including leading whitespace).
3076  event.m_description.remove(match.capturedStart(0),
3077  match.capturedLength(0));
3078  }
3079 }
EITFixUp::FixGreekEIT
static void FixGreekEIT(DBEventEIT &event)
Definition: eitfixup.cpp:2534
DBEvent::m_season
uint m_season
Definition: programdata.h:172
EITFixUp::FixDK
static void FixDK(DBEventEIT &event)
Use this to clean YouSee's DVB-C guide in Denmark.
Definition: eitfixup.cpp:2296
EITFixUp::kFixUK
@ kFixUK
Definition: eitfixup.h:35
EITFixUp::kFixBell
@ kFixBell
Definition: eitfixup.h:34
NLMapResult::type
ProgramInfo::CategoryType type
Definition: eitfixup.cpp:1982
EITFixUp::kFixAUDescription
@ kFixAUDescription
Definition: eitfixup.h:52
EITFixUp::FixAUNine
static void FixAUNine(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1337
EventRating::m_system
QString m_system
Definition: programdata.h:78
kUKSpaceColonStart
static const QRegularExpression kUKSpaceColonStart
Definition: eitfixup.cpp:19
EITFixUp::kFixNO
@ kFixNO
Definition: eitfixup.h:47
DBEvent::m_totalepisodes
uint m_totalepisodes
Definition: programdata.h:174
EITFixUp::kFixNRK_DVBT
@ kFixNRK_DVBT
Definition: eitfixup.h:48
EITFixUp::FixBellExpressVu
static void FixBellExpressVu(DBEventEIT &event)
Use this for the Canadian BellExpressVu to standardize DVB-S guide.
Definition: eitfixup.cpp:234
EITFixUp::Fix
static void Fix(DBEventEIT &event)
Definition: eitfixup.cpp:51
EITFixUp::FixFI
static void FixFI(DBEventEIT &event)
Use this to clean DVB-T guide in Finland.
Definition: eitfixup.cpp:1860
EITFixUp::kFixAUStar
@ kFixAUStar
Definition: eitfixup.h:39
EITFixUp::kFixPremiere
@ kFixPremiere
Definition: eitfixup.h:43
EITFixUp::kFixATV
@ kFixATV
Definition: eitfixup.h:58
DBEventEIT::m_fixup
FixupValue m_fixup
Definition: programdata.h:222
EITFixUp::kFixHTML
@ kFixHTML
Definition: eitfixup.h:56
EventRating
Definition: programdata.h:75
EITFixUp::FixATV
static void FixATV(DBEventEIT &event)
Use this to standardise the ATV/ATV2 guide in Germany.
Definition: eitfixup.cpp:1850
DBEvent::m_starttime
QDateTime m_starttime
Definition: programdata.h:152
EITFixUp::kDotToTitle
static const uint kDotToTitle
Definition: eitfixup.h:19
EITFixUp::FixComHem
static void FixComHem(DBEventEIT &event, bool process_subtitle)
Use this to standardize ComHem DVB-C service in Sweden.
Definition: eitfixup.cpp:1046
DBEvent::m_partnumber
uint16_t m_partnumber
Definition: programdata.h:157
LOG
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:23
EITFixUp::kFixSubtitle
@ kFixSubtitle
Definition: eitfixup.h:38
EITFixUp::kMaxToTitle
static const uint kMaxToTitle
Definition: eitfixup.h:17
dish_theme_type_to_string
QString dish_theme_type_to_string(uint theme_type)
Definition: dishdescriptors.cpp:301
EITFixUp::FixGreekCategories
static void FixGreekCategories(DBEventEIT &event)
Definition: eitfixup.cpp:2907
DBPerson::kPresenter
@ kPresenter
Definition: programdata.h:39
EITFixUp::FixAUStar
static void FixAUStar(DBEventEIT &event)
Use this to standardize DVB-S guide in Australia.
Definition: eitfixup.cpp:1295
DBEvent::m_category
QString m_category
Definition: programdata.h:151
NLMapResult::name
QString name
Definition: eitfixup.cpp:1981
kStereo
static const QRegularExpression kStereo
Definition: eitfixup.cpp:18
EITFixUp::parseRoman
static int parseRoman(QString roman)
Definition: eitfixup.cpp:31
DBPerson::kDirector
@ kDirector
Definition: programdata.h:32
EITFixUp::FixNRK_DVBT
static void FixNRK_DVBT(DBEventEIT &event)
Use this to clean DVB-T guide in Norway (NRK)
Definition: eitfixup.cpp:2234
tmp
static guint32 * tmp
Definition: goom_core.cpp:31
r2v
static const QMap< QChar, quint16 > r2v
Definition: eitfixup.cpp:25
EITFixUp::FixCategory
static void FixCategory(DBEventEIT &event)
Definition: eitfixup.cpp:2190
DBEvent::m_seriesId
QString m_seriesId
Definition: programdata.h:165
ProgramInfo::kCategoryMovie
@ kCategoryMovie
Definition: programinfo.h:75
DBEvent::m_parttotal
uint16_t m_parttotal
Definition: programdata.h:158
EITFixUp::kFixCategory
@ kFixCategory
Definition: eitfixup.h:46
DBEvent::m_programId
QString m_programId
Definition: programdata.h:166
DBPerson::kUnknown
@ kUnknown
Definition: programdata.h:30
programinfo.h
DBEventEIT::m_chanid
uint32_t m_chanid
Definition: programdata.h:221
ProgramInfo::kCategoryTVShow
@ kCategoryTVShow
Definition: programinfo.h:76
mythlogging.h
DBEvent::m_categoryType
ProgramInfo::CategoryType m_categoryType
Definition: programdata.h:164
EITFixUp::SetUKSubtitle
static void SetUKSubtitle(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:522
deCrewTitle
static const QMap< QString, DBPerson::Role > deCrewTitle
Definition: eitfixup.cpp:1738
DBEvent::m_title
QString m_title
Definition: programdata.h:148
EITFixUp::FixAUSeven
static void FixAUSeven(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1370
ProgramInfo::kCategorySports
@ kCategorySports
Definition: programinfo.h:76
EITFixUp::kMinMovieDuration
static const int kMinMovieDuration
Definition: eitfixup.h:25
DBPerson::kProducer
@ kProducer
Definition: programdata.h:33
DBEvent::m_subtitle
QString m_subtitle
Definition: programdata.h:149
EITFixUp::kFixNL
@ kFixNL
Definition: eitfixup.h:45
EITFixUp::kFixGreekEIT
@ kFixGreekEIT
Definition: eitfixup.h:69
EITFixUp::kFixDisneyChannel
@ kFixDisneyChannel
Definition: eitfixup.h:59
hardwareprofile.scan.rating
def rating(profile, smoonURL, gate)
Definition: scan.py:39
EITFixUp::kSubtitleMaxLen
static const uint kSubtitleMaxLen
Definition: eitfixup.h:15
EITFixUp::FixNO
static void FixNO(DBEventEIT &event)
Use this to clean DVB-S guide in Norway.
Definition: eitfixup.cpp:2204
EITFixUp::FixNL
static void FixNL(DBEventEIT &event)
Use this to standardize @Home DVB-C guide in the Netherlands.
Definition: eitfixup.cpp:2007
EITFixUp::kFixDK
@ kFixDK
Definition: eitfixup.h:50
EITFixUp::kFixGreekCategories
@ kFixGreekCategories
Definition: eitfixup.h:70
EITFixUp::FixPremiere
static void FixPremiere(DBEventEIT &event)
Use this to standardize DVB-C guide in Germany for the providers Kabel Deutschland and Premiere.
Definition: eitfixup.cpp:1912
categoryTrans
static const QMap< QString, NLMapResult > categoryTrans
Definition: eitfixup.cpp:1984
uint
unsigned int uint
Definition: compat.h:140
DBPerson::kHost
@ kHost
Definition: programdata.h:37
ProgramInfo::CategoryType
CategoryType
Definition: programinfo.h:75
DBEvent::m_episode
uint m_episode
Definition: programdata.h:173
EITFixUp::kFixAUFreeview
@ kFixAUFreeview
Definition: eitfixup.h:51
channelutil.h
EITFixUp::kFixFI
@ kFixFI
Definition: eitfixup.h:42
EITFixUp::kFixComHem
@ kFixComHem
Definition: eitfixup.h:37
EITFixUp::kFixRTL
@ kFixRTL
Definition: eitfixup.h:41
EITFixUp::kFixDish
@ kFixDish
Definition: eitfixup.h:49
EITFixUp::FixAUDescription
static void FixAUDescription(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1311
EITFixUp::kFixGenericDVB
@ kFixGenericDVB
Definition: eitfixup.h:33
eitfixup.h
DBEventEIT
Definition: programdata.h:177
EITFixUp::kFixPBS
@ kFixPBS
Definition: eitfixup.h:36
DBEvent::m_description
QString m_description
Definition: programdata.h:150
EITFixUp::kFixAUSeven
@ kFixAUSeven
Definition: eitfixup.h:54
DBEventEIT::m_items
QMultiMap< QString, QString > m_items
Definition: programdata.h:223
EITFixUp::FixAUFreeview
static void FixAUFreeview(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1412
DBPerson::Role
Role
Definition: programdata.h:28
EITFixUp::AddDVBEITAuthority
static QString AddDVBEITAuthority(uint chanid, const QString &id)
This adds a DVB EIT default authority to series id or program id if one exists in the DB for that cha...
Definition: eitfixup.cpp:207
DBPerson::kCommentator
@ kCommentator
Definition: programdata.h:40
EITFixUp::FixPBS
static void FixPBS(DBEventEIT &event)
Use this to standardize PBS ATSC guide in the USA.
Definition: eitfixup.cpp:1031
EITFixUp::FixDisneyChannel
static void FixDisneyChannel(DBEventEIT &event)
Use this to standardise the Disney Channel guide in Germany.
Definition: eitfixup.cpp:1823
EITFixUp::kMaxDotToColon
static const uint kMaxDotToColon
Definition: eitfixup.h:23
EITFixUp::kFixHDTV
@ kFixHDTV
Definition: eitfixup.h:44
EITFixUp::kFixGreekSubtitle
@ kFixGreekSubtitle
Definition: eitfixup.h:68
DBEvent::m_endtime
QDateTime m_endtime
Definition: programdata.h:153
EITFixUp::kFixMCA
@ kFixMCA
Definition: eitfixup.h:40
NLMapResult
Definition: eitfixup.cpp:1980
EITFixUp::kFixP7S1
@ kFixP7S1
Definition: eitfixup.h:55
EITFixUp::FixUK
static void FixUK(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:660
dishdescriptors.h
EITFixUp::kFixUnitymedia
@ kFixUnitymedia
Definition: eitfixup.h:57
EITFixUp::FixStripHTML
static void FixStripHTML(DBEventEIT &event)
Use this to clean HTML Tags from EIT Data.
Definition: eitfixup.cpp:2508
ProgramInfo::kCategoryNone
@ kCategoryNone
Definition: programinfo.h:75
ProgramInfo::kCategorySeries
@ kCategorySeries
Definition: programinfo.h:75
EITFixUp::FixGreekSubtitle
static void FixGreekSubtitle(DBEventEIT &event)
Definition: eitfixup.cpp:2519
EITFixUp::kFixAUNine
@ kFixAUNine
Definition: eitfixup.h:53
DBEvent::m_airdate
uint16_t m_airdate
movie year / production year
Definition: programdata.h:154
DBPerson::kWriter
@ kWriter
Definition: programdata.h:35
EventRating::m_rating
QString m_rating
Definition: programdata.h:79
EITFixUp::FixRTL
static void FixRTL(DBEventEIT &event)
Use this to standardise the RTL group guide in Germany.
Definition: eitfixup.cpp:1610
ChannelUtil::GetDefaultAuthority
static QString GetDefaultAuthority(uint chanid)
Returns the DVB default authority for the chanid given.
Definition: channelutil.cpp:1176
EITFixUp::FixMCA
static void FixMCA(DBEventEIT &event)
Use this to standardise the MultiChoice Africa DVB-S guide.
Definition: eitfixup.cpp:1470
DBPerson::kActor
@ kActor
Definition: programdata.h:31
EITFixUp::kMaxQuestionExclamation
static const uint kMaxQuestionExclamation
Definition: eitfixup.h:21
EITFixUp::FixUnitymedia
static void FixUnitymedia(DBEventEIT &event)
Definition: eitfixup.cpp:3014
EITFixUp::FixPRO7
static void FixPRO7(DBEventEIT &event)
Use this to standardise the PRO7/Sat1 group guide in Germany.
Definition: eitfixup.cpp:1747