MythTV  master
eitfixup.cpp
Go to the documentation of this file.
1 // C++ headers
2 #include <algorithm>
3 #include <array>
4 
5 // Qt Headers
6 #include <QRegularExpression>
7 
8 // MythTV headers
10 #include "libmythbase/programinfo.h" // for CategoryType, subtitle types and audio and video properties
11 
12 #include "channelutil.h" // for GetDefaultAuthority()
13 #include "eitfixup.h"
14 #include "mpeg/dishdescriptors.h" // for dish_theme_type_to_string
15 
16 /*------------------------------------------------------------------------
17  * Event Fix Up Scripts - Turned on by entry in dtv_privatetype table
18  *------------------------------------------------------------------------*/
19 
// Matches "Stereo" or "stereo", optionally wrapped in parentheses, between
// \b word-boundary assertions.
static const QRegularExpression kStereo { R"(\b\(?[sS]tereo\)?\b)" };
// Matches a (possibly empty) leading run of space, '|' and ':' characters.
static const QRegularExpression kUKSpaceColonStart { R"(^[ |:]*)" };
// Matches a trailing period.
static const QRegularExpression kDotAtEnd { "\\.$" };

// When building against Qt older than 5.15.2, spell capturedView as capturedRef.
#if QT_VERSION < QT_VERSION_CHECK(5,15,2)
#define capturedView capturedRef
#endif
27 
// Roman numeral letter -> numeric value, used by EITFixUp::parseRoman().
// The Greek capital iota (U+0399) is accepted as an alias for Latin 'I'.
static const QMap<QChar,quint16> r2v = {
    {'I' , 1}, {'V' , 5}, {'X' , 10}, {'L' , 50},
    {'C' , 100}, {'D' , 500}, {'M' , 1000},
    {QChar(0x399), 1}, // Greek Ι
};
33 
34 int EITFixUp::parseRoman (QString roman)
35 {
36  if (roman.isEmpty())
37  return 0;
38 
39  uint result = 0;
40  for (int i = 0; i < roman.size() - 1; i++)
41  {
42  int v1 = r2v[roman.at(i)];
43  int v2 = r2v[roman.at(i+1)];
44  result += (v1 >= v2) ? v1 : -v1;
45  }
46  return result + r2v[roman.back()];
47 }
48 
49 
51 {
52  if (event.m_fixup)
53  {
54  if (event.m_subtitle == event.m_title)
55  event.m_subtitle = QString("");
56 
57  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
58  {
59  event.m_description = event.m_subtitle;
60  event.m_subtitle = QString("");
61  }
62  }
63 
64  if (kFixHTML & event.m_fixup)
65  FixStripHTML(event);
66 
67  if (kFixHDTV & event.m_fixup)
68  event.m_videoProps |= VID_HDTV;
69 
70  if (kFixBell & event.m_fixup)
71  FixBellExpressVu(event);
72 
73  if (kFixDish & event.m_fixup)
74  FixBellExpressVu(event);
75 
76  if (kFixUK & event.m_fixup)
77  FixUK(event);
78 
79  if (kFixPBS & event.m_fixup)
80  FixPBS(event);
81 
82  if (kFixComHem & event.m_fixup)
83  FixComHem(event, (kFixSubtitle & event.m_fixup) != 0U);
84 
85  if (kFixAUStar & event.m_fixup)
86  FixAUStar(event);
87 
88  if (kFixAUDescription & event.m_fixup)
89  FixAUDescription(event);
90 
91  if (kFixAUFreeview & event.m_fixup)
92  FixAUFreeview(event);
93 
94  if (kFixAUNine & event.m_fixup)
95  FixAUNine(event);
96 
97  if (kFixAUSeven & event.m_fixup)
98  FixAUSeven(event);
99 
100  if (kFixMCA & event.m_fixup)
101  FixMCA(event);
102 
103  if (kFixRTL & event.m_fixup)
104  FixRTL(event);
105 
106  if (kFixP7S1 & event.m_fixup)
107  FixPRO7(event);
108 
109  if (kFixATV & event.m_fixup)
110  FixATV(event);
111 
112  if (kFixDisneyChannel & event.m_fixup)
113  FixDisneyChannel(event);
114 
115  if (kFixFI & event.m_fixup)
116  FixFI(event);
117 
118  if (kFixPremiere & event.m_fixup)
119  FixPremiere(event);
120 
121  if (kFixNL & event.m_fixup)
122  FixNL(event);
123 
124  if (kFixNO & event.m_fixup)
125  FixNO(event);
126 
127  if (kFixNRK_DVBT & event.m_fixup)
128  FixNRK_DVBT(event);
129 
130  if (kFixDK & event.m_fixup)
131  FixDK(event);
132 
133  if (kFixCategory & event.m_fixup)
134  FixCategory(event);
135 
136  if (kFixGreekSubtitle & event.m_fixup)
137  FixGreekSubtitle(event);
138 
139  if (kFixGreekEIT & event.m_fixup)
140  FixGreekEIT(event);
141 
142  if (kFixGreekCategories & event.m_fixup)
143  FixGreekCategories(event);
144 
145  if (kFixUnitymedia & event.m_fixup)
146  FixUnitymedia(event);
147 
148  // Clean up text strings after all fixups have been applied.
149  if (event.m_fixup)
150  {
151  static const QRegularExpression emptyParens { R"(\(\s*\))" };
152  if (!event.m_title.isEmpty())
153  {
154  event.m_title.remove(QChar('\0')).remove(emptyParens);
155  event.m_title = event.m_title.simplified();
156  }
157 
158  if (!event.m_subtitle.isEmpty())
159  {
160  event.m_subtitle.remove(QChar('\0'));
161  event.m_subtitle.remove(emptyParens);
162  event.m_subtitle = event.m_subtitle.simplified();
163  }
164 
165  if (!event.m_description.isEmpty())
166  {
167  event.m_description.remove(QChar('\0'));
168  event.m_description.remove(emptyParens);
169  event.m_description = event.m_description.simplified();
170  }
171  }
172 
173  if (kFixGenericDVB & event.m_fixup)
174  {
175  event.m_programId = AddDVBEITAuthority(event.m_chanid, event.m_programId);
176  event.m_seriesId = AddDVBEITAuthority(event.m_chanid, event.m_seriesId);
177  }
178 
179  // Are any items left unhandled? report them to allow fixups improvements
180  if (!event.m_items.empty())
181  {
182  for (auto i = event.m_items.begin(); i != event.m_items.end(); ++i)
183  {
184  LOG(VB_EIT, LOG_DEBUG, QString("Unhandled item in EIT for"
185  " channel id \"%1\", \"%2\": %3").arg(event.m_chanid)
186  .arg(i.key(), i.value()));
187  }
188  }
189 }
190 
206 QString EITFixUp::AddDVBEITAuthority(uint chanid, const QString &id)
207 {
208  if (id.isEmpty())
209  return id;
210 
211  // CRIDs are not case sensitive, so change all to lower case
212  QString crid = id.toLower();
213 
214  // remove "crid://"
215  if (crid.startsWith("crid://"))
216  crid.remove(0,7);
217 
218  // if id is a CRID with authority, return it
219  if (crid.length() >= 1 && crid[0] != '/')
220  return crid;
221 
222  QString authority = ChannelUtil::GetDefaultAuthority(chanid);
223  if (authority.isEmpty())
224  return ""; // no authority, not a valid CRID, return empty
225 
226  return authority + crid;
227 }
228 
234 {
235  // A 0x0D character is present between the content
236  // and the subtitle if its present
237  int position = event.m_description.indexOf('\r');
238 
239  if (position != -1)
240  {
241  // Subtitle present in the title, so get
242  // it and adjust the description
243  event.m_subtitle = event.m_description.left(position);
244  event.m_description = event.m_description.right(
245  event.m_description.length() - position - 2);
246  }
247 
248  // Take out the content description which is
249  // always next with a period after it
250  position = event.m_description.indexOf(".");
251  // Make sure they didn't leave it out and
252  // you come up with an odd category
253  if (position < 10)
254  {
255  }
256  else
257  {
258  event.m_category = "Unknown";
259  }
260 
261  // If the content descriptor didn't come up with anything, try parsing the category
262  // out of the description.
263  if (event.m_category.isEmpty())
264  {
265  // Take out the content description which is
266  // always next with a period after it
267  position = event.m_description.indexOf(".");
268  if ((position + 1) < event.m_description.length())
269  position = event.m_description.indexOf(". ");
270  // Make sure they didn't leave it out and
271  // you come up with an odd category
272  if ((position > -1) && position < 20)
273  {
274  const QString stmp = event.m_description;
275  event.m_description = stmp.right(stmp.length() - position - 2);
276  event.m_category = stmp.left(position);
277 
278  int position_p = event.m_category.indexOf("(");
279  if (position_p == -1)
280  event.m_description = stmp.right(stmp.length() - position - 2);
281  else
282  event.m_category = "Unknown";
283  }
284  else
285  {
286  event.m_category = "Unknown";
287  }
288 
289  // When a channel is off air the category is "-"
290  // so leave the category as blank
291  if (event.m_category == "-")
292  event.m_category = "OffAir";
293 
294  if (event.m_category.length() > 20)
295  event.m_category = "Unknown";
296  }
297  else if (event.m_categoryType)
298  {
299  QString theme = dish_theme_type_to_string(event.m_categoryType);
300  event.m_description = event.m_description.replace(theme, "");
301  if (event.m_description.startsWith("."))
302  event.m_description = event.m_description.right(event.m_description.length() - 1);
303  if (event.m_description.startsWith(" "))
304  event.m_description = event.m_description.right(event.m_description.length() - 1);
305  }
306 
307  // See if a year is present as (xxxx)
308  static const QRegularExpression bellYear { R"(\([0-9]{4}\))" };
309  position = event.m_description.indexOf(bellYear);
310  if (position != -1 && !event.m_category.isEmpty())
311  {
312  // Parse out the year
313  bool ok = false;
314  uint y = event.m_description.mid(position + 1, 4).toUInt(&ok);
315  if (ok)
316  {
317  event.m_originalairdate = QDate(y, 1, 1);
318  event.m_airdate = y;
319  event.m_previouslyshown = true;
320  }
321 
322  // Get the actors if they exist
323  if (position > 3)
324  {
325  static const QRegularExpression bellActors { R"(\set\s|,)" };
326  QString tmp = event.m_description.left(position-3);
327 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
328  QStringList actors =
329  tmp.split(bellActors, QString::SkipEmptyParts);
330 #else
331  QStringList actors =
332  tmp.split(bellActors, Qt::SkipEmptyParts);
333 #endif
334 
335  /* Possible TODO: if EIT inlcude the priority and/or character
336  * names for the actors, include them in AddPerson call. */
337  for (const auto & actor : qAsConst(actors))
338  event.AddPerson(DBPerson::kActor, actor);
339  }
340  // Remove the year and actors from the description
341  event.m_description = event.m_description.right(
342  event.m_description.length() - position - 7);
343  }
344 
345  // Check for (CC) in the decription and
346  // set the <subtitles type="teletext"> flag
347  position = event.m_description.indexOf("(CC)");
348  if (position != -1)
349  {
350  event.m_subtitleType |= SUB_HARDHEAR;
351  event.m_description = event.m_description.replace("(CC)", "");
352  }
353 
354  // Check for (Stereo) in the decription and set the <audio> tags
355  auto match = kStereo.match(event.m_description);
356  if (match.hasMatch())
357  {
358  event.m_audioProps |= AUD_STEREO;
359  event.m_description.remove(match.capturedStart(0),
360  match.capturedLength(0));
361  }
362 
363  // Check for "title (All Day, HD)" in the title
364  static const QRegularExpression bellPPVTitleAllDayHD { R"(\s*\(All Day\, HD\)\s*$)" };
365  match = bellPPVTitleAllDayHD.match(event.m_title);
366  if (match.hasMatch())
367  {
368  event.m_title.remove(match.capturedStart(), match.capturedLength());
369  event.m_videoProps |= VID_HDTV;
370  }
371 
372  // Check for "title (All Day)" in the title
373  static const QRegularExpression bellPPVTitleAllDay { R"(\s*\(All Day.*\)\s*$)" };
374  match = bellPPVTitleAllDay.match(event.m_title);
375  if (match.hasMatch())
376  event.m_title.remove(match.capturedStart(), match.capturedLength());
377 
378  // Check for "HD - title" in the title
379  static const QRegularExpression bellPPVTitleHD { R"(^HD\s?-\s?)" };
380  match = bellPPVTitleHD.match(event.m_title);
381  if (match.hasMatch())
382  {
383  event.m_title.remove(match.capturedStart(), match.capturedLength());
384  event.m_videoProps |= VID_HDTV;
385  }
386 
387  // Check for (HD) in the decription
388  position = event.m_description.indexOf("(HD)");
389  if (position != -1)
390  {
391  event.m_description = event.m_description.replace("(HD)", "");
392  event.m_videoProps |= VID_HDTV;
393  }
394 
395  // Check for (HD) in the title
396  position = event.m_title.indexOf("(HD)");
397  if (position != -1)
398  {
399  event.m_title = event.m_title.replace("(HD)", "");
400  event.m_videoProps |= VID_HDTV;
401  }
402 
403  // Check for HD at the end of the title
404  static const QRegularExpression dishPPVTitleHD { R"(\sHD\s*$)" };
405  match = dishPPVTitleHD.match(event.m_title);
406  if (match.hasMatch())
407  {
408  event.m_title.remove(match.capturedStart(), match.capturedLength());
409  event.m_videoProps |= VID_HDTV;
410  }
411 
412  // Check for (DD) at the end of the description
413  position = event.m_description.indexOf("(DD)");
414  if (position != -1)
415  {
416  event.m_description = event.m_description.replace("(DD)", "");
417  event.m_audioProps |= AUD_DOLBY;
418  event.m_audioProps |= AUD_STEREO;
419  }
420 
421  // Remove SAP from Dish descriptions
422  position = event.m_description.indexOf("(SAP)");
423  if (position != -1)
424  {
425  event.m_description = event.m_description.replace("(SAP", "");
426  event.m_subtitleType |= SUB_HARDHEAR;
427  }
428 
429  // Remove any trailing colon in title
430  static const QRegularExpression dishPPVTitleColon { R"(\:\s*$)" };
431  match = dishPPVTitleColon.match(event.m_title);
432  if (match.hasMatch())
433  event.m_title.remove(match.capturedStart(), match.capturedLength());
434 
435  // Remove New at the end of the description
436  static const QRegularExpression dishDescriptionNew { R"(\s*New\.\s*)" };
437  match = dishDescriptionNew.match(event.m_description);
438  if (match.hasMatch())
439  {
440  event.m_previouslyshown = false;
441  event.m_description.remove(match.capturedStart(), match.capturedLength());
442  }
443 
444  // Remove Series Finale at the end of the desciption
445  static const QRegularExpression dishDescriptionFinale { R"(\s*(Series|Season)\sFinale\.\s*)" };
446  match = dishDescriptionFinale.match(event.m_description);
447  if (match.hasMatch())
448  {
449  event.m_previouslyshown = false;
450  event.m_description.remove(match.capturedStart(), match.capturedLength());
451  }
452 
453  // Remove Series Finale at the end of the desciption
454  static const QRegularExpression dishDescriptionFinale2 { R"(\s*Finale\.\s*)" };
455  match = dishDescriptionFinale2.match(event.m_description);
456  if (match.hasMatch())
457  {
458  event.m_previouslyshown = false;
459  event.m_description.remove(match.capturedStart(), match.capturedLength());
460  }
461 
462  // Remove Series Premiere at the end of the description
463  static const QRegularExpression dishDescriptionPremiere { R"(\s*(Series|Season)\s(Premier|Premiere)\.\s*)" };
464  match = dishDescriptionPremiere.match(event.m_description);
465  if (match.hasMatch())
466  {
467  event.m_previouslyshown = false;
468  event.m_description.remove(match.capturedStart(), match.capturedLength());
469  }
470 
471  // Remove Series Premiere at the end of the description
472  static const QRegularExpression dishDescriptionPremiere2 { R"(\s*(Premier|Premiere)\.\s*)" };
473  match = dishDescriptionPremiere2.match(event.m_description);
474  if (match.hasMatch())
475  {
476  event.m_previouslyshown = false;
477  event.m_description.remove(match.capturedStart(), match.capturedLength());
478  }
479 
480  // Remove Dish's PPV code at the end of the description
481  static const QRegularExpression ppvcode { R"(\s*\(([A-Z]|[0-9]){5}\)\s*$)",
482  QRegularExpression::CaseInsensitiveOption };
483  match = ppvcode.match(event.m_description);
484  if (match.hasMatch())
485  event.m_description.remove(match.capturedStart(), match.capturedLength());
486 
487  // Remove trailing garbage
488  static const QRegularExpression dishPPVSpacePerenEnd { R"(\s\)\s*$)" };
489  match = dishPPVSpacePerenEnd.match(event.m_description);
490  if (match.hasMatch())
491  event.m_description.remove(match.capturedStart(), match.capturedLength());
492 
493  // Check for subtitle "All Day (... Eastern)" in the subtitle
494  static const QRegularExpression bellPPVSubtitleAllDay { R"(^All Day \(.*\sEastern\)\s*$)" };
495  match = bellPPVSubtitleAllDay.match(event.m_subtitle);
496  if (match.hasMatch())
497  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
498 
499  // Check for description "(... Eastern)" in the description
500  static const QRegularExpression bellPPVDescriptionAllDay { R"(^\(.*\sEastern\))" };
501  match = bellPPVDescriptionAllDay.match(event.m_description);
502  if (match.hasMatch())
503  event.m_description.remove(match.capturedStart(), match.capturedLength());
504 
505  // Check for description "(... ET)" in the description
506  static const QRegularExpression bellPPVDescriptionAllDay2 { R"(^\([0-9].*am-[0-9].*am\sET\))" };
507  match = bellPPVDescriptionAllDay2.match(event.m_description);
508  if (match.hasMatch())
509  event.m_description.remove(match.capturedStart(), match.capturedLength());
510 
511  // Check for description "(nnnnn)" in the description
512  static const QRegularExpression bellPPVDescriptionEventId { R"(\([0-9]{5}\))" };
513  match = bellPPVDescriptionEventId.match(event.m_description);
514  if (match.hasMatch())
515  event.m_description.remove(match.capturedStart(), match.capturedLength());
516 }
517 
{
    // Heuristically split a subtitle off the front of the description.
    // strListEnd ends up holding the pieces of the description produced by
    // whichever separator was selected; only its first element is used as
    // the subtitle candidate.
    QStringList strListColon = event.m_description.split(":");
    QStringList strListEnd;

    bool fColon = false;
    bool fQuotedSubtitle = false;
    QString strEnd;
    // Case 1: the description contains at least one colon.
    if (strListColon.count()>1)
    {
        bool fDoubleDot = false;
        bool fSingleDot = true;
        // Length of the text in front of the first colon
        int nLength = strListColon[0].length();

        // Is there a ".." in front of the first colon?
        int nPosition1 = event.m_description.indexOf("..");
        if ((nPosition1 < nLength) && (nPosition1 >= 0))
            fDoubleDot = true;
        // Is there a period in front of the first colon, with at least
        // kMaxDotToColon space separated pieces between it and the colon?
        nPosition1 = event.m_description.indexOf(".");
        if (nPosition1==-1)
            fSingleDot = false;
        if (nPosition1 > nLength)
            fSingleDot = false;
        else
        {
            QString strTmp = event.m_description.mid(nPosition1+1,
                                                     nLength-nPosition1);

            QStringList tmp = strTmp.split(" ");
            if (((uint) tmp.size()) < kMaxDotToColon)
                fSingleDot = false;
        }

        if (fDoubleDot)
        {
            // Use the colon separated pieces as they are
            strListEnd = strListColon;
            fColon = true;
        }
        else if (!fSingleDot)
        {
            // Collect leading colon separated pieces while the running
            // count of space separated pieces stays below kMaxToTitle;
            // nTitleMax is the index of the first piece that reaches it.
            QStringList strListTmp;
            uint nTitle=0;
            int nTitleMax=-1;
            for (int i =0; (i<strListColon.count()) && (nTitleMax==-1);i++)
            {
                const QStringList tmp = strListColon[i].split(" ");

                nTitle += tmp.size();

                if (nTitle < kMaxToTitle)
                    strListTmp.push_back(strListColon[i]);
                else
                    nTitleMax=i;
            }
            // Re-join the collected pieces with ':' into one candidate,
            // then append the pieces following index nTitleMax.
            QString strPartial;
            for (int i=0;i<(nTitleMax-1);i++)
                strPartial+=strListTmp[i]+":";
            if (nTitleMax>0)
            {
                strPartial+=strListTmp[nTitleMax-1];
                strListEnd.push_back(strPartial);
            }
            for (int i=nTitleMax+1;i<strListColon.count();i++)
                strListEnd.push_back(strListColon[i]);
            fColon = true;
        }
    }
    // Case 2: the description starts with a quoted sentence ('Subtitle.' ),
    // which is taken as the subtitle and removed from the description.
    static const QRegularExpression ukQuotedSubtitle { R"(^'([\w\s\-,]+?)\.' )" };
    auto match = ukQuotedSubtitle.match(event.m_description);
    if (match.hasMatch())
    {
        event.m_subtitle = match.captured(1);
        event.m_description.remove(match.capturedStart(0),
                                   match.capturedLength(0));
        fQuotedSubtitle = true;
    }
    QStringList strListPeriod;
    QStringList strListQuestion;
    QStringList strListExcl;
    // Case 3: neither of the above applied, so try '.', '?' and '!' as the
    // separator. A '?' or '!' split takes precedence over the '.' split when
    // it yields more than one and at most kMaxQuestionExclamation pieces.
    if (!(fColon || fQuotedSubtitle))
    {
        strListPeriod = event.m_description.split(".");
        if (strListPeriod.count() >1)
        {
            // Only accept the split when the first '.' is not the start
            // of the first ".."
            int nPosition1 = event.m_description.indexOf(".");
            int nPosition2 = event.m_description.indexOf("..");
            if ((nPosition1 < nPosition2) || (nPosition2==-1))
                strListEnd = strListPeriod;
        }

        strListQuestion = event.m_description.split("?");
        strListExcl = event.m_description.split("!");
        if ((strListQuestion.size() > 1) &&
            ((uint)strListQuestion.size() <= kMaxQuestionExclamation))
        {
            strListEnd = strListQuestion;
            strEnd = "?";
        }
        else if ((strListExcl.size() > 1) &&
                 ((uint)strListExcl.size() <= kMaxQuestionExclamation))
        {
            strListEnd = strListExcl;
            strEnd = "!";
        }
        else
            strEnd.clear();
    }

    if (!strListEnd.empty())
    {
        // Reject candidates with too many words to be a subtitle
#if QT_VERSION < QT_VERSION_CHECK(5,14,0)
        QStringList strListSpace = strListEnd[0].split(
            " ", QString::SkipEmptyParts);
#else
        QStringList strListSpace = strListEnd[0].split(
            " ", Qt::SkipEmptyParts);
#endif
        if (fColon && ((uint)strListSpace.size() > kMaxToTitle))
            return;
        if ((uint)strListSpace.size() > kDotToTitle)
            return;
        // Reject candidates containing any of these words.
        // NOTE(review): "seres" looks like it may be a typo for "series" -
        // confirm before changing, as that would alter which subtitles
        // are accepted.
        static const QRegularExpression ukExclusionFromSubtitle {
            "(starring|stars\\s|drama|seres|sitcom)",
            QRegularExpression::CaseInsensitiveOption };
        if (strListSpace.filter(ukExclusionFromSubtitle).empty())
        {
            // Accept: the candidate (plus '?'/'!' when that was the
            // separator) becomes the subtitle, the remainder stays as the
            // description; leading spaces, '|' and ':' are stripped from both.
            event.m_subtitle = strListEnd[0]+strEnd;
            event.m_subtitle.remove(kUKSpaceColonStart);
            event.m_description=
                event.m_description.mid(strListEnd[0].length()+1);
            event.m_description.remove(kUKSpaceColonStart);
        }
    }
}
654 
655 
660 {
661  static const QRegularExpression uk24ep { R"(^\d{1,2}:00[ap]m to \d{1,2}:00[ap]m: )" };
662  static const QRegularExpression ukTime { R"(\d{1,2}[\.:]\d{1,2}\s*(am|pm|))" };
663  QString strFull;
664 
665  bool isMovie = event.m_category.startsWith("Movie",Qt::CaseInsensitive) ||
666  event.m_category.startsWith("Film",Qt::CaseInsensitive);
667  // BBC three case (could add another record here ?)
668  static const QRegularExpression ukThen { R"(\s*?(Then|Followed by) 60 Seconds\.)",
669  QRegularExpression::CaseInsensitiveOption };
670  static const QRegularExpression ukNew { R"((New\.|\s*?(Brand New|New)\s*?(Series|Episode)\s*?[:\.\-]))",
671  QRegularExpression::CaseInsensitiveOption };
672  static const QRegularExpression ukNewTitle { R"(^(Brand New|New:)\s*)",
673  QRegularExpression::CaseInsensitiveOption };
674  event.m_description = event.m_description.remove(ukThen);
675  event.m_description = event.m_description.remove(ukNew);
676  event.m_title = event.m_title.remove(ukNewTitle);
677 
678  // Removal of Class TV, CBBC and CBeebies etc..
679  static const QRegularExpression ukTitleRemove { "^(?:[tT]4:|Schools\\s*?:)" };
680  static const QRegularExpression ukDescriptionRemove { R"(^(?:CBBC\s*?\.|CBeebies\s*?\.|Class TV\s*?:|BBC Switch\.))" };
681  event.m_title = event.m_title.remove(ukTitleRemove);
682  event.m_description = event.m_description.remove(ukDescriptionRemove);
683 
684  // Removal of BBC FOUR and BBC THREE
685  static const QRegularExpression ukBBC34 { R"(BBC (?:THREE|FOUR) on BBC (?:ONE|TWO)\.)",
686  QRegularExpression::CaseInsensitiveOption };
687  event.m_description = event.m_description.remove(ukBBC34);
688 
689  // BBC 7 [Rpt of ...] case.
690  static const QRegularExpression ukBBC7rpt { R"(\[Rptd?[^]]+?\d{1,2}\.\d{1,2}[ap]m\]\.)" };
691  event.m_description = event.m_description.remove(ukBBC7rpt);
692 
693  // "All New To 4Music!
694  static const QRegularExpression ukAllNew { R"(All New To 4Music!\s?)" };
695  event.m_description = event.m_description.remove(ukAllNew);
696 
697  // Removal of 'Also in HD' text
698  static const QRegularExpression ukAlsoInHD { R"(\s*Also in HD\.)",
699  QRegularExpression::CaseInsensitiveOption };
700  event.m_description = event.m_description.remove(ukAlsoInHD);
701 
702  // Remove [AD,S] etc.
703  static const QRegularExpression ukCC { R"(\[(?:(AD|SL|S|W|HD),?)+\])" };
704  auto match = ukCC.match(event.m_description);
705  while (match.hasMatch())
706  {
707  QStringList tmpCCitems = match.captured(0).remove("[").remove("]").split(",");
708  if (tmpCCitems.contains("AD"))
709  event.m_audioProps |= AUD_VISUALIMPAIR;
710  if (tmpCCitems.contains("HD"))
711  event.m_videoProps |= VID_HDTV;
712  if (tmpCCitems.contains("S"))
713  event.m_subtitleType |= SUB_NORMAL;
714  if (tmpCCitems.contains("SL"))
715  event.m_subtitleType |= SUB_SIGNED;
716  if (tmpCCitems.contains("W"))
717  event.m_videoProps |= VID_WIDESCREEN;
718  event.m_description.remove(match.capturedStart(0),
719  match.capturedLength(0));
720  match = ukCC.match(event.m_description, match.capturedStart(0));
721  }
722 
723  event.m_title = event.m_title.trimmed();
724  event.m_description = event.m_description.trimmed();
725 
726  // Constituents of UK season regexp, decomposed for clarity
727 
728  // Matches Season 2, S 2 and "Series 2," etc but not "hits 2"
729  // cap1 = season
730  static const QString seasonStr = R"(\b(?:Season|Series|S)\s*(\d+)\s*,?)";
731 
732  // Work out the season and episode numbers (if any)
733  // Matching pattern "Season 2 Episode|Ep 3 of 14|3/14" etc
734 
735  // Matches Episode 3, Ep 3/4, Ep 3 of 4 etc but not "step 1"
736  // cap1 = ep, cap2 = total
737  static const QString longEp = R"(\b(?:Ep|Episode)\s*(\d+)\s*(?:(?:/|of)\s*(\d*))?)";
738 
739  // Matches S2 Ep 3/4, "Season 2, Ep 3 of 4", Episode 3 etc
740  // cap1 = season, cap2 = ep, cap3 = total
741  static const QString longSeasEp = QString("\\(?(?:%1)?\\s*%2").arg(seasonStr, longEp);
742 
743  // Matches long seas/ep with surrounding parenthesis & trailing period
744  // cap1 = season, cap2 = ep, cap3 = total
745  static const QString longContext = QString(R"(\(*%1\s*\)?\s*\.?)").arg(longSeasEp);
746 
747  // Matches 3/4, 3 of 4
748  // cap1 = ep, cap2 = total
749  static const QString shortEp = R"((\d+)\s*(?:/|of)\s*(\d+))";
750 
751  // Matches short ep/total, ignoring Parts and idioms such as 9/11, 24/7 etc.
752  // ie. x/y in parenthesis or has no leading or trailing text in the sentence.
753  // cap0 may include previous/anchoring period
754  // cap1 = shortEp with surrounding parenthesis & trailing period (to remove)
755  // cap2 = ep, cap3 = total,
756  static const QString shortContext =
757  QString(R"((?:^|\.)(\s*\(*\s*%1[\s)]*(?:[).:]|$)))").arg(shortEp);
758 
759  // Prefer long format resorting to short format
760  // cap0 = long match to remove, cap1 = long season, cap2 = long ep, cap3 = long total,
761  // cap4 = short match to remove, cap5 = short ep, cap6 = short total
762  static const QRegularExpression ukSeries { "(?:" + longContext + "|" + shortContext + ")",
763  QRegularExpression::CaseInsensitiveOption };
764 
765  bool series = false;
766  bool fromTitle = true;
767  match = ukSeries.match(event.m_title);
768  if (!match.hasMatch())
769  {
770  fromTitle = false;
771  match = ukSeries.match(event.m_description);
772  }
773  if (match.hasMatch())
774  {
775  if (!match.captured(1).isEmpty())
776  {
777  event.m_season = match.captured(1).toUInt();
778  series = true;
779  }
780 
781  if (!match.captured(2).isEmpty())
782  {
783  event.m_episode = match.captured(2).toUInt();
784  series = true;
785  }
786  else if (!match.captured(5).isEmpty())
787  {
788  event.m_episode = match.captured(5).toUInt();
789  series = true;
790  }
791 
792  if (!match.captured(3).isEmpty())
793  {
794  event.m_totalepisodes = match.captured(3).toUInt();
795  series = true;
796  }
797  else if (!match.captured(6).isEmpty())
798  {
799  event.m_totalepisodes = match.captured(6).toUInt();
800  series = true;
801  }
802 
803  // Remove long or short match. Short text doesn't start at position2
804  int form = match.captured(4).isEmpty() ? 0 : 4;
805 
806  if (fromTitle)
807  {
808  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from title (%4) \"%5\"")
809  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
810  .arg(event.m_title, event.m_description));
811 
812  event.m_title.remove(match.capturedStart(form),
813  match.capturedLength(form));
814  }
815  else
816  {
817  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from description (%4) \"%5\"")
818  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
819  .arg(event.m_title, event.m_description));
820 
821  if (match.capturedStart(form) == 0)
822  {
823  // Remove from the start of the description.
824  // Otherwise it ends up in the subtitle.
825  event.m_description.remove(match.capturedStart(form),
826  match.capturedLength(form));
827  }
828  }
829  }
830 
831  if (isMovie)
832  event.m_categoryType = ProgramInfo::kCategoryMovie;
833  else if (series)
834  event.m_categoryType = ProgramInfo::kCategorySeries;
835 
836  // Multi-part episodes, or films (e.g. ITV film split by news)
837  // Matches Part 1, Pt 1/2, Part 1 of 2 etc.
838  static const QRegularExpression ukPart { R"([-(\:,.]\s*(?:Part|Pt)\s*(\d+)\s*(?:(?:of|/)\s*(\d+))?\s*[-):,.])",
839  QRegularExpression::CaseInsensitiveOption };
840  match = ukPart.match(event.m_title);
841  auto match2 = ukPart.match(event.m_description);
842  if (match.hasMatch())
843  {
844  event.m_partnumber = match.captured(1).toUInt();
845  event.m_parttotal = match.captured(2).toUInt();
846 
847  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from title (%3)")
848  .arg(event.m_partnumber).arg(event.m_parttotal).arg(event.m_title));
849 
850  // Remove from the title
851  event.m_title.remove(match.capturedStart(0),
852  match.capturedLength(0));
853  }
854  else if (match2.hasMatch())
855  {
856  event.m_partnumber = match2.captured(1).toUInt();
857  event.m_parttotal = match2.captured(2).toUInt();
858 
859  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from description (%3) \"%4\"")
860  .arg(event.m_partnumber).arg(event.m_parttotal)
861  .arg(event.m_title, event.m_description));
862 
863  // Remove from the start of the description.
864  // Otherwise it ends up in the subtitle.
865  if (match2.capturedStart(0) == 0)
866  {
867  // Retain a single colon (subtitle separator) if we remove any
868  QString sub = match2.captured(0).contains(":") ? ":" : "";
869  event.m_description = event.m_description.replace(match2.captured(0), sub);
870  }
871  }
872 
873  static const QRegularExpression ukStarring { R"((?:Western\s)?[Ss]tarring ([\w\s\-']+?)[Aa]nd\s([\w\s\-']+?)[\.|,]\s*(\d{4})?(?:\.\s)?)" };
874  match = ukStarring.match(event.m_description);
875  if (match.hasMatch())
876  {
877  // if we match this we've captured 2 actors and an (optional) airdate
878  /* Possible TODO: if EIT inlcude the priority and/or character
879  * names for the actors, include them in AddPerson call. */
880  event.AddPerson(DBPerson::kActor, match.captured(1));
881  event.AddPerson(DBPerson::kActor, match.captured(2));
882  if (match.captured(3).length() > 0)
883  {
884  bool ok = false;
885  uint y = match.captured(3).toUInt(&ok);
886  if (ok)
887  {
888  event.m_airdate = y;
889  event.m_originalairdate = QDate(y, 1, 1);
890  }
891  }
892  }
893 
894  static const QRegularExpression ukLaONoSplit { "^Law & Order: (?:Criminal Intent|LA|"
895  "Special Victims Unit|Trial by Jury|UK|You the Jury)" };
896  if (!event.m_title.startsWith("CSI:") && !event.m_title.startsWith("CD:") &&
897  !event.m_title.contains(ukLaONoSplit) &&
898  !event.m_title.startsWith("Mission: Impossible"))
899  {
900  static const QRegularExpression ukDoubleDotStart { R"(^\.\.+)" };
901  static const QRegularExpression ukDoubleDotEnd { R"(\.\.+$)" };
902  if ((event.m_title.indexOf(ukDoubleDotEnd) != -1) &&
903  (event.m_description.indexOf(ukDoubleDotStart) != -1))
904  {
905  QString strPart=event.m_title.remove(ukDoubleDotEnd)+" ";
906  strFull = strPart + event.m_description.remove(ukDoubleDotStart);
907  int position1 = -1;
908  static const QRegularExpression ukCEPQ { R"([:\!\.\?]\s)" };
909  static const QRegularExpression ukSpaceStart { "^ " };
910  if (isMovie &&
911  ((position1 = strFull.indexOf(ukCEPQ,strPart.length())) != -1))
912  {
913  if (strFull[position1] == '!' || strFull[position1] == '?'
914  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
915  position1++;
916  event.m_title = strFull.left(position1);
917  event.m_description = strFull.mid(position1 + 1);
918  event.m_description.remove(ukSpaceStart);
919  }
920  else if ((position1 = strFull.indexOf(ukCEPQ)) != -1)
921  {
922  if (strFull[position1] == '!' || strFull[position1] == '?'
923  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
924  position1++;
925  event.m_title = strFull.left(position1);
926  event.m_description = strFull.mid(position1 + 1);
927  event.m_description.remove(ukSpaceStart);
928  SetUKSubtitle(event);
929  }
930  }
931  else if (event.m_description.indexOf(uk24ep) != -1)
932  {
933  auto match24 = uk24ep.match(event.m_description);
934  if (match24.hasMatch())
935  {
936  // Special case for episodes of 24.
937  // -2 from the length cause we don't want ": " on the end
938  event.m_subtitle = event.m_description.mid(match24.capturedStart(0),
939  match24.captured(0).length() - 2);
940  event.m_description = event.m_description.remove(match24.captured(0));
941  }
942  }
943  else if (event.m_description.indexOf(ukTime) == -1)
944  {
945  static const QRegularExpression ukYearColon { R"(^[\d]{4}:)" };
946  if (!isMovie && (event.m_title.indexOf(ukYearColon) < 0))
947  {
948  int position1 = -1;
949  if (((position1 = event.m_title.indexOf(":")) != -1) &&
950  (event.m_description.indexOf(":") < 0 ))
951  {
952  static const QRegularExpression ukCompleteDots { R"(^\.\.+$)" };
953  if (event.m_title.mid(position1+1).indexOf(ukCompleteDots)==0)
954  {
955  SetUKSubtitle(event);
956  QString strTmp = event.m_title.mid(position1+1);
957  event.m_title.resize(position1);
958  event.m_subtitle = strTmp+event.m_subtitle;
959  }
960  else if ((uint)position1 < kSubtitleMaxLen)
961  {
962  event.m_subtitle = event.m_title.mid(position1 + 1);
963  event.m_title = event.m_title.left(position1);
964  }
965  }
966  else
967  SetUKSubtitle(event);
968  }
969  }
970  }
971 
972  if (!isMovie && event.m_subtitle.isEmpty() &&
973  !event.m_title.startsWith("The X-Files"))
974  {
975  int position1 = -1;
976  if ((position1=event.m_description.indexOf(ukTime)) != -1)
977  {
978  static const QRegularExpression ukColonPeriod { R"([:\.])" };
979  int position2 = event.m_description.indexOf(ukColonPeriod);
980  if ((position2>=0) && (position2 < (position1-2)))
981  SetUKSubtitle(event);
982  }
983  else if ((position1=event.m_title.indexOf("-")) != -1)
984  {
985  if ((uint)position1 < kSubtitleMaxLen)
986  {
987  event.m_subtitle = event.m_title.mid(position1 + 1);
988  event.m_subtitle.remove(kUKSpaceColonStart);
989  event.m_title = event.m_title.left(position1);
990  }
991  }
992  else
993  SetUKSubtitle(event);
994  }
995 
996  // Work out the year (if any)
997  static const QRegularExpression ukYear { R"([\[\(]([\d]{4})[\)\]])" };
998  match = ukYear.match(event.m_description);
999  if (match.hasMatch())
1000  {
1001  event.m_description.remove(match.capturedStart(0),
1002  match.capturedLength(0));
1003  bool ok = false;
1004  uint y = match.captured(1).toUInt(&ok);
1005  if (ok)
1006  {
1007  event.m_airdate = y;
1008  event.m_originalairdate = QDate(y, 1, 1);
1009  }
1010  }
1011 
1012  // Trim leading/trailing '.'
1013  static const QRegularExpression ukDotSpaceStart { R"(^\. )" };
1014  static const QRegularExpression ukDotEnd { R"(\.$)" };
1015  event.m_subtitle.remove(ukDotSpaceStart);
1016  if (event.m_subtitle.lastIndexOf("..") != (event.m_subtitle.length()-2))
1017  event.m_subtitle.remove(ukDotEnd);
1018 
1019  // Reverse the subtitle and empty description
1020  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
1021  {
1022  event.m_description=event.m_subtitle;
1023  event.m_subtitle.clear();
1024  }
1025 }
1026 
1031 {
1032  /* Used for PBS ATSC Subtitles are separated by a colon */
1033  int position = event.m_description.indexOf(':');
1034  if (position != -1)
1035  {
1036  const QString stmp = event.m_description;
1037  event.m_subtitle = stmp.left(position);
1038  event.m_description = stmp.right(stmp.length() - position - 2);
1039  }
1040 }
1041 
1045 void EITFixUp::FixComHem(DBEventEIT &event, bool process_subtitle)
1046 {
1047  static const QRegularExpression comHemPersSeparator { R"((, |\soch\s))" };
1048 
1049  // Reverse what EITFixUp::Fix() did
1050  if (event.m_subtitle.isEmpty() && !event.m_description.isEmpty())
1051  {
1052  event.m_subtitle = event.m_description;
1053  event.m_description = "";
1054  }
1055 
1056  // Remove subtitle, it contains the category and we already know that
1057  event.m_subtitle = "";
1058 
1059  bool isSeries = false;
1060  // Try to find episode numbers
1061  static const QRegularExpression comHemSeries1
1062  { R"(\s?(?:[dD]el|[eE]pisode)\s([0-9]+)(?:\s?(?:/|:|av)\s?([0-9]+))?\.)" };
1063  static const QRegularExpression comHemSeries2 { R"(\s?-?\s?([Dd]el\s+([0-9]+)))" };
1064  auto match = comHemSeries1.match(event.m_description);
1065  auto match2 = comHemSeries2.match(event.m_title);
1066  if (match2.hasMatch())
1067  {
1068  event.m_partnumber = match2.capturedView(2).toUInt();
1069  event.m_title.remove(match2.capturedStart(), match2.capturedLength());
1070  }
1071  else if (match.hasMatch())
1072  {
1073  if (match.capturedStart(1) != -1)
1074  event.m_partnumber = match.capturedView(1).toUInt();
1075  if (match.capturedStart(2) != -1)
1076  event.m_parttotal = match.capturedView(2).toUInt();
1077 
1078  // Remove the episode numbers, but only if it's not at the begining
1079  // of the description (subtitle code might use it)
1080  if (match.capturedStart() > 0)
1081  event.m_description.remove(match.capturedStart(),
1082  match.capturedLength());
1083  isSeries = true;
1084  }
1085 
1086  // Add partnumber/parttotal to subtitle
1087  // This will be overwritten if we find a better subtitle
1088  if (event.m_partnumber > 0)
1089  {
1090  event.m_subtitle = QString("Del %1").arg(event.m_partnumber);
1091  if (event.m_parttotal > 0)
1092  event.m_subtitle += QString(" av %1").arg(event.m_parttotal);
1093  }
1094 
1095  // Move subtitle info from title to subtitle
1096  static const QRegularExpression comHemTSub { R"(\s+-\s+([^\-]+))" };
1097  match = comHemTSub.match(event.m_title);
1098  if (match.hasMatch())
1099  {
1100  event.m_subtitle = match.captured(1);
1101  event.m_title.remove(match.capturedStart(), match.capturedLength());
1102  }
1103 
1104  // No need to continue without a description.
1105  if (event.m_description.length() <= 0)
1106  return;
1107 
1108  // Try to find country category, year and possibly other information
1109  // from the begining of the description
1110  static const QRegularExpression comHemCountry
1111  { R"(^(\(.+\))?\s?([^ ]+)\s([^\.0-9]+)\sfrån\s([0-9]{4})(?:\smed\s([^\.]+))?\.?)" };
1112  match = comHemCountry.match(event.m_description);
1113  if (match.hasMatch())
1114  {
1115  QString replacement;
1116 
1117  // Original title, usually english title
1118  // note: list[1] contains extra () around the text that needs removing
1119  if (!match.capturedView(1).isEmpty())
1120  {
1121  replacement = match.captured(1) + " ";
1122  //store it somewhere?
1123  }
1124 
1125  // Countr(y|ies)
1126  if (!match.capturedView(2).isEmpty())
1127  {
1128  replacement += match.captured(2) + " ";
1129  //store it somewhere?
1130  }
1131 
1132  // Category
1133  if (!match.capturedView(3).isEmpty())
1134  {
1135  replacement += match.captured(3) + ".";
1136  if(event.m_category.isEmpty())
1137  {
1138  event.m_category = match.captured(3);
1139  }
1140 
1141  if(match.captured(3).indexOf("serie")!=-1)
1142  {
1143  isSeries = true;
1144  }
1145  }
1146 
1147  // Year
1148  if (!match.capturedView(4).isEmpty())
1149  {
1150  bool ok = false;
1151  uint y = match.capturedView(4).trimmed().toUInt(&ok);
1152  if (ok)
1153  event.m_airdate = y;
1154  }
1155 
1156  // Actors
1157  if (!match.capturedView(5).isEmpty())
1158  {
1159 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1160  const QStringList actors =
1161  match.captured(5).split(comHemPersSeparator, QString::SkipEmptyParts);
1162 #else
1163  const QStringList actors =
1164  match.captured(5).split(comHemPersSeparator, Qt::SkipEmptyParts);
1165 #endif
1166  /* Possible TODO: if EIT inlcude the priority and/or character
1167  * names for the actors, include them in AddPerson call. */
1168  for (const auto & actor : qAsConst(actors))
1169  event.AddPerson(DBPerson::kActor, actor);
1170  }
1171 
1172  // Remove year and actors.
1173  // The reason category is left in the description is because otherwise
1174  // the country would look wierd like "Amerikansk. Rest of description."
1175  event.m_description = event.m_description.replace(match.captured(0),replacement);
1176  }
1177 
1178  if (isSeries)
1179  event.m_categoryType = ProgramInfo::kCategorySeries;
1180 
1181  // Look for additional persons in the description
1182  static const QRegularExpression comHemPersons
1183  { R"(\s?([Rr]egi|[Ss]kådespelare|[Pp]rogramledare|[Ii] rollerna):\s([^\.]+)\.)" };
1184  auto iter = comHemPersons.globalMatch(event.m_description);
1185  while (iter.hasNext())
1186  {
1187  auto pmatch = iter.next();
1189 
1190  static const QRegularExpression comHemDirector { "[Rr]egi" };
1191  static const QRegularExpression comHemActor { "[Ss]kådespelare|[Ii] rollerna" };
1192  static const QRegularExpression comHemHost { "[Pp]rogramledare" };
1193  auto dmatch = comHemDirector.match(pmatch.capturedView(1));
1194  auto amatch = comHemActor.match(pmatch.capturedView(1));
1195  auto hmatch = comHemHost.match(pmatch.capturedView(1));
1196  if (dmatch.hasMatch())
1197  role = DBPerson::kDirector;
1198  else if (amatch.hasMatch())
1199  role = DBPerson::kActor;
1200  else if (hmatch.hasMatch())
1201  role = DBPerson::kHost;
1202  else
1203  {
1204  event.m_description.remove(pmatch.capturedStart(), pmatch.capturedLength());
1205  continue;
1206  }
1207 
1208 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1209  const QStringList actors =
1210  pmatch.captured(2).split(comHemPersSeparator, QString::SkipEmptyParts);
1211 #else
1212  const QStringList actors =
1213  pmatch.captured(2).split(comHemPersSeparator, Qt::SkipEmptyParts);
1214 #endif
1215  /* Possible TODO: if EIT inlcude the priority and/or character
1216  * names for the actors, include them in AddPerson call. */
1217  for (const auto & actor : qAsConst(actors))
1218  event.AddPerson(role, actor);
1219 
1220  // Remove it
1221  event.m_description=event.m_description.replace(pmatch.captured(0),"");
1222  }
1223 
1224  // Is this event on a channel we shoud look for a subtitle?
1225  // The subtitle is the first sentence in the description, but the
1226  // subtitle can't be the only thing in the description and it must be
1227  // shorter than 55 characters or we risk picking up the wrong thing.
1228  if (process_subtitle)
1229  {
1230  static const QRegularExpression comHemSub { R"([.\?\!] )" };
1231  int pos2 = event.m_description.indexOf(comHemSub);
1232  bool pvalid = pos2 != -1 && pos2 <= 55;
1233  if (pvalid && (event.m_description.length() - (pos2 + 2)) > 0)
1234  {
1235  event.m_subtitle = event.m_description.left(
1236  pos2 + (event.m_description[pos2] == '?' ? 1 : 0));
1237  event.m_description = event.m_description.mid(pos2 + 2);
1238  }
1239  }
1240 
1241  // Teletext subtitles?
1242  static const QRegularExpression comHemTT { "[Tt]ext-[Tt][Vv]" };
1243  if (event.m_description.indexOf(comHemTT) != -1)
1244  event.m_subtitleType |= SUB_NORMAL;
1245 
1246  // Try to findout if this is a rerun and if so the date.
1247  static const QRegularExpression comHemRerun1 { R"([Rr]epris\sfrån\s([^\.]+)(?:\.|$))" };
1248  static const QRegularExpression comHemRerun2 { R"(([0-9]+)/([0-9]+)(?:\s-\s([0-9]{4}))?)" };
1249  match = comHemRerun1.match(event.m_description);
1250  if (!match.hasMatch())
1251  return;
1252 
1253  // Rerun from today
1254  if (match.captured(1) == "i dag")
1255  {
1256  event.m_originalairdate = event.m_starttime.date();
1257  return;
1258  }
1259 
1260  // Rerun from yesterday afternoon
1261  if (match.captured(1) == "eftermiddagen")
1262  {
1263  event.m_originalairdate = event.m_starttime.date().addDays(-1);
1264  return;
1265  }
1266 
1267  // Rerun with day, month and possibly year specified
1268  match2 = comHemRerun2.match(match.capturedView(1));
1269  if (match2.hasMatch())
1270  {
1271  int day = match2.capturedView(1).toInt();
1272  int month = match2.capturedView(2).toInt();
1273  //int year;
1274  //if (match2.capturedLength(3) > 0)
1275  // year = match2.capturedView(3).toInt();
1276  //else
1277  // year = event.m_starttime.date().year();
1278 
1279  if (day > 0 && month > 0)
1280  {
1281  QDate date(event.m_starttime.date().year(), month, day);
1282  // it's a rerun so it must be in the past
1283  if (date > event.m_starttime.date())
1284  date = date.addYears(-1);
1285  event.m_originalairdate = date;
1286  }
1287  return;
1288  }
1289 }
1290 
1295 {
1296  event.m_category = event.m_subtitle;
1297  /* Used for DVB-S Subtitles are separated by a colon */
1298  int position = event.m_description.indexOf(':');
1299  if (position != -1)
1300  {
1301  const QString stmp = event.m_description;
1302  event.m_subtitle = stmp.left(position);
1303  event.m_description = stmp.right(stmp.length() - position - 2);
1304  }
1305 }
1306 
1311 {
1312  if (event.m_description.startsWith("[Program data ") || event.m_description.startsWith("[Program info "))//TEN
1313  {
1314  event.m_description = "";//event.m_subtitle;
1315  }
1316  if (event.m_description.endsWith("Copyright West TV Ltd. 2011)"))
1317  event.m_description.resize(event.m_description.length()-40);
1318 
1319  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())//due to ten's copyright info, this won't be caught before
1320  {
1321  event.m_description = event.m_subtitle;
1322  event.m_subtitle.clear();
1323  }
1324  if (event.m_description.startsWith(event.m_title+" - "))
1325  event.m_description.remove(0,event.m_title.length()+3);
1326  if (event.m_title.startsWith("LIVE: ", Qt::CaseInsensitive))
1327  {
1328  event.m_title.remove(0, 6);
1329  event.m_description.prepend("(Live) ");
1330  }
1331 }
1332 
1337 {
1338  static const QRegularExpression rating { "\\((G|PG|M|MA)\\)" };
1339  auto match = rating.match(event.m_description);
1340  if (match.hasMatch())
1341  {
1342  EventRating prograting;
1343  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1344  event.m_ratings.push_back(prograting);
1345  event.m_description.remove(0,match.capturedLength()+1);
1346  }
1347  if (event.m_description.startsWith("[HD]"))
1348  {
1349  event.m_videoProps |= VID_HDTV;
1350  event.m_description.remove(0,5);
1351  }
1352  if (event.m_description.startsWith("[CC]"))
1353  {
1354  event.m_subtitleType |= SUB_NORMAL;
1355  event.m_description.remove(0,5);
1356  }
1357  if (event.m_subtitle == "Movie")
1358  {
1359  event.m_subtitle.clear();
1360  event.m_categoryType = ProgramInfo::kCategoryMovie;
1361  }
1362  if (event.m_description.startsWith(event.m_title))
1363  event.m_description.remove(0,event.m_title.length()+1);
1364 }
1365 
1370 {
1371  if (event.m_description.endsWith(" Rpt"))
1372  {
1373  event.m_previouslyshown = true;
1374  event.m_description.resize(event.m_description.size()-4);
1375  }
1376  static const QRegularExpression year { "(\\d{4})$" };
1377  auto match = year.match(event.m_description);
1378  if (match.hasMatch())
1379  {
1380  event.m_airdate = match.capturedView(1).toUInt();
1381  event.m_description.resize(event.m_description.size()-5);
1382  }
1383  if (event.m_description.endsWith(" CC"))
1384  {
1385  event.m_subtitleType |= SUB_NORMAL;
1386  event.m_description.resize(event.m_description.size()-3);
1387  }
1388  QString advisories;//store the advisories to append later
1389  static const QRegularExpression adv { "(\\([A-Z,]+\\))$" };
1390  match = adv.match(event.m_description);
1391  if (match.hasMatch())
1392  {
1393  advisories = match.captured(1);
1394  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1395  }
1396  static const QRegularExpression rating { "(C|G|PG|M|MA)$" };
1397  match = rating.match(event.m_description);
1398  if (match.hasMatch())
1399  {
1400  EventRating prograting;
1401  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1402  if (!advisories.isEmpty())
1403  prograting.m_rating.append(" ").append(advisories);
1404  event.m_ratings.push_back(prograting);
1405  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1406  }
1407 }
1412 {
1413  // If the description has been truncated to fit within the
1414  // 'subtitle' eit field, none of the following will work (ABC)
1415  if (event.m_description.endsWith(".."))
1416  return;
1417  event.m_description = event.m_description.trimmed();
1418 
1419  static const QRegularExpression auFreeviewSY { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\)$)" };
1420  auto match = auFreeviewSY.match(event.m_description);
1421  if (match.hasMatch())
1422  {
1423  if (event.m_subtitle.isEmpty())//nine sometimes has an actual subtitle field and the brackets thingo)
1424  event.m_subtitle = match.captured(2);
1425  event.m_airdate = match.capturedView(3).toUInt();
1426  event.m_description = match.captured(1);
1427  return;
1428  }
1429  static const QRegularExpression auFreeviewY { "(.*) \\(([12][0-9][0-9][0-9])\\)$" };
1430  match = auFreeviewY.match(event.m_description);
1431  if (match.hasMatch())
1432  {
1433  event.m_airdate = match.capturedView(2).toUInt();
1434  event.m_description = match.captured(1);
1435  return;
1436  }
1437  static const QRegularExpression auFreeviewSYC { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1438  match = auFreeviewSYC.match(event.m_description);
1439  if (match.hasMatch())
1440  {
1441  if (event.m_subtitle.isEmpty())
1442  event.m_subtitle = match.captured(2);
1443  event.m_airdate = match.capturedView(3).toUInt();
1444  QStringList actors = match.captured(4).split("/");
1445  /* Possible TODO: if EIT inlcude the priority and/or character
1446  * names for the actors, include them in AddPerson call. */
1447  for (const QString& actor : qAsConst(actors))
1448  event.AddPerson(DBPerson::kActor, actor);
1449  event.m_description = match.captured(1);
1450  return;
1451  }
1452  static const QRegularExpression auFreeviewYC { R"((.*) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1453  match = auFreeviewYC.match(event.m_description);
1454  if (match.hasMatch())
1455  {
1456  event.m_airdate = match.capturedView(2).toUInt();
1457  QStringList actors = match.captured(3).split("/");
1458  /* Possible TODO: if EIT inlcude the priority and/or character
1459  * names for the actors, include them in AddPerson call. */
1460  for (const QString& actor : qAsConst(actors))
1461  event.AddPerson(DBPerson::kActor, actor);
1462  event.m_description = match.captured(1);
1463  }
1464 }
1465 
1470 {
1471  const uint SUBTITLE_PCT = 60; // % of description to allow subtitle to
1472  const uint lSUBTITLE_MAX_LEN = 128;// max length of subtitle field in db.
1473 
1474  // Remove subtitle, it contains category information too specific to use
1475  event.m_subtitle = QString("");
1476 
1477  // No need to continue without a description.
1478  if (event.m_description.length() <= 0)
1479  return;
1480 
1481  // Replace incomplete title if the full one is in the description
1482  static const QRegularExpression mcaIncompleteTitle { R"((.*).\.\.\.$)" };
1483  auto match = mcaIncompleteTitle.match(event.m_title);
1484  if (match.hasMatch())
1485  {
1486  static const QString mcaCompleteTitlea { "^'?(" };
1487  static const QString mcaCompleteTitleb { R"([^\.\?]+[^\'])'?[\.\?]\s+(.+))" };
1488  static const QRegularExpression mcaCompleteTitle
1489  { mcaCompleteTitlea + match.captured(1) + mcaCompleteTitleb,
1490  QRegularExpression::CaseInsensitiveOption};
1491  match = mcaCompleteTitle.match(event.m_description);
1492  if (match.hasMatch())
1493  {
1494  event.m_title = match.captured(1).trimmed();
1495  event.m_description = match.captured(2).trimmed();
1496  }
1497  }
1498 
1499  // Try to find subtitle in description
1500  static const QRegularExpression mcaSubtitle { R"(^'([^\.]+)'\.\s+(.+))" };
1501  match = mcaSubtitle.match(event.m_description);
1502  if (match.hasMatch())
1503  {
1504  uint matchLen = match.capturedLength(1);
1505  uint evDescLen = std::max(static_cast<int>(event.m_description.length()), 1);
1506 
1507  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1508  ((matchLen * 100 / evDescLen) < SUBTITLE_PCT))
1509  {
1510  event.m_subtitle = match.captured(1);
1511  event.m_description = match.captured(2);
1512  }
1513  }
1514 
1515  // Try to find episode numbers in subtitle
1516  static const QRegularExpression mcaSeries { R"(^S?(\d+)\/E?(\d+)\s-\s(.*)$)" };
1517  match = mcaSeries.match(event.m_subtitle);
1518  if (match.hasMatch())
1519  {
1520  uint season = match.capturedView(1).toUInt();
1521  uint episode = match.capturedView(2).toUInt();
1522  event.m_subtitle = match.captured(3).trimmed();
1523  event.m_syndicatedepisodenumber =
1524  QString("S%1E%2").arg(season).arg(episode);
1525  event.m_season = season;
1526  event.m_episode = episode;
1527  event.m_categoryType = ProgramInfo::kCategorySeries;
1528  }
1529 
1530  // Closed captioned?
1531  static const QRegularExpression mcaCC { R"(,?\s(HI|English) Subtitles\.?)" };
1532  int position = event.m_description.indexOf(mcaCC);
1533  if (position > 0)
1534  {
1535  event.m_subtitleType |= SUB_HARDHEAR;
1536  event.m_description.remove(mcaCC);
1537  }
1538 
1539  // Dolby Digital 5.1?
1540  static const QRegularExpression mcaDD { R"(,?\sDD\.?)" };
1541  position = event.m_description.indexOf(mcaDD);
1542  if ((position > 0) && (position > event.m_description.length() - 7))
1543  {
1544  event.m_audioProps |= AUD_DOLBY;
1545  event.m_description.remove(mcaDD);
1546  }
1547 
1548  // Remove bouquet tags
1549  static const QRegularExpression mcaAvail { R"(\s(Only available on [^\.]*bouquet|Not available in RSA [^\.]*)\.?)" };
1550  event.m_description.remove(mcaAvail);
1551 
1552  // Try to find year and director from the end of the description
1553  bool isMovie = false;
1554  static const QRegularExpression mcaCredits { R"((.*)\s\((\d{4})\)\s*([^\.]+)\.?\s*$)" };
1555  match = mcaCredits.match(event.m_description);
1556  if (match.hasMatch())
1557  {
1558  isMovie = true;
1559  event.m_description = match.captured(1).trimmed();
1560  bool ok = false;
1561  uint y = match.captured(2).trimmed().toUInt(&ok);
1562  if (ok)
1563  event.m_airdate = y;
1564  event.AddPerson(DBPerson::kDirector, match.captured(3).trimmed());
1565  }
1566  else
1567  {
1568  // Try to find year only from the end of the description
1569  static const QRegularExpression mcaYear { R"((.*)\s\((\d{4})\)\s*$)" };
1570  match = mcaYear.match(event.m_description);
1571  if (match.hasMatch())
1572  {
1573  isMovie = true;
1574  event.m_description = match.captured(1).trimmed();
1575  bool ok = false;
1576  uint y = match.captured(2).trimmed().toUInt(&ok);
1577  if (ok)
1578  event.m_airdate = y;
1579  }
1580  }
1581 
1582  if (isMovie)
1583  {
1584  static const QRegularExpression mcaActors { R"((.*\.)\s+([^\.]+\s[A-Z][^\.]+)\.\s*)" };
1585  match = mcaActors.match(event.m_description);
1586  if (match.hasMatch())
1587  {
1588  static const QRegularExpression mcaActorsSeparator { "(,\\s+)" };
1589 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1590  const QStringList actors = match.captured(2).split(
1591  mcaActorsSeparator, QString::SkipEmptyParts);
1592 #else
1593  const QStringList actors = match.captured(2).split(
1594  mcaActorsSeparator, Qt::SkipEmptyParts);
1595 #endif
1596  /* Possible TODO: if EIT inlcude the priority and/or character
1597  * names for the actors, include them in AddPerson call. */
1598  for (const auto & actor : qAsConst(actors))
1599  event.AddPerson(DBPerson::kActor, actor.trimmed());
1600  event.m_description = match.captured(1).trimmed();
1601  }
1602  event.m_categoryType = ProgramInfo::kCategoryMovie;
1603  }
1604 }
1605 
1610 {
1611  // subtitle with episode number: "Folge *: 'subtitle'
1612  static const QRegularExpression superRTLSubtitle { R"(^Folge\s(\d{1,3}):\s'(.*)')" };
1613  auto match = superRTLSubtitle.match(event.m_subtitle);
1614  if (match.hasMatch())
1615  {
1616  event.m_season = 0;
1617  event.m_episode = match.capturedView(1).toUInt();
1618  event.m_subtitle = match.captured(2);
1619  }
1620 
1621  // No need to continue without a description or with an subtitle.
1622  if (event.m_description.length() <= 0 || event.m_subtitle.length() > 0)
1623  return;
1624 
1625  // Repeat
1626  static const QRegularExpression rtlRepeat
1627  { R"([\s\(]?Wiederholung.+vo[m|n].+(\d{2}\.\d{2}\.\d{4}|\d{2}[:\.]\d{2}\sUhr)\)?)" };
1628  match = rtlRepeat.match(event.m_description);
1629  if (match.hasMatch())
1630  {
1631  // remove '.' if it matches at the beginning of the description
1632  int pos = match.capturedStart(0);
1633  int length = match.capturedLength(0) + (pos ? 0 : 1);
1634  event.m_description = event.m_description.remove(pos, length).trimmed();
1635  }
1636 
1637  // should be (?:\x{8a}|\\.\\s*|$) but 0x8A gets replaced with 0x20
1638  static const QRegularExpression rtlSubtitle1 { R"(^Folge\s(\d{1,4})\s*:\s+'(.*)'(?:\s|\.\s*|$))" };
1639  static const QRegularExpression rtlSubtitle2 { R"(^Folge\s(\d{1,4})\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1640  static const QRegularExpression rtlSubtitle3 { R"(^(?:Folge\s)?(\d{1,4}(?:\/[IVX]+)?)\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1641  static const QRegularExpression rtlSubtitle4 { R"(^Thema.{0,5}:\s([^\.]+)\.\s*)" };
1642  static const QRegularExpression rtlSubtitle5 { "^'(.+)'\\.\\s*" };
1643  static const QRegularExpression rtlEpisodeNo1 { R"(^(Folge\s\d{1,4})\.*\s*)" };
1644  static const QRegularExpression rtlEpisodeNo2 { R"(^(\d{1,2}\/[IVX]+)\.*\s*)" };
1645 
1646  auto match1 = rtlSubtitle1.match(event.m_description);
1647  auto match2 = rtlSubtitle2.match(event.m_description);
1648  auto match3 = rtlSubtitle3.match(event.m_description);
1649  auto match4 = rtlSubtitle4.match(event.m_description);
1650  auto match5 = rtlSubtitle5.match(event.m_description);
1651  auto match6 = rtlEpisodeNo1.match(event.m_description);
1652  auto match7 = rtlEpisodeNo2.match(event.m_description);
1653 
1654  // subtitle with episode number: "Folge *: 'subtitle'. description
1655  if (match1.hasMatch())
1656  {
1657  event.m_syndicatedepisodenumber = match1.captured(1);
1658  event.m_subtitle = match1.captured(2);
1659  event.m_description =
1660  event.m_description.remove(0, match1.capturedLength());
1661  }
1662  // episode number subtitle
1663  else if (match2.hasMatch())
1664  {
1665  event.m_syndicatedepisodenumber = match2.captured(1);
1666  event.m_subtitle = match2.captured(2);
1667  event.m_description =
1668  event.m_description.remove(0, match2.capturedLength());
1669  }
1670  // episode number subtitle
1671  else if (match3.hasMatch())
1672  {
1673  event.m_syndicatedepisodenumber = match3.captured(1);
1674  event.m_subtitle = match3.captured(2);
1675  event.m_description =
1676  event.m_description.remove(0, match3.capturedLength());
1677  }
1678  // "Thema..."
1679  else if (match4.hasMatch())
1680  {
1681  event.m_subtitle = match4.captured(1);
1682  event.m_description =
1683  event.m_description.remove(0, match4.capturedLength());
1684  }
1685  // "'...'"
1686  else if (match5.hasMatch())
1687  {
1688  event.m_subtitle = match5.captured(1);
1689  event.m_description =
1690  event.m_description.remove(0, match5.capturedLength());
1691  }
1692  // episode number
1693  else if (match6.hasMatch())
1694  {
1695  event.m_syndicatedepisodenumber = match6.captured(2);
1696  event.m_subtitle = match6.captured(1);
1697  event.m_description =
1698  event.m_description.remove(0, match6.capturedLength());
1699  }
1700  // episode number
1701  else if (match7.hasMatch())
1702  {
1703  event.m_syndicatedepisodenumber = match7.captured(2);
1704  event.m_subtitle = match7.captured(1);
1705  event.m_description =
1706  event.m_description.remove(0, match7.capturedLength());
1707  }
1708 
1709  /* got an episode title now? (we did not have one at the start of this function) */
1710  if (!event.m_subtitle.isEmpty())
1712 
1713  /* if we do not have an episode title by now try some guessing as last resort */
1714  if (event.m_subtitle.length() == 0)
1715  {
1716  const uint SUBTITLE_PCT = 35; // % of description to allow subtitle up to
1717  const uint lSUBTITLE_MAX_LEN = 50; // max length of subtitle field in db
1718 
1719  static const QRegularExpression rtlSubtitle { R"(^([^\.]{3,})\.\s+(.+))" };
1720  match = rtlSubtitle.match(event.m_description);
1721  if (match.hasMatch())
1722  {
1723  uint matchLen = match.capturedLength(1);
1724  uint evDescLen = std::max(static_cast<int>(event.m_description.length()), 1);
1725 
1726  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1727  (matchLen * 100 / evDescLen < SUBTITLE_PCT))
1728  {
1729  event.m_subtitle = match.captured(1);
1730  event.m_description = match.captured(2);
1731  }
1732  }
1733  }
1734 }
1735 
1736 // FIXME add more jobs
1737 static const QMap<QString,DBPerson::Role> deCrewTitle {
1738  { "Regie", DBPerson::kDirector },
1739  { "Drehbuch", DBPerson::kWriter },
1740  { "Autor", DBPerson::kWriter },
1741 };
1742 
1747 {
1748  static const QRegularExpression pro7Subtitle { R"(,{0,1}([^,]*?),([^,]+?)\s{0,1}(\d{4})$)" };
1749  auto match = pro7Subtitle.match(event.m_subtitle);
1750  if (match.hasMatch())
1751  {
1752  if (event.m_airdate == 0)
1753  {
1754  event.m_airdate = match.captured(3).toUInt();
1755  }
1756  event.m_subtitle.remove(match.capturedStart(0),
1757  match.capturedLength(0));
1758  }
1759 
1760  /* handle cast, the very last in description */
1761  static const QRegularExpression pro7Cast { "\n\nDarsteller:\n(.*)$",
1762  QRegularExpression::DotMatchesEverythingOption };
1763  match = pro7Cast.match(event.m_description);
1764  if (match.hasMatch())
1765  {
1766  QStringList cast = match.captured(1).split("\n");
1767  for (const auto& line : qAsConst(cast))
1768  {
1769  static const QRegularExpression pro7CastOne { R"(^([^\(]*?)\((.*)\)$)" };
1770  auto match2 = pro7CastOne.match(line);
1771  if (match2.hasMatch())
1772  {
1773  /* Possible TODO: if EIT inlcude the priority and/or character
1774  * names for the actors, include them in AddPerson call. */
1775  event.AddPerson (DBPerson::kActor, match2.captured(1).simplified());
1776  }
1777  }
1778  event.m_description.remove(match.capturedStart(0),
1779  match.capturedLength(0));
1780  }
1781 
1782  /* handle crew, the new very last in description
1783  * format: "Role: Name" or "Role: Name1, Name2"
1784  */
1785  static const QRegularExpression pro7Crew { "\n\n(Regie:.*)$",
1786  QRegularExpression::DotMatchesEverythingOption };
1787  match = pro7Crew.match(event.m_description);
1788  if (match.hasMatch())
1789  {
1790  QStringList crew = match.captured(1).split("\n");
1791  for (const auto& line : qAsConst(crew))
1792  {
1793  static const QRegularExpression pro7CrewOne { R"(^(.*?):\s+(.*)$)" };
1794  auto match2 = pro7CrewOne.match(line);
1795  if (match2.hasMatch())
1796  {
1798  if (deCrewTitle.contains(match2.captured(1)))
1799  role = deCrewTitle[match2.captured(1)];
1800  QStringList names = match2.captured(2).simplified().split(R"(\s*,\s*)");
1801  for (const auto & name : qAsConst(names))
1802  {
1803  /* Possible TODO: if EIT inlcude the priority
1804  * and/or character names for the actors, include
1805  * them in AddPerson call. */
1806  event.AddPerson (role, name);
1807  }
1808  }
1809  }
1810  event.m_description.remove(match.capturedStart(0),
1811  match.capturedLength(0));
1812  }
1813 
1814  /* FIXME unless its Jamie Oliver, then there is neither Crew nor Cast only
1815  * \n\nKoch: Jamie Oliver
1816  */
1817 }
1818 
1823 {
1824  static const QRegularExpression deDisneyChannelSubtitle { R"(,([^,]+?)\s{0,1}(\d{4})$)" };
1825  auto match = deDisneyChannelSubtitle.match(event.m_subtitle);
1826  if (match.hasMatch())
1827  {
1828  if (event.m_airdate == 0)
1829  {
1830  event.m_airdate = match.captured(3).toUInt();
1831  }
1832  event.m_subtitle.remove(match.capturedStart(0),
1833  match.capturedLength(0));
1834  }
1835  static const QRegularExpression tmp { R"(\s[^\s]+?-(Serie))" };
1836  match = tmp.match(event.m_subtitle);
1837  if (match.hasMatch())
1838  {
1839  event.m_categoryType = ProgramInfo::kCategorySeries;
1840  event.m_category=match.captured(0).trimmed();
1841  event.m_subtitle.remove(match.capturedStart(0),
1842  match.capturedLength(0));
1843  }
1844 }
1845 
1850 {
1851  static const QRegularExpression atvSubtitle { R"(,{0,1}\sFolge\s(\d{1,3})$)" };
1852  event.m_subtitle.replace(atvSubtitle, "");
1853 }
1854 
1855 
1860 {
1861  static const QRegularExpression fiRerun { R"(\s?Uusinta[a-zA-Z\s]*\.?)" };
1862  auto match = fiRerun.match(event.m_description);
1863  if (match.hasMatch())
1864  {
1865  event.m_previouslyshown = true;
1866  event.m_description.remove(match.capturedStart(), match.capturedLength());
1867  }
1868 
1869  static const QRegularExpression fiRerun2 { R"(\([Uu]\))" };
1870  match = fiRerun2.match(event.m_description);
1871  if (match.hasMatch())
1872  {
1873  event.m_previouslyshown = true;
1874  event.m_description.remove(match.capturedStart(), match.capturedLength());
1875  }
1876 
1877  // Check for (Stereo) in the decription and set the <audio> tags
1878  match = kStereo.match(event.m_description);
1879  if (match.hasMatch())
1880  {
1881  event.m_audioProps |= AUD_STEREO;
1882  event.m_description.remove(match.capturedStart(), match.capturedLength());
1883  }
1884 
1885  // Remove age limit in parenthesis at end of title
1886  static const QRegularExpression fiAgeLimit { R"(\((\d{1,2}|[ST])\)$)" };
1887  match = fiAgeLimit.match(event.m_title);
1888  if (match.hasMatch())
1889  {
1890  EventRating prograting;
1891  prograting.m_system="FI"; prograting.m_rating = match.captured(1);
1892  event.m_ratings.push_back(prograting);
1893  event.m_title.remove(match.capturedStart(), match.capturedLength());
1894  }
1895 
1896  // Remove Film or Elokuva at start of title
1897  static const QRegularExpression fiFilm { "^(Film|Elokuva): " };
1898  match = fiFilm.match(event.m_title);
1899  if (match.hasMatch())
1900  {
1901  event.m_category = "Film";
1902  event.m_categoryType = ProgramInfo::kCategoryMovie;
1903  event.m_title.remove(match.capturedStart(), match.capturedLength());
1904  }
1905 }
1906 
1912 {
1913  QString country = "";
1914 
1915  static const QRegularExpression dePremiereLength { R"(\s?[0-9]+\sMin\.)" };
1916  event.m_description = event.m_description.replace(dePremiereLength, "");
1917 
1918  static const QRegularExpression dePremiereAirdate { R"(\s?([^\s^\.]+)\s((?:1|2)[0-9]{3})\.)" };
1919  auto match = dePremiereAirdate.match(event.m_description);
1920  if ( match.hasMatch())
1921  {
1922  country = match.captured(1).trimmed();
1923  bool ok = false;
1924  uint y = match.captured(2).toUInt(&ok);
1925  if (ok)
1926  event.m_airdate = y;
1927  event.m_description.remove(match.capturedStart(0),
1928  match.capturedLength(0));
1929  }
1930 
1931  static const QRegularExpression dePremiereCredits { R"(\sVon\s([^,]+)(?:,|\su\.\sa\.)\smit\s([^\.]*)\.)" };
1932  match = dePremiereCredits.match(event.m_description);
1933  if (match.hasMatch())
1934  {
1935  event.AddPerson(DBPerson::kDirector, match.captured(1));
1936 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
1937  const QStringList actors = match.captured(2).split(
1938  ", ", QString::SkipEmptyParts);
1939 #else
1940  const QStringList actors = match.captured(2).split(
1941  ", ", Qt::SkipEmptyParts);
1942 #endif
1943  /* Possible TODO: if EIT inlcude the priority and/or character
1944  * names for the actors, include them in AddPerson call. */
1945  for (const auto & actor : qAsConst(actors))
1946  event.AddPerson(DBPerson::kActor, actor);
1947  event.m_description.remove(match.capturedStart(0),
1948  match.capturedLength(0));
1949  }
1950 
1951  event.m_description = event.m_description.replace("\u000A$", "");
1952  event.m_description = event.m_description.replace("\u000A", " ");
1953 
1954  // move the original titel from the title to subtitle
1955  static const QRegularExpression dePremiereOTitle { R"(\s*\(([^\)]*)\)$)" };
1956  match = dePremiereOTitle.match(event.m_title);
1957  if (match.hasMatch())
1958  {
1959  event.m_subtitle = QString("%1, %2").arg(match.captured(1), country);
1960  event.m_title.remove(match.capturedStart(0),
1961  match.capturedLength(0));
1962  }
1963 
1964  // Find infos about season and episode number
1965  static const QRegularExpression deSkyDescriptionSeasonEpisode { R"(^(\d{1,2}).\sStaffel,\sFolge\s(\d{1,2}):\s)" };
1966  match = deSkyDescriptionSeasonEpisode.match(event.m_description);
1967  if (match.hasMatch())
1968  {
1969  event.m_season = match.captured(1).trimmed().toUInt();
1970  event.m_episode = match.captured(2).trimmed().toUInt();
1971  event.m_description.remove(match.capturedStart(0),
1972  match.capturedLength(0));
1973  }
1974 }
1975 
1976 /*
1977  * Mapping table from English category names to Dutch names and types
1978  */
1979 struct NLMapResult {
1980  QString name;
1982 };
1983 static const QMap<QString, NLMapResult> categoryTrans = {
1984  { "Documentary", { "Documentaire", ProgramInfo::kCategoryNone } },
1985  { "News", { "Nieuws/actualiteiten", ProgramInfo::kCategoryNone } },
1986  { "Kids", { "Jeugd", ProgramInfo::kCategoryNone } },
1987  { "Show/game Show", { "Amusement", ProgramInfo::kCategoryTVShow } },
1988  { "Music/Ballet/Dance", { "Muziek", ProgramInfo::kCategoryNone } },
1989  { "News magazine", { "Informatief", ProgramInfo::kCategoryNone } },
1990  { "Movie", { "Film", ProgramInfo::kCategoryMovie } },
1991  { "Nature/animals/Environment", { "Natuur", ProgramInfo::kCategoryNone } },
1992  { "Movie - Adult", { "Erotiek", ProgramInfo::kCategoryNone } },
1993  { "Movie - Soap/melodrama/folkloric",
1994  { "Serie/soap", ProgramInfo::kCategorySeries } },
1995  { "Arts/Culture", { "Kunst/Cultuur", ProgramInfo::kCategoryNone } },
1996  { "Sports", { "Sport", ProgramInfo::kCategorySports } },
1997  { "Cartoons/Puppets", { "Animatie", ProgramInfo::kCategoryNone } },
1998  { "Movie - Comedy", { "Comedy", ProgramInfo::kCategorySeries } },
1999  { "Movie - Detective/Thriller", { "Misdaad", ProgramInfo::kCategoryNone } },
2000  { "Social/Spiritual Sciences", { "Religieus", ProgramInfo::kCategoryNone } },
2001 };
2002 
2007 {
2008  QString fullinfo = event.m_subtitle + event.m_description;
2009  event.m_subtitle = "";
2010 
2011  // Convert categories to Dutch categories Myth knows.
2012  // nog invoegen: comedy, sport, misdaad
2013 
2014  if (categoryTrans.contains(event.m_category))
2015  {
2016  auto [name, type] = categoryTrans[event.m_category];
2017  event.m_category = name;
2018  event.m_categoryType = type;
2019  }
2020 
2021  // Film - categories are usually not Films
2022  if (event.m_category.startsWith("Film -"))
2023  event.m_categoryType = ProgramInfo::kCategorySeries;
2024 
2025  // Get stereo info
2026  auto match = kStereo.match(fullinfo);
2027  if (match.hasMatch())
2028  {
2029  event.m_audioProps |= AUD_STEREO;
2030  fullinfo.remove(match.capturedStart(), match.capturedLength());
2031  }
2032 
2033  //Get widescreen info
2034  static const QRegularExpression nlWide { "breedbeeld" };
2035  match = nlWide.match(fullinfo);
2036  if (match.hasMatch())
2037  {
2038  event.m_videoProps |= VID_WIDESCREEN;
2039  fullinfo = fullinfo.replace("breedbeeld", ".");
2040  }
2041 
2042  // Get repeat info
2043  static const QRegularExpression nlRepeat { "herh." };
2044  match = nlRepeat.match(fullinfo);
2045  if (match.hasMatch())
2046  fullinfo = fullinfo.replace("herh.", ".");
2047 
2048  // Get teletext subtitle info
2049  static const QRegularExpression nlTxt { "txt" };
2050  match = nlTxt.match(fullinfo);
2051  if (match.hasMatch())
2052  {
2053  event.m_subtitleType |= SUB_NORMAL;
2054  fullinfo = fullinfo.replace("txt", ".");
2055  }
2056 
2057  // Get HDTV information
2058  static const QRegularExpression nlHD { R"(\sHD$)" };
2059  match = nlHD.match(event.m_title);
2060  if (match.hasMatch())
2061  {
2062  event.m_videoProps |= VID_HDTV;
2063  event.m_title.remove(match.capturedStart(), match.capturedLength());
2064  }
2065 
2066  // Try to make subtitle from Afl.:
2067  static const QRegularExpression nlSub { R"(\sAfl\.:\s([^\.]+)\.)" };
2068  match = nlSub.match(fullinfo);
2069  if (match.hasMatch())
2070  {
2071  QString tmpSubString = match.captured(0);
2072  tmpSubString = tmpSubString.right(match.capturedLength() - 7);
2073  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2074  fullinfo.remove(match.capturedStart(), match.capturedLength());
2075  }
2076 
2077  // Try to make subtitle from " "
2078  static const QRegularExpression nlSub2 { R"(\s\"([^\"]+)\")" };
2079  match = nlSub2.match(fullinfo);
2080  if (match.hasMatch())
2081  {
2082  QString tmpSubString = match.captured(0);
2083  tmpSubString = tmpSubString.right(match.capturedLength() - 2);
2084  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2085  fullinfo.remove(match.capturedStart(), match.capturedLength());
2086  }
2087 
2088 
2089  // This is trying to catch the case where the subtitle is in the main title
2090  // but avoid cases where it isn't a subtitle e.g cd:uk
2091  int position = 0;
2092  if (((position = event.m_title.indexOf(":")) != -1) &&
2093  (event.m_title[position + 1].toUpper() == event.m_title[position + 1]) &&
2094  (event.m_subtitle.isEmpty()))
2095  {
2096  event.m_subtitle = event.m_title.mid(position + 1);
2097  event.m_title = event.m_title.left(position);
2098  }
2099 
2100 
2101  // Get the actors
2102  static const QRegularExpression nlActors { R"(\sMet:\s.+e\.a\.)" };
2103  static const QRegularExpression nlPersSeparator { R"((, |\sen\s))" };
2104  match = nlActors.match(fullinfo);
2105  if (match.hasMatch())
2106  {
2107  QString tmpActorsString = match.captured(0);
2108  tmpActorsString = tmpActorsString.right(tmpActorsString.length() - 6);
2109  tmpActorsString = tmpActorsString.left(tmpActorsString.length() - 5);
2110 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2111  const QStringList actors =
2112  tmpActorsString.split(nlPersSeparator, QString::SkipEmptyParts);
2113 #else
2114  const QStringList actors =
2115  tmpActorsString.split(nlPersSeparator, Qt::SkipEmptyParts);
2116 #endif
2117  /* Possible TODO: if EIT inlcude the priority and/or character
2118  * names for the actors, include them in AddPerson call. */
2119  for (const auto & actor : qAsConst(actors))
2120  event.AddPerson(DBPerson::kActor, actor);
2121  fullinfo.remove(match.capturedStart(), match.capturedLength());
2122  }
2123 
2124  // Try to find presenter
2125  static const QRegularExpression nlPres { R"(\sPresentatie:\s([^\.]+)\.)" };
2126  match = nlPres.match(fullinfo);
2127  if (match.hasMatch())
2128  {
2129  QString tmpPresString = match.captured(0);
2130  tmpPresString = tmpPresString.right(tmpPresString.length() - 14);
2131  tmpPresString = tmpPresString.left(tmpPresString.length() -1);
2132 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2133  const QStringList presenters =
2134  tmpPresString.split(nlPersSeparator, QString::SkipEmptyParts);
2135 #else
2136  const QStringList presenters =
2137  tmpPresString.split(nlPersSeparator, Qt::SkipEmptyParts);
2138 #endif
2139  for (const auto & presenter : qAsConst(presenters))
2140  event.AddPerson(DBPerson::kPresenter, presenter);
2141  fullinfo.remove(match.capturedStart(), match.capturedLength());
2142  }
2143 
2144  // Try to find year
2145  static const QRegularExpression nlYear1 { R"(\suit\s([1-2][0-9]{3}))" };
2146  static const QRegularExpression nlYear2 { R"((\s\([A-Z]{0,3}/?)([1-2][0-9]{3})\))",
2147  QRegularExpression::CaseInsensitiveOption };
2148  match = nlYear1.match(fullinfo);
2149  if (match.hasMatch())
2150  {
2151  bool ok = false;
2152  uint y = match.capturedView(1).toUInt(&ok);
2153  if (ok)
2154  event.m_originalairdate = QDate(y, 1, 1);
2155  }
2156 
2157  match = nlYear2.match(fullinfo);
2158  if (match.hasMatch())
2159  {
2160  bool ok = false;
2161  uint y = match.capturedView(2).toUInt(&ok);
2162  if (ok)
2163  event.m_originalairdate = QDate(y, 1, 1);
2164  }
2165 
2166  // Try to find director
2167  static const QRegularExpression nlDirector { R"(\svan\s(([A-Z][a-z]+\s)|([A-Z]\.\s)))" };
2168  match = nlDirector.match(fullinfo);
2169  if (match.hasMatch())
2170  event.AddPerson(DBPerson::kDirector, match.captured(1));
2171 
2172  // Strip leftovers
2173  static const QRegularExpression nlRub { R"(\s?\(\W+\)\s?)" };
2174  fullinfo.remove(nlRub);
2175 
2176  // Strip category info from description
2177  static const QRegularExpression nlCat { "^(Amusement|Muziek|Informatief|Nieuws/actualiteiten|Jeugd|Animatie|Sport|Serie/soap|Kunst/Cultuur|Documentaire|Film|Natuur|Erotiek|Comedy|Misdaad|Religieus)\\.\\s" };
2178  fullinfo.remove(nlCat);
2179 
2180  // Remove omroep from title
2181  static const QRegularExpression nlOmroep { R"(\s\(([A-Z]+/?)+\)$)" };
2182  event.m_title.remove(nlOmroep);
2183 
2184  // Put information back in description
2185 
2186  event.m_description = fullinfo;
2187 }
2188 
2190 {
2191  // remove category movie from short events
2193  event.m_starttime.secsTo(event.m_endtime) < kMinMovieDuration)
2194  {
2195  /* default taken from ContentDescriptor::GetMythCategory */
2196  event.m_categoryType = ProgramInfo::kCategoryTVShow;
2197  }
2198 }
2199 
2204 {
2205  // Check for "title (R)" in the title
2206  static const QRegularExpression noRerun { "\\(R\\)" };
2207  auto match = noRerun.match(event.m_title);
2208  if (match.hasMatch())
2209  {
2210  event.m_previouslyshown = true;
2211  event.m_title.remove(match.capturedStart(), match.capturedLength());
2212  }
2213  // Check for "subtitle (HD)" in the subtitle
2214  static const QRegularExpression noHD { R"([\(\[]HD[\)\]])" };
2215  match = noHD.match(event.m_subtitle);
2216  if (match.hasMatch())
2217  {
2218  event.m_videoProps |= VID_HDTV;
2219  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
2220  }
2221  // Check for "description (HD)" in the description
2222  match = noHD.match(event.m_description);
2223  if (match.hasMatch())
2224  {
2225  event.m_videoProps |= VID_HDTV;
2226  event.m_description.remove(match.capturedStart(), match.capturedLength());
2227  }
2228 }
2229 
2234 {
2235  // Check for "title (R)" in the title
2236  static const QRegularExpression noRerun { "\\(R\\)" };
2237  auto match = noRerun.match(event.m_title);
2238  if (match.hasMatch())
2239  {
2240  event.m_previouslyshown = true;
2241  event.m_title.remove(match.capturedStart(), match.capturedLength());
2242  }
2243  // Check for "(R)" in the description
2244  match = noRerun.match(event.m_description);
2245  if (match.hasMatch())
2246  {
2247  event.m_previouslyshown = true;
2248  }
2249 
2250  // Move colon separated category from program-titles into description
2251  // Have seen "NRK2s historiekveld: Film: bla-bla"
2252  static const QRegularExpression noNRKCategories
2253  { "^(Superstrek[ea]r|Supersomm[ea]r|Superjul|Barne-tv|Fantorangen|Kuraffen|Supermorg[eo]n|Julemorg[eo]n|Sommermorg[eo]n|"
2254  "Kuraffen-TV|Sport i dag|NRKs sportsl.rdag|NRKs sportss.ndag|Dagens dokumentar|"
2255  "NRK2s historiekveld|Detektimen|Nattkino|Filmklassiker|Film|Kortfilm|P.skemorg[eo]n|"
2256  "Radioteatret|Opera|P2-Akademiet|Nyhetsmorg[eo]n i P2 og Alltid Nyheter:): (.+)" };
2257  match = noNRKCategories.match(event.m_title);
2258  if (match.hasMatch() && (match.capturedLength(2) > 1))
2259  {
2260  event.m_title = match.captured(2);
2261  event.m_description = "(" + match.captured(1) + ") " + event.m_description;
2262  }
2263 
2264  // Remove season premiere markings
2265  static const QRegularExpression noPremiere { "\\s+-\\s+(Sesongpremiere|Premiere|premiere)!?$" };
2266  match = noPremiere.match(event.m_title);
2267  if (match.hasMatch() && (match.capturedStart() >= 3))
2268  event.m_title.remove(match.capturedStart(), match.capturedLength());
2269 
2270  // Try to find colon-delimited subtitle in title, only tested for NRK channels
2271  if (!event.m_title.startsWith("CSI:") &&
2272  !event.m_title.startsWith("CD:") &&
2273  !event.m_title.startsWith("Distriktsnyheter: fra"))
2274  {
2275  static const QRegularExpression noColonSubtitle { "^([^:]+): (.+)" };
2276  match = noColonSubtitle.match(event.m_title);
2277  if (match.hasMatch())
2278  {
2279  if (event.m_subtitle.length() <= 0)
2280  {
2281  event.m_title = match.captured(1);
2282  event.m_subtitle = match.captured(2);
2283  }
2284  else if (event.m_subtitle == match.captured(2))
2285  {
2286  event.m_title = match.captured(1);
2287  }
2288  }
2289  }
2290 }
2291 
2296 {
2297  // Source: YouSee Rules of Operation v1.16
2298  // url: http://yousee.dk/~/media/pdf/CPE/Rules_Operation.ashx
2299  int episode = -1;
2300  int season = -1;
2301 
2302  // Title search
2303  // episode and part/part total
2304  static const QRegularExpression dkEpisode { R"(\(([0-9]+)\))" };
2305  auto match = dkEpisode.match(event.m_title);
2306  if (match.hasMatch())
2307  {
2308  episode = match.capturedView(1).toInt();
2309  event.m_partnumber = match.capturedView(1).toInt();
2310  event.m_title.remove(match.capturedStart(), match.capturedLength());
2311  }
2312 
2313  static const QRegularExpression dkPart { R"(\(([0-9]+):([0-9]+)\))" };
2314  match = dkPart.match(event.m_title);
2315  if (match.hasMatch())
2316  {
2317  episode = match.capturedView(1).toInt();
2318  event.m_partnumber = match.capturedView(1).toInt();
2319  event.m_parttotal = match.capturedView(2).toInt();
2320  event.m_title.remove(match.capturedStart(), match.capturedLength());
2321  }
2322 
2323  // subtitle delimiters
2324  static const QRegularExpression dkSubtitle1 { "^([^:]+): (.+)" };
2325  match = dkSubtitle1.match(event.m_title);
2326  if (match.hasMatch())
2327  {
2328  event.m_title = match.captured(1);
2329  event.m_subtitle = match.captured(2);
2330  }
2331  else
2332  {
2333  static const QRegularExpression dkSubtitle2 { "^([^:]+) - (.+)" };
2334  match = dkSubtitle2.match(event.m_title);
2335  if (match.hasMatch())
2336  {
2337  event.m_title = match.captured(1);
2338  event.m_subtitle = match.captured(2);
2339  }
2340  }
2341 
2342  // Description search
2343  // Season (Sæson [:digit:]+.) => episode = season episode number
2344  // or year (- år [:digit:]+(\\)|:) ) => episode = total episode number
2345  static const QRegularExpression dkSeason1 { "Sæson ([0-9]+)\\." };
2346  match = dkSeason1.match(event.m_description);
2347  if (match.hasMatch())
2348  {
2349  season = match.capturedView(1).toInt();
2350  }
2351  else
2352  {
2353  static const QRegularExpression dkSeason2 { "- år ([0-9]+) :" };
2354  match = dkSeason2.match(event.m_description);
2355  if (match.hasMatch())
2356  {
2357  season = match.capturedView(1).toInt();
2358  }
2359  }
2360 
2361  if (episode > 0)
2362  event.m_episode = episode;
2363 
2364  if (season > 0)
2365  event.m_season = season;
2366 
2367  //Feature:
2368  static const QRegularExpression dkFeatures { "Features:(.+)" };
2369  match = dkFeatures.match(event.m_description);
2370  if (match.hasMatch())
2371  {
2372  QString features = match.captured(1);
2373  event.m_description.remove(match.capturedStart(),
2374  match.capturedLength());
2375  // 16:9
2376  static const QRegularExpression dkWidescreen { " 16:9" };
2377  if (features.indexOf(dkWidescreen) != -1)
2378  event.m_videoProps |= VID_WIDESCREEN;
2379  // HDTV
2380  static const QRegularExpression dkHD { " HD" };
2381  if (features.indexOf(dkHD) != -1)
2382  event.m_videoProps |= VID_HDTV;
2383  // Dolby Digital surround
2384  static const QRegularExpression dkDolby { " 5:1" };
2385  if (features.indexOf(dkDolby) != -1)
2386  event.m_audioProps |= AUD_DOLBY;
2387  // surround
2388  static const QRegularExpression dkSurround { R"( \(\(S\)\))" };
2389  if (features.indexOf(dkSurround) != -1)
2390  event.m_audioProps |= AUD_SURROUND;
2391  // stereo
2392  static const QRegularExpression dkStereo { " S" };
2393  if (features.indexOf(dkStereo) != -1)
2394  event.m_audioProps |= AUD_STEREO;
2395  // (G)
2396  static const QRegularExpression dkReplay { " \\(G\\)" };
2397  if (features.indexOf(dkReplay) != -1)
2398  event.m_previouslyshown = true;
2399  // TTV
2400  static const QRegularExpression dkTxt { " TTV" };
2401  if (features.indexOf(dkTxt) != -1)
2402  event.m_subtitleType |= SUB_NORMAL;
2403  }
2404 
2405  // Series and program id
2406  // programid is currently not transmitted
2407  // YouSee doesn't use a default authority but uses the first byte after
2408  // the / to indicate if the seriesid is global unique or unique on the
2409  // service id
2410  if (event.m_seriesId.length() >= 1 && event.m_seriesId[0] == '/')
2411  {
2412  QString newid;
2413  if (event.m_seriesId[1] == '1')
2414  {
2415  newid = QString("%1%2").arg(event.m_chanid).
2416  arg(event.m_seriesId.mid(2,8));
2417  }
2418  else
2419  {
2420  newid = event.m_seriesId.mid(2,8);
2421  }
2422  event.m_seriesId = newid;
2423  }
2424 
2425  if (event.m_programId.length() >= 1 && event.m_programId[0] == '/')
2426  event.m_programId[0]='_';
2427 
2428  // Add season and episode number to subtitle
2429  if (episode > 0)
2430  {
2431  event.m_subtitle = QString("%1 (%2").arg(event.m_subtitle).arg(episode);
2432  if (event.m_parttotal >0)
2433  event.m_subtitle = QString("%1:%2").arg(event.m_subtitle).
2434  arg(event.m_parttotal);
2435  if (season > 0)
2436  {
2437  event.m_season = season;
2438  event.m_episode = episode;
2439  event.m_syndicatedepisodenumber =
2440  QString("S%1E%2").arg(season).arg(episode);
2441  event.m_subtitle = QString("%1 Sæson %2").arg(event.m_subtitle).
2442  arg(season);
2443  }
2444  event.m_subtitle = QString("%1)").arg(event.m_subtitle);
2445  }
2446 
2447  // Find actors and director in description
2448  static const QRegularExpression dkDirector { "(?:Instr.: |Instrukt.r: )(.+)$" };
2449  static const QRegularExpression dkPersonsSeparator { "(, )|(og )" };
2450  QStringList directors {};
2451  match = dkDirector.match(event.m_description);
2452  if (match.hasMatch())
2453  {
2454  QString tmpDirectorsString = match.captured(1);
2455 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2456  directors = tmpDirectorsString.split(dkPersonsSeparator, QString::SkipEmptyParts);
2457 #else
2458  directors = tmpDirectorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2459 #endif
2460  for (const auto & director : qAsConst(directors))
2461  {
2462  tmpDirectorsString = director.split(":").last().trimmed().
2463  remove(kDotAtEnd);
2464  if (tmpDirectorsString != "")
2465  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2466  }
2467  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2468  }
2469 
2470  static const QRegularExpression dkActors { "(?:Medvirkende: |Medv\\.: )(.+)" };
2471  match = dkActors.match(event.m_description);
2472  if (match.hasMatch())
2473  {
2474  QString tmpActorsString = match.captured(1);
2475 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2476  const QStringList actors =
2477  tmpActorsString.split(dkPersonsSeparator, QString::SkipEmptyParts);
2478 #else
2479  const QStringList actors =
2480  tmpActorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2481 #endif
2482  for (const auto & actor : qAsConst(actors))
2483  {
2484  tmpActorsString = actor.split(":").last().trimmed().remove(kDotAtEnd);
2485  if (!tmpActorsString.isEmpty() && !directors.contains(tmpActorsString))
2486  event.AddPerson(DBPerson::kActor, tmpActorsString);
2487  }
2488  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2489  }
2490 
2491  //find year
2492  static const QRegularExpression dkYear { " fra ([0-9]{4})[ \\.]" };
2493  match = dkYear.match(event.m_description);
2494  if (match.hasMatch())
2495  {
2496  bool ok = false;
2497  uint y = match.capturedView(1).toUInt(&ok);
2498  if (ok)
2499  event.m_originalairdate = QDate(y, 1, 1);
2500  }
2501 }
2502 
2507 {
2508  LOG(VB_EIT, LOG_INFO, QString("Applying html strip to %1").arg(event.m_title));
2509  static const QRegularExpression html { "</?EM>", QRegularExpression::CaseInsensitiveOption };
2510  event.m_title.remove(html);
2511 }
2512 
2513 // Moves the subtitle field into the description since it's just used
2514 // as more description field. All the sort-out will happen in the description
2515 // field. Also, sometimes the description is just a repeat of the title. If so,
2516 // we remove it.
2518 {
2519  if (event.m_title == event.m_description)
2520  {
2521  event.m_description = QString("");
2522  }
2523  if (!event.m_subtitle.isEmpty())
2524  {
2525  if (event.m_subtitle.trimmed().right(1) != ".'" )
2526  event.m_subtitle = event.m_subtitle.trimmed() + ".";
2527  event.m_description = event.m_subtitle.trimmed() + QString(" ") + event.m_description;
2528  event.m_subtitle = QString("");
2529  }
2530 }
2531 
2533 {
2534  // Program ratings
2535  static const QRegularExpression grRating { R"(\[(K|Κ|8|12|16|18)\]\s*)",
2536  QRegularExpression::CaseInsensitiveOption };
2537  auto match = grRating.match(event.m_title);
2538  if (match.hasMatch())
2539  {
2540  EventRating prograting;
2541  prograting.m_system="GR"; prograting.m_rating = match.captured(1);
2542  event.m_ratings.push_back(prograting);
2543  event.m_title.remove(match.capturedStart(), match.capturedLength());
2544  event.m_title = event.m_title.trimmed();
2545  }
2546 
2547  //Live show
2548  int position = event.m_title.indexOf("(Ζ)");
2549  if (position != -1)
2550  {
2551  event.m_title = event.m_title.replace("(Ζ)", "");
2552  event.m_description.prepend("Ζωντανή Μετάδοση. ");
2553  }
2554 
2555  // Greek not previously Shown
2556  static const QRegularExpression grNotPreviouslyShown {
2557  R"(\W?(?:-\s*)*(?:\b[Α1]['΄η]?\s*(?:τηλεοπτικ[ηή]\s*)?(?:μετ[αά]δοση|προβολ[ηή]))\W?)",
2558  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2559  match = grNotPreviouslyShown.match(event.m_title);
2560  if (match.hasMatch())
2561  {
2562  event.m_previouslyshown = false;
2563  event.m_title.remove(match.capturedStart(), match.capturedLength());
2564  }
2565 
2566  // Greek Replay (Ε)
2567  // it might look redundant compared to previous check but at least it helps
2568  // remove the (Ε) From the title.
2569  static const QRegularExpression grReplay { R"(\([ΕE]\))" };
2570  match = grReplay.match(event.m_title);
2571  if (match.hasMatch())
2572  {
2573  event.m_previouslyshown = true;
2574  event.m_title.remove(match.capturedStart(), match.capturedLength());
2575  }
2576 
2577  // Check for (HD) in the decription
2578  position = event.m_description.indexOf("(HD)");
2579  if (position != -1)
2580  {
2581  event.m_description = event.m_description.replace("(HD)", "");
2582  event.m_videoProps |= VID_HDTV;
2583  }
2584 
2585  // Check for (Full HD) in the decription
2586  position = event.m_description.indexOf("(Full HD)");
2587  if (position != -1)
2588  {
2589  event.m_description = event.m_description.replace("(Full HD)", "");
2590  event.m_videoProps |= VID_HDTV;
2591  }
2592 
2593  static const QRegularExpression grFixnofullstopActors { R"(\w\s(Παίζουν:|Πρωταγων))" };
2594  match = grFixnofullstopActors.match(event.m_description);
2595  if (match.hasMatch())
2596  event.m_description.insert(match.capturedStart() + 1, ".");
2597 
2598  // If they forgot the "." at the end of the sentence before the actors/directors begin, let's insert it.
2599  static const QRegularExpression grFixnofullstopDirectors { R"(\w\s(Σκηνοθ[εέ]))" };
2600  match = grFixnofullstopDirectors.match(event.m_description);
2601  if (match.hasMatch())
2602  event.m_description.insert(match.capturedStart() + 1, ".");
2603 
2604  // Find actors and director in description
2605  // I am looking for actors first and then for directors/presenters because
2606  // sometimes punctuation is missing and the "Παίζουν:" label is mistaken
2607  // for a director's/presenter's surname (directors/presenters are shown
2608  // before actors in the description field.). So removing the text after
2609  // adding the actors AND THEN looking for dir/pres helps to clear things up.
2610  static const QRegularExpression grActors { R"((?:[Ππ]α[ιί]ζουν:|[ΜMμ]ε τους:|Πρωταγωνιστο[υύ]ν:|Πρωταγωνιστε[ιί]:?)(?:\s+στο ρόλο(?: του| της)?\s(?:\w+\s[οη]\s))?([-\w\s']+(?:,[-\w\s']+)*)(?:κ\.[αά])?\W?)" };
2611  // cap(1) actors, just names
2612  static const QRegularExpression grPeopleSeparator { R"(([,-]\s+))" };
2613  match = grActors.match(event.m_description);
2614  if (match.hasMatch())
2615  {
2616  QString tmpActorsString = match.captured(1);
2617 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2618  const QStringList actors =
2619  tmpActorsString.split(grPeopleSeparator, QString::SkipEmptyParts);
2620 #else
2621  const QStringList actors =
2622  tmpActorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2623 #endif
2624  for (const auto & actor : qAsConst(actors))
2625  {
2626  tmpActorsString = actor.split(":").last().trimmed().remove(kDotAtEnd);
2627  if (tmpActorsString != "")
2628  event.AddPerson(DBPerson::kActor, tmpActorsString);
2629  }
2630  event.m_description.remove(match.capturedStart(), match.capturedLength());
2631  }
2632 
2633  // Director
2634  static const QRegularExpression grDirector { R"((?:Σκηνοθεσία: |Σκηνοθέτης: |Σκηνοθέτης - Επιμέλεια: )(\w+\s\w+\s?)(?:\W?))" };
2635  match = grDirector.match(event.m_description);
2636  if (match.hasMatch())
2637  {
2638  QString tmpDirectorsString = match.captured(1);
2639 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2640  const QStringList directors =
2641  tmpDirectorsString.split(grPeopleSeparator, QString::SkipEmptyParts);
2642 #else
2643  const QStringList directors =
2644  tmpDirectorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2645 #endif
2646  for (const auto & director : qAsConst(directors))
2647  {
2648  tmpDirectorsString = director.split(":").last().trimmed().
2649  remove(kDotAtEnd);
2650  if (tmpDirectorsString != "")
2651  {
2652  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2653  }
2654  }
2655  event.m_description.remove(match.capturedStart(), match.capturedLength());
2656  }
2657 
2658  //Try to find presenter
2659  static const QRegularExpression grPres { R"((?:Παρουσ[ιί]αση:(?:\b)*|Παρουσι[αά]ζ(?:ουν|ει)(?::|\sο|\sη)|Παρουσι[αά]στ(?:[ηή]ς|ρια|ριες|[εέ]ς)(?::|\sο|\sη)|Με τ(?:ον |ην )(?:[\s|:|ο|η])*(?:\b)*)([-\w\s]+(?:,[-\w\s]+)*)\W?)",
2660  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2661  match = grPres.match(event.m_description);
2662  if (match.hasMatch())
2663  {
2664  QString tmpPresentersString = match.captured(1);
2665 #if QT_VERSION < QT_VERSION_CHECK(5,14,0)
2666  const QStringList presenters =
2667  tmpPresentersString.split(grPeopleSeparator, QString::SkipEmptyParts);
2668 #else
2669  const QStringList presenters =
2670  tmpPresentersString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2671 #endif
2672  for (const auto & presenter : qAsConst(presenters))
2673  {
2674  tmpPresentersString = presenter.split(":").last().trimmed().
2675  remove(kDotAtEnd);
2676  if (tmpPresentersString != "")
2677  {
2678  event.AddPerson(DBPerson::kPresenter, tmpPresentersString);
2679  }
2680  }
2681  event.m_description.remove(match.capturedStart(), match.capturedLength());
2682  }
2683 
2684  //find year e.g Παραγωγής 1966 ή ΝΤΟΚΙΜΑΝΤΕΡ - 1998 Κατάλληλο για όλους
2685  // Used in Private channels (not 'secret', just not owned by Government!)
2686  static const QRegularExpression grYear { R"(\W?(?:\s?παραγωγ[ηή]ς|\s?-|,)\s*([1-2][0-9]{3})(?:-\d{1,4})?)",
2687  QRegularExpression::CaseInsensitiveOption };
2688  match = grYear.match(event.m_description);
2689  if (match.hasMatch())
2690  {
2691  bool ok = false;
2692  uint y = match.capturedView(1).toUInt(&ok);
2693  if (ok)
2694  {
2695  event.m_originalairdate = QDate(y, 1, 1);
2696  event.m_description.remove(match.capturedStart(), match.capturedLength());
2697  }
2698  }
2699  // Remove " ."
2700  event.m_description = event.m_description.replace(" .",".").trimmed();
2701 
2702  //find country of origin and remove it from description.
2703  static const QRegularExpression grCountry {
2704  R"((?:\W|\b)(?:(ελλην|τουρκ|αμερικ[αά]ν|γαλλ|αγγλ|βρεττ?αν|γερμαν|ρωσσ?|ιταλ|ελβετ|σουηδ|ισπαν|πορτογαλ|μεξικ[αά]ν|κιν[εέ]ζικ|ιαπων|καναδ|βραζιλι[αά]ν)(ικ[ηή][ςσ])))",
2705  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2706  match = grCountry.match(event.m_description);
2707  if (match.hasMatch())
2708  event.m_description.remove(match.capturedStart(), match.capturedLength());
2709 
2710  // Work out the season and episode numbers (if any)
2711  // Matching pattern "Επεισ[όο]διο:?|Επ 3 από 14|3/14" etc
2712  bool series = false;
2713  static const QRegularExpression grSeason {
2714  R"((?:\W-?)*(?:\(-\s*)?\b(([Α-Ω|A|B|E|Z|H|I|K|M|N]{1,2})(?:'|΄)?|(\d{1,2})(?:ος|ου|oς|os)?)(?:\s*[ΚκKk][υύ]κλο(?:[σς]|υ))\s?)",
2715  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2716  // cap(2) is the season for ΑΒΓΔ
2717  // cap(3) is the season for 1234
2718  match = grSeason.match(event.m_title);
2719  if (match.hasMatch())
2720  {
2721  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2722  {
2723  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2724  //must convert them to numbers.
2725  int tmpinteger = match.capturedView(2).toUInt();
2726  if (tmpinteger < 1)
2727  {
2728  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2729  event.m_season = 6;
2730  else
2731  {
2732  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2733  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2734  if (tmpinteger != -1)
2735  event.m_season = tmpinteger;
2736  else
2737  //sometimes they use english letters instead of greek. Compensating:
2738  {
2739  static const QString LettToNumber2 = "0ABΓΔE6ZHΘIKΛMN";
2740  tmpinteger = LettToNumber2.indexOf(match.capturedView(2));
2741  if (tmpinteger != -1)
2742  event.m_season = tmpinteger;
2743  }
2744  }
2745  }
2746  }
2747  else if (!match.capturedView(3).isEmpty()) //number
2748  {
2749  event.m_season = match.capturedView(3).toUInt();
2750  }
2751  series = true;
2752  event.m_title.remove(match.capturedStart(), match.capturedLength());
2753  }
2754 
2755  // I have to search separately for season in title and description because it wouldn't work when in both.
2756  match = grSeason.match(event.m_description);
2757  if (match.hasMatch())
2758  {
2759  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2760  {
2761  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2762  //must convert them to numbers.
2763  int tmpinteger = match.capturedView(2).toUInt();
2764  if (tmpinteger < 1)
2765  {
2766  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2767  event.m_season = 6;
2768  else
2769  {
2770  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2771  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2772  if (tmpinteger != -1)
2773  event.m_season = tmpinteger;
2774  }
2775  }
2776  }
2777  else if (!match.capturedView(3).isEmpty()) //number
2778  {
2779  event.m_season = match.capturedView(3).toUInt();
2780  }
2781  series = true;
2782  event.m_description.remove(match.capturedStart(), match.capturedLength());
2783  }
2784 
2785 
2786  // If Season is in Roman Numerals (I,II,etc)
2787  static const QRegularExpression grSeasonAsRomanNumerals { ",\\s*([MDCLXVIΙΧ]+)$",
2788  QRegularExpression::CaseInsensitiveOption };
2789  match = grSeasonAsRomanNumerals.match(event.m_title);
2790  auto match2 = grSeasonAsRomanNumerals.match(event.m_description);
2791  if (match.hasMatch())
2792  {
2793  if (!match.capturedView(1).isEmpty()) //number
2794  event.m_season = parseRoman(match.captured(1).toUpper());
2795  series = true;
2796  event.m_title.remove(match.capturedStart(), match.capturedLength());
2797  event.m_title = event.m_title.trimmed();
2798  if (event.m_title.right(1) == ",")
2799  event.m_title.chop(1);
2800  }
2801  else if (match2.hasMatch())
2802  {
2803  if (!match2.capturedView(1).isEmpty()) //number
2804  event.m_season = parseRoman(match2.captured(1).toUpper());
2805  series = true;
2806  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2807  event.m_description = event.m_description.trimmed();
2808  if (event.m_description.right(1) == ",")
2809  event.m_description.chop(1);
2810  }
2811 
2812  static const QRegularExpression grlongEp { R"(\b(?:Επ.|επεισ[οό]διο:?)\s*(\d+)\W?)",
2813  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2814  // cap(1) is the Episode No.
2815  match = grlongEp.match(event.m_title);
2816  match2 = grlongEp.match(event.m_description);
2817  if (match.hasMatch() || match2.hasMatch())
2818  {
2819  if (!match.capturedView(1).isEmpty())
2820  {
2821  event.m_episode = match.capturedView(1).toUInt();
2822  series = true;
2823  event.m_title.remove(match.capturedStart(), match.capturedLength());
2824  }
2825  else if (!match2.capturedView(1).isEmpty())
2826  {
2827  event.m_episode = match2.capturedView(1).toUInt();
2828  series = true;
2829  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2830  }
2831  // Sometimes description omits Season if it's 1. We fix this
2832  if (0 == event.m_season)
2833  event.m_season = 1;
2834  }
2835 
2836  // Sometimes, especially on greek national tv, they include comments in the
2837  // title, e.g "connection to ert1", "ert archives".
2838  // Because they obscure the real title, I'll isolate and remove them.
2839 
2840  static const QRegularExpression grCommentsinTitle { R"(\(([Α-Ωα-ω\s\d-]+)\)(?:\s*$)*)" };
2841  // cap1 = real title
2842  // cap0 = real title in parentheses.
2843  match = grCommentsinTitle.match(event.m_title);
2844  if (match.hasMatch()) // found in title instead
2845  event.m_title.remove(match.capturedStart(), match.capturedLength());
2846 
2847  // Sometimes the real (mostly English) title of a movie or series is
2848  // enclosed in parentheses in the event title, subtitle or description.
2849  // Since the subtitle has been moved to the description field by
2850  // EITFixUp::FixGreekSubtitle, I will search for it only in the description.
2851  // It will replace the translated one to get better chances of metadata
2852  // retrieval. The old title will be moved in the description.
2853  static const QRegularExpression grRealTitleInDescription { R"(^\(([A-Za-z\s\d-]+)\)\s*)" };
2854  // cap1 = real title
2855  // cap0 = real title in parentheses.
2856  match = grRealTitleInDescription.match(event.m_description);
2857  if (match.hasMatch())
2858  {
2859  event.m_description.remove(0, match.capturedLength());
2860  if (match.captured(0) != event.m_title.trimmed())
2861  {
2862  event.m_description = "(" + event.m_title.trimmed() + "). " + event.m_description;
2863  }
2864  event.m_title = match.captured(1);
2865  // Remove the real title from the description
2866  }
2867  else // search in title
2868  {
2869  static const QRegularExpression grRealTitleInTitle { R"(\(([A-Za-z\s\d-]+)\)(?:\s*$)?)" };
2870  // cap1 = real title
2871  // cap0 = real title in parentheses.
2872  match = grRealTitleInTitle.match(event.m_title);
2873  if (match.hasMatch()) // found in title instead
2874  {
2875  event.m_title.remove(match.capturedStart(), match.capturedLength());
2876  QString tmpTranslTitle = event.m_title;
2877  //QString tmpTranslTitle = event.m_title.replace(tmptitle.cap(0),"");
2878  event.m_title = match.captured(1);
2879  event.m_description = "(" + tmpTranslTitle.trimmed() + "). " + event.m_description;
2880  }
2881  }
2882 
2883  // Description field: "^Episode: Lion in the cage. (Description follows)"
2884  static const QRegularExpression grEpisodeAsSubtitle { R"(^Επεισ[οό]διο:\s?([\w\s\-,']+)\.\s?)" };
2885  match = grEpisodeAsSubtitle.match(event.m_description);
2886  if (match.hasMatch())
2887  {
2888  event.m_subtitle = match.captured(1).trimmed();
2889  event.m_description.remove(match.capturedStart(), match.capturedLength());
2890  }
2891  static const QRegularExpression grMovie { R"(\bταιν[ιί]α\b)",
2892  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2893  bool isMovie = (event.m_description.indexOf(grMovie) !=-1) ;
2894  if (isMovie)
2895  event.m_categoryType = ProgramInfo::kCategoryMovie;
2896  else if (series)
2897  event.m_categoryType = ProgramInfo::kCategorySeries;
2898  // clear double commas.
2899  event.m_description.replace(",,", ",");
2900 
2901 // να σβήσω τα κομμάτια που περισσεύουν από την περιγραφή πχ παραγωγής χχχχ
2902 }
2903 
2905 {
2906  struct grCategoryEntry {
2907  QRegularExpression expr;
2908  QString category;
2909  };
2910  static const QRegularExpression grCategFood { "\\W?(?:εκπομπ[ηή]\\W)?(Γαστρονομ[ιί]α[σς]?|μαγειρικ[ηή][σς]?|chef|συνταγ[εέηή]|διατροφ|wine|μ[αά]γειρα[σς]?)\\W?",
2911  QRegularExpression::CaseInsensitiveOption };
2912  static const QRegularExpression grCategDrama { "\\W?(κοινωνικ[ηήό]|δραματικ[ηή]|δρ[αά]μα)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2913  QRegularExpression::CaseInsensitiveOption};
2914  static const QRegularExpression grCategComedy { "\\W?(κωμικ[ηήοό]|χιουμοριστικ[ηήοό]|κωμωδ[ιί]α)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2915  QRegularExpression::CaseInsensitiveOption};
2916  static const QRegularExpression grCategChildren { "\\W?(παιδικ[ηήοό]|κινο[υύ]μ[εέ]ν(ων|α)\\sσχ[εέ]δ[ιί](ων|α))\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2917  QRegularExpression::CaseInsensitiveOption};
2918  static const QRegularExpression grCategMystery { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(μυστηρ[ιί]ου)\\W?",
2919  QRegularExpression::CaseInsensitiveOption};
2920  static const QRegularExpression grCategFantasy { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(φαντασ[ιί]ας)\\W?",
2921  QRegularExpression::CaseInsensitiveOption};
2922  static const QRegularExpression grCategHistory { "\\W?(ιστορικ[ηήοό])\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2923  QRegularExpression::CaseInsensitiveOption};
2924  static const QRegularExpression grCategTeleMag { "\\W?(ενημερωτικ[ηή]|ψυχαγωγικ[ηή]|τηλεπεριοδικ[οό]|μαγκαζ[ιί]νο)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2925  QRegularExpression::CaseInsensitiveOption};
2926  static const QRegularExpression grCategTeleShop { "\\W?(οδηγ[οό][σς]?\\sαγορ[ωώ]ν|τηλεπ[ωώ]λ[ηή]σ|τηλεαγορ|τηλεμ[αά]ρκετ|telemarket)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2927  QRegularExpression::CaseInsensitiveOption};
2928  static const QRegularExpression grCategGameShow { "\\W?(τηλεπαιχν[ιί]δι|quiz)\\W?",
2929  QRegularExpression::CaseInsensitiveOption};
2930  static const QRegularExpression grCategDocumentary { "\\W?(ντοκ[ιυ]μαντ[εέ]ρ)\\W?",
2931  QRegularExpression::CaseInsensitiveOption};
2932  static const QRegularExpression grCategBiography { "\\W?(βιογραφ[ιί]α|βιογραφικ[οό][σς]?)\\W?",
2933  QRegularExpression::CaseInsensitiveOption};
2934  static const QRegularExpression grCategNews { "\\W?(δελτ[ιί]ο\\W?|ειδ[ηή]σε(ι[σς]|ων))\\W?",
2935  QRegularExpression::CaseInsensitiveOption};
2936  static const QRegularExpression grCategSports { "\\W?(champion|αθλητικ[αάοόηή]|πρωτ[αά]θλημα|ποδ[οό]σφαιρο(ου)?|κολ[υύ]μβηση|πατιν[αά]ζ|formula|μπ[αά]σκετ|β[οό]λε[ιϊ])\\W?",
2937  QRegularExpression::CaseInsensitiveOption};
2938  static const QRegularExpression grCategMusic { "\\W?(μουσικ[οόηή]|eurovision|τραγο[υύ]δι)\\W?",
2939  QRegularExpression::CaseInsensitiveOption};
2940  static const QRegularExpression grCategReality { "\\W?(ρι[αά]λιτι|reality)\\W?",
2941  QRegularExpression::CaseInsensitiveOption};
2942  static const QRegularExpression grCategReligion { "\\W?(θρησκε[ιί]α|θρησκευτικ|να[οό][σς]?|θε[ιί]α λειτουργ[ιί]α)\\W?",
2943  QRegularExpression::CaseInsensitiveOption};
2944  static const QRegularExpression grCategCulture { "\\W?(τ[εέ]χν(η|ε[σς])|πολιτισμ)\\W?",
2945  QRegularExpression::CaseInsensitiveOption};
2946  static const QRegularExpression grCategNature { "\\W?(φ[υύ]ση|περιβ[αά]λλο|κατασκευ|επιστ[ηή]μ(?!ονικ[ηή]ς φαντασ[ιί]ας))\\W?",
2947  QRegularExpression::CaseInsensitiveOption};
2948  static const QRegularExpression grCategSciFi { "\\W?(επιστ(.|ημονικ[ηή]ς)\\s?φαντασ[ιί]ας)\\W?",
2949  QRegularExpression::CaseInsensitiveOption};
2950  static const QRegularExpression grCategHealth { "\\W?(υγε[ιί]α|υγειιν|ιατρικ|διατροφ)\\W?",
2951  QRegularExpression::CaseInsensitiveOption};
2952  static const QRegularExpression grCategSpecial { "\\W?(αφι[εέ]ρωμα)\\W?",
2953  QRegularExpression::CaseInsensitiveOption};
2954  static const QList<grCategoryEntry> grCategoryDescData = {
2955  { grCategComedy, "Κωμωδία" },
2956  { grCategTeleMag, "Τηλεπεριοδικό" },
2957  { grCategNature, "Επιστήμη/Φύση" },
2958  { grCategHealth, "Υγεία" },
2959  { grCategReality, "Ριάλιτι" },
2960  { grCategDrama, "Κοινωνικό" },
2961  { grCategChildren, "Παιδικό" },
2962  { grCategSciFi, "Επιστ.Φαντασίας" },
2963  { grCategMystery, "Μυστηρίου" },
2964  { grCategFantasy, "Φαντασίας" },
2965  { grCategHistory, "Ιστορικό" },
2966  { grCategTeleShop, "Τηλεπωλήσεις" },
2967  { grCategFood, "Γαστρονομία" },
2968  { grCategGameShow, "Τηλεπαιχνίδι" },
2969  { grCategBiography, "Βιογραφία" },
2970  { grCategSports, "Αθλητικά" },
2971  { grCategMusic, "Μουσική" },
2972  { grCategDocumentary, "Ντοκιμαντέρ" },
2973  { grCategReligion, "Θρησκεία" },
2974  { grCategCulture, "Τέχνες/Πολιτισμός" },
2975  { grCategSpecial, "Αφιέρωμα" },
2976  };
2977  static const QList<grCategoryEntry> grCategoryTitleData = {
2978  { grCategTeleShop, "Τηλεπωλήσεις" },
2979  { grCategGameShow, "Τηλεπαιχνίδι" },
2980  { grCategMusic, "Μουσική" },
2981  { grCategNews, "Ειδήσεις" },
2982  };
2983 
2984  // Handle special cases
2985  if ((event.m_description.indexOf(grCategFantasy) != -1)
2986  && (event.m_description.indexOf(grCategMystery) != -1))
2987  {
2988  event.m_category = "Φαντασίας/Μυστηρίου";
2989  return;
2990  }
2991 
2992  // Find categories in the description
2993  for (const auto& [expression, category] : grCategoryDescData)
2994  {
2995  if (event.m_description.indexOf(expression) != -1) {
2996  event.m_category = category;
2997  return;
2998  }
2999  }
3000 
3001  // Find categories in the title
3002  for (const auto& [expression, category] : grCategoryTitleData)
3003  {
3004  if (event.m_title.indexOf(expression) != -1) {
3005  event.m_category = category;
3006  return;
3007  }
3008  }
3009 }
3010 
3012 {
3013  // TODO handle scraping the category and category_type from localized text in the short/long description
3014  // TODO remove short description (stored as episode title) which is just the beginning of the long description (actual description)
3015 
3016  // drop the short description if its copy the start of the long description
3017  if (event.m_description.startsWith (event.m_subtitle))
3018  {
3019  event.m_subtitle = "";
3020  }
3021 
3022  // handle cast and crew in items in the DVB Extended Event Descriptor
3023  // remove handled items from the map, so the left overs can be reported
3024  auto i = event.m_items.begin();
3025  while (i != event.m_items.end())
3026  {
3027  /* Possible TODO: if EIT inlcude the priority and/or character
3028  * names for the actors, include them in AddPerson call. */
3029  if ((QString::compare (i.key(), "Role Player") == 0) ||
3030  (QString::compare (i.key(), "Performing Artist") == 0))
3031  {
3032  event.AddPerson (DBPerson::kActor, i.value());
3033  i = event.m_items.erase (i);
3034  }
3035  else if (QString::compare (i.key(), "Director") == 0)
3036  {
3037  event.AddPerson (DBPerson::kDirector, i.value());
3038  i = event.m_items.erase (i);
3039  }
3040  else if (QString::compare (i.key(), "Commentary or Commentator") == 0)
3041  {
3042  event.AddPerson (DBPerson::kCommentator, i.value());
3043  i = event.m_items.erase (i);
3044  }
3045  else if (QString::compare (i.key(), "Presenter") == 0)
3046  {
3047  event.AddPerson (DBPerson::kPresenter, i.value());
3048  i = event.m_items.erase (i);
3049  }
3050  else if (QString::compare (i.key(), "Producer") == 0)
3051  {
3052  event.AddPerson (DBPerson::kProducer, i.value());
3053  i = event.m_items.erase (i);
3054  }
3055  else if (QString::compare (i.key(), "Scriptwriter") == 0)
3056  {
3057  event.AddPerson (DBPerson::kWriter, i.value());
3058  i = event.m_items.erase (i);
3059  }
3060  else
3061  {
3062  ++i;
3063  }
3064  }
3065 
3066  // handle star rating in the description
3067  static const QRegularExpression unitymediaImdbrating { R"(\s*IMDb Rating: (\d\.\d)\s?/10$)" };
3068  auto match = unitymediaImdbrating.match(event.m_description);
3069  if (match.hasMatch())
3070  {
3071  float stars = match.captured(1).toFloat();
3072  event.m_stars = stars / 10.0F;
3073  event.m_description.remove(match.capturedStart(0),
3074  match.capturedLength(0));
3075  }
3076 }
EITFixUp::FixGreekEIT
static void FixGreekEIT(DBEventEIT &event)
Definition: eitfixup.cpp:2532
DBEvent::m_season
uint m_season
Definition: programdata.h:172
EITFixUp::FixDK
static void FixDK(DBEventEIT &event)
Use this to clean YouSee's DVB-C guide in Denmark.
Definition: eitfixup.cpp:2295
EITFixUp::kFixUK
@ kFixUK
Definition: eitfixup.h:35
EITFixUp::kFixBell
@ kFixBell
Definition: eitfixup.h:34
NLMapResult::type
ProgramInfo::CategoryType type
Definition: eitfixup.cpp:1981
EITFixUp::kFixAUDescription
@ kFixAUDescription
Definition: eitfixup.h:52
EITFixUp::FixAUNine
static void FixAUNine(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1336
EventRating::m_system
QString m_system
Definition: programdata.h:78
kUKSpaceColonStart
static const QRegularExpression kUKSpaceColonStart
Definition: eitfixup.cpp:21
EITFixUp::kFixNO
@ kFixNO
Definition: eitfixup.h:47
DBEvent::m_totalepisodes
uint m_totalepisodes
Definition: programdata.h:174
EITFixUp::kFixNRK_DVBT
@ kFixNRK_DVBT
Definition: eitfixup.h:48
EITFixUp::FixBellExpressVu
static void FixBellExpressVu(DBEventEIT &event)
Use this for the Canadian BellExpressVu to standardize DVB-S guide.
Definition: eitfixup.cpp:233
EITFixUp::Fix
static void Fix(DBEventEIT &event)
Definition: eitfixup.cpp:50
EITFixUp::FixFI
static void FixFI(DBEventEIT &event)
Use this to clean DVB-T guide in Finland.
Definition: eitfixup.cpp:1859
EITFixUp::kFixAUStar
@ kFixAUStar
Definition: eitfixup.h:39
EITFixUp::kFixPremiere
@ kFixPremiere
Definition: eitfixup.h:43
EITFixUp::kFixATV
@ kFixATV
Definition: eitfixup.h:58
DBEventEIT::m_fixup
FixupValue m_fixup
Definition: programdata.h:222
EITFixUp::kFixHTML
@ kFixHTML
Definition: eitfixup.h:56
EventRating
Definition: programdata.h:75
EITFixUp::FixATV
static void FixATV(DBEventEIT &event)
Use this to standardise the ATV/ATV2 guide in Germany.
Definition: eitfixup.cpp:1849
DBEvent::m_starttime
QDateTime m_starttime
Definition: programdata.h:152
EITFixUp::kDotToTitle
static const uint kDotToTitle
Definition: eitfixup.h:19
EITFixUp::FixComHem
static void FixComHem(DBEventEIT &event, bool process_subtitle)
Use this to standardize ComHem DVB-C service in Sweden.
Definition: eitfixup.cpp:1045
DBEvent::m_partnumber
uint16_t m_partnumber
Definition: programdata.h:157
LOG
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:39
EITFixUp::kFixSubtitle
@ kFixSubtitle
Definition: eitfixup.h:38
EITFixUp::kMaxToTitle
static const uint kMaxToTitle
Definition: eitfixup.h:17
dish_theme_type_to_string
QString dish_theme_type_to_string(uint theme_type)
Definition: dishdescriptors.cpp:301
EITFixUp::FixGreekCategories
static void FixGreekCategories(DBEventEIT &event)
Definition: eitfixup.cpp:2904
DBPerson::kPresenter
@ kPresenter
Definition: programdata.h:39
EITFixUp::FixAUStar
static void FixAUStar(DBEventEIT &event)
Use this to standardize DVB-S guide in Australia.
Definition: eitfixup.cpp:1294
DBEvent::m_category
QString m_category
Definition: programdata.h:151
NLMapResult::name
QString name
Definition: eitfixup.cpp:1980
kStereo
static const QRegularExpression kStereo
Definition: eitfixup.cpp:20
EITFixUp::parseRoman
static int parseRoman(QString roman)
Definition: eitfixup.cpp:34
DBPerson::kDirector
@ kDirector
Definition: programdata.h:32
EITFixUp::FixNRK_DVBT
static void FixNRK_DVBT(DBEventEIT &event)
Use this to clean DVB-T guide in Norway (NRK)
Definition: eitfixup.cpp:2233
tmp
static guint32 * tmp
Definition: goom_core.cpp:26
r2v
static const QMap< QChar, quint16 > r2v
Definition: eitfixup.cpp:28
EITFixUp::FixCategory
static void FixCategory(DBEventEIT &event)
Definition: eitfixup.cpp:2189
DBEvent::m_seriesId
QString m_seriesId
Definition: programdata.h:165
ProgramInfo::kCategoryMovie
@ kCategoryMovie
Definition: programinfo.h:76
DBEvent::m_parttotal
uint16_t m_parttotal
Definition: programdata.h:158
EITFixUp::kFixCategory
@ kFixCategory
Definition: eitfixup.h:46
DBEvent::m_programId
QString m_programId
Definition: programdata.h:166
DBPerson::kUnknown
@ kUnknown
Definition: programdata.h:30
programinfo.h
DBEventEIT::m_chanid
uint32_t m_chanid
Definition: programdata.h:221
ProgramInfo::kCategoryTVShow
@ kCategoryTVShow
Definition: programinfo.h:77
mythlogging.h
DBEvent::m_categoryType
ProgramInfo::CategoryType m_categoryType
Definition: programdata.h:164
EITFixUp::SetUKSubtitle
static void SetUKSubtitle(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:521
deCrewTitle
static const QMap< QString, DBPerson::Role > deCrewTitle
Definition: eitfixup.cpp:1737
DBEvent::m_title
QString m_title
Definition: programdata.h:148
EITFixUp::FixAUSeven
static void FixAUSeven(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1369
ProgramInfo::kCategorySports
@ kCategorySports
Definition: programinfo.h:77
EITFixUp::kMinMovieDuration
static const int kMinMovieDuration
Definition: eitfixup.h:25
DBPerson::kProducer
@ kProducer
Definition: programdata.h:33
DBEvent::m_subtitle
QString m_subtitle
Definition: programdata.h:149
EITFixUp::kFixNL
@ kFixNL
Definition: eitfixup.h:45
EITFixUp::kFixGreekEIT
@ kFixGreekEIT
Definition: eitfixup.h:69
EITFixUp::kFixDisneyChannel
@ kFixDisneyChannel
Definition: eitfixup.h:59
hardwareprofile.scan.rating
def rating(profile, smoonURL, gate)
Definition: scan.py:39
EITFixUp::kSubtitleMaxLen
static const uint kSubtitleMaxLen
Definition: eitfixup.h:15
EITFixUp::FixNO
static void FixNO(DBEventEIT &event)
Use this to clean DVB-S guide in Norway.
Definition: eitfixup.cpp:2203
EITFixUp::FixNL
static void FixNL(DBEventEIT &event)
Use this to standardize @Home DVB-C guide in the Netherlands.
Definition: eitfixup.cpp:2006
EITFixUp::kFixDK
@ kFixDK
Definition: eitfixup.h:50
EITFixUp::kFixGreekCategories
@ kFixGreekCategories
Definition: eitfixup.h:70
EITFixUp::FixPremiere
static void FixPremiere(DBEventEIT &event)
Use this to standardize DVB-C guide in Germany for the providers Kabel Deutschland and Premiere.
Definition: eitfixup.cpp:1911
categoryTrans
static const QMap< QString, NLMapResult > categoryTrans
Definition: eitfixup.cpp:1983
uint
unsigned int uint
Definition: compat.h:81
DBPerson::kHost
@ kHost
Definition: programdata.h:37
ProgramInfo::CategoryType
CategoryType
Definition: programinfo.h:76
DBEvent::m_episode
uint m_episode
Definition: programdata.h:173
EITFixUp::kFixAUFreeview
@ kFixAUFreeview
Definition: eitfixup.h:51
channelutil.h
EITFixUp::kFixFI
@ kFixFI
Definition: eitfixup.h:42
EITFixUp::kFixComHem
@ kFixComHem
Definition: eitfixup.h:37
EITFixUp::kFixRTL
@ kFixRTL
Definition: eitfixup.h:41
EITFixUp::kFixDish
@ kFixDish
Definition: eitfixup.h:49
EITFixUp::FixAUDescription
static void FixAUDescription(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1310
EITFixUp::kFixGenericDVB
@ kFixGenericDVB
Definition: eitfixup.h:33
eitfixup.h
DBEventEIT
Definition: programdata.h:177
EITFixUp::kFixPBS
@ kFixPBS
Definition: eitfixup.h:36
DBEvent::m_description
QString m_description
Definition: programdata.h:150
EITFixUp::kFixAUSeven
@ kFixAUSeven
Definition: eitfixup.h:54
DBEventEIT::m_items
QMultiMap< QString, QString > m_items
Definition: programdata.h:223
EITFixUp::FixAUFreeview
static void FixAUFreeview(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1411
DBPerson::Role
Role
Definition: programdata.h:28
EITFixUp::AddDVBEITAuthority
static QString AddDVBEITAuthority(uint chanid, const QString &id)
This adds a DVB EIT default authority to series id or program id if one exists in the DB for that cha...
Definition: eitfixup.cpp:206
DBPerson::kCommentator
@ kCommentator
Definition: programdata.h:40
EITFixUp::FixPBS
static void FixPBS(DBEventEIT &event)
Use this to standardize PBS ATSC guide in the USA.
Definition: eitfixup.cpp:1030
EITFixUp::FixDisneyChannel
static void FixDisneyChannel(DBEventEIT &event)
Use this to standardise the Disney Channel guide in Germany.
Definition: eitfixup.cpp:1822
EITFixUp::kMaxDotToColon
static const uint kMaxDotToColon
Definition: eitfixup.h:23
EITFixUp::kFixHDTV
@ kFixHDTV
Definition: eitfixup.h:44
EITFixUp::kFixGreekSubtitle
@ kFixGreekSubtitle
Definition: eitfixup.h:68
DBEvent::m_endtime
QDateTime m_endtime
Definition: programdata.h:153
EITFixUp::kFixMCA
@ kFixMCA
Definition: eitfixup.h:40
NLMapResult
Definition: eitfixup.cpp:1979
EITFixUp::kFixP7S1
@ kFixP7S1
Definition: eitfixup.h:55
EITFixUp::FixUK
static void FixUK(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:659
dishdescriptors.h
EITFixUp::kFixUnitymedia
@ kFixUnitymedia
Definition: eitfixup.h:57
EITFixUp::FixStripHTML
static void FixStripHTML(DBEventEIT &event)
Use this to clean HTML Tags from EIT Data.
Definition: eitfixup.cpp:2506
ProgramInfo::kCategoryNone
@ kCategoryNone
Definition: programinfo.h:76
kDotAtEnd
static const QRegularExpression kDotAtEnd
Definition: eitfixup.cpp:22
ProgramInfo::kCategorySeries
@ kCategorySeries
Definition: programinfo.h:76
EITFixUp::FixGreekSubtitle
static void FixGreekSubtitle(DBEventEIT &event)
Definition: eitfixup.cpp:2517
EITFixUp::kFixAUNine
@ kFixAUNine
Definition: eitfixup.h:53
DBEvent::m_airdate
uint16_t m_airdate
movie year / production year
Definition: programdata.h:154
DBPerson::kWriter
@ kWriter
Definition: programdata.h:35
EventRating::m_rating
QString m_rating
Definition: programdata.h:79
EITFixUp::FixRTL
static void FixRTL(DBEventEIT &event)
Use this to standardise the RTL group guide in Germany.
Definition: eitfixup.cpp:1609
ChannelUtil::GetDefaultAuthority
static QString GetDefaultAuthority(uint chanid)
Returns the DVB default authority for the chanid given.
Definition: channelutil.cpp:1178
EITFixUp::FixMCA
static void FixMCA(DBEventEIT &event)
Use this to standardise the MultiChoice Africa DVB-S guide.
Definition: eitfixup.cpp:1469
DBPerson::kActor
@ kActor
Definition: programdata.h:31
EITFixUp::kMaxQuestionExclamation
static const uint kMaxQuestionExclamation
Definition: eitfixup.h:21
EITFixUp::FixUnitymedia
static void FixUnitymedia(DBEventEIT &event)
Definition: eitfixup.cpp:3011
EITFixUp::FixPRO7
static void FixPRO7(DBEventEIT &event)
Use this to standardise the PRO7/Sat1 group guide in Germany.
Definition: eitfixup.cpp:1746