MythTV  master
eitfixup.cpp
Go to the documentation of this file.
1 // C++ headers
2 #include <algorithm>
3 #include <array>
4 
5 // Qt Headers
6 #include <QRegularExpression>
7 
8 // MythTV headers
10 #include "libmythbase/programinfo.h" // for CategoryType, subtitle types and audio and video properties
11 
12 #include "channelutil.h" // for GetDefaultAuthority()
13 #include "eitfixup.h"
14 #include "mpeg/dishdescriptors.h" // for dish_theme_type_to_string
15 
16 /*------------------------------------------------------------------------
17  * Event Fix Up Scripts - Turned on by entry in dtv_privatetype table
18  *------------------------------------------------------------------------*/
19 
// Patterns shared by more than one fixup routine in this file.
// "stereo" / "Stereo", optionally wrapped in parentheses, with \b anchors
// on both sides.
20 static const QRegularExpression kStereo { R"(\b\(?[sS]tereo\)?\b)" };
// Leading run (possibly empty) of space, '|' and ':' characters.
21 static const QRegularExpression kUKSpaceColonStart { R"(^[ |:]*)" };
// A period anchored with $ at the end of the text.
22 static const QRegularExpression kDotAtEnd { "\\.$" };
23 
// Value of each Roman numeral symbol; read by EITFixUp::parseRoman().
// Only upper-case symbols are listed.
24 static const QMap<QChar,quint16> r2v = {
25  {'I' , 1}, {'V' , 5}, {'X' , 10}, {'L' , 50},
26  {'C' , 100}, {'D' , 500}, {'M' , 1000},
27  {QChar(0x399), 1}, // Greek capital iota (U+0399), given the same value as 'I'
28 };
29 
30 int EITFixUp::parseRoman (QString roman)
31 {
32  if (roman.isEmpty())
33  return 0;
34 
35  uint result = 0;
36  for (int i = 0; i < roman.size() - 1; i++)
37  {
38  int v1 = r2v[roman.at(i)];
39  int v2 = r2v[roman.at(i+1)];
40  result += (v1 >= v2) ? v1 : -v1;
41  }
42  return result + r2v[roman.back()];
43 }
44 
45 
47 {
48  if (event.m_fixup)
49  {
50  if (event.m_subtitle == event.m_title)
51  event.m_subtitle = QString("");
52 
53  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
54  {
55  event.m_description = event.m_subtitle;
56  event.m_subtitle = QString("");
57  }
58  }
59 
60  if (kFixHTML & event.m_fixup)
61  FixStripHTML(event);
62 
63  if (kFixHDTV & event.m_fixup)
64  event.m_videoProps |= VID_HDTV;
65 
66  if (kFixBell & event.m_fixup)
67  FixBellExpressVu(event);
68 
69  if (kFixDish & event.m_fixup)
70  FixBellExpressVu(event);
71 
72  if (kFixUK & event.m_fixup)
73  FixUK(event);
74 
75  if (kFixPBS & event.m_fixup)
76  FixPBS(event);
77 
78  if (kFixComHem & event.m_fixup)
79  FixComHem(event, (kFixSubtitle & event.m_fixup) != 0U);
80 
81  if (kFixAUStar & event.m_fixup)
82  FixAUStar(event);
83 
84  if (kFixAUDescription & event.m_fixup)
85  FixAUDescription(event);
86 
87  if (kFixAUFreeview & event.m_fixup)
88  FixAUFreeview(event);
89 
90  if (kFixAUNine & event.m_fixup)
91  FixAUNine(event);
92 
93  if (kFixAUSeven & event.m_fixup)
94  FixAUSeven(event);
95 
96  if (kFixMCA & event.m_fixup)
97  FixMCA(event);
98 
99  if (kFixRTL & event.m_fixup)
100  FixRTL(event);
101 
102  if (kFixP7S1 & event.m_fixup)
103  FixPRO7(event);
104 
105  if (kFixATV & event.m_fixup)
106  FixATV(event);
107 
108  if (kFixDisneyChannel & event.m_fixup)
109  FixDisneyChannel(event);
110 
111  if (kFixFI & event.m_fixup)
112  FixFI(event);
113 
114  if (kFixPremiere & event.m_fixup)
115  FixPremiere(event);
116 
117  if (kFixNL & event.m_fixup)
118  FixNL(event);
119 
120  if (kFixNO & event.m_fixup)
121  FixNO(event);
122 
123  if (kFixNRK_DVBT & event.m_fixup)
124  FixNRK_DVBT(event);
125 
126  if (kFixDK & event.m_fixup)
127  FixDK(event);
128 
129  if (kFixCategory & event.m_fixup)
130  FixCategory(event);
131 
132  if (kFixGreekSubtitle & event.m_fixup)
133  FixGreekSubtitle(event);
134 
135  if (kFixGreekEIT & event.m_fixup)
136  FixGreekEIT(event);
137 
138  if (kFixGreekCategories & event.m_fixup)
139  FixGreekCategories(event);
140 
141  if (kFixUnitymedia & event.m_fixup)
142  FixUnitymedia(event);
143 
144  // Clean up text strings after all fixups have been applied.
145  if (event.m_fixup)
146  {
147  static const QRegularExpression emptyParens { R"(\(\s*\))" };
148  if (!event.m_title.isEmpty())
149  {
150  event.m_title.remove(QChar('\0')).remove(emptyParens);
151  event.m_title = event.m_title.simplified();
152  }
153 
154  if (!event.m_subtitle.isEmpty())
155  {
156  event.m_subtitle.remove(QChar('\0'));
157  event.m_subtitle.remove(emptyParens);
158  event.m_subtitle = event.m_subtitle.simplified();
159  }
160 
161  if (!event.m_description.isEmpty())
162  {
163  event.m_description.remove(QChar('\0'));
164  event.m_description.remove(emptyParens);
165  event.m_description = event.m_description.simplified();
166  }
167  }
168 
169  if (kFixGenericDVB & event.m_fixup)
170  {
171  event.m_programId = AddDVBEITAuthority(event.m_chanid, event.m_programId);
172  event.m_seriesId = AddDVBEITAuthority(event.m_chanid, event.m_seriesId);
173  }
174 
175  // Are any items left unhandled? report them to allow fixups improvements
176  if (!event.m_items.empty())
177  {
178  for (auto i = event.m_items.begin(); i != event.m_items.end(); ++i)
179  {
180  LOG(VB_EIT, LOG_DEBUG, QString("Unhandled item in EIT for"
181  " channel id \"%1\", \"%2\": %3").arg(event.m_chanid)
182  .arg(i.key(), i.value()));
183  }
184  }
185 }
186 
202 QString EITFixUp::AddDVBEITAuthority(uint chanid, const QString &id)
203 {
204  if (id.isEmpty())
205  return id;
206 
207  // CRIDs are not case sensitive, so change all to lower case
208  QString crid = id.toLower();
209 
210  // remove "crid://"
211  if (crid.startsWith("crid://"))
212  crid.remove(0,7);
213 
214  // if id is a CRID with authority, return it
215  if (crid.length() >= 1 && crid[0] != '/')
216  return crid;
217 
218  QString authority = ChannelUtil::GetDefaultAuthority(chanid);
219  if (authority.isEmpty())
220  return ""; // no authority, not a valid CRID, return empty
221 
222  return authority + crid;
223 }
224 
230 {
231  // A 0x0D character is present between the content
232  // and the subtitle if its present
233  int position = event.m_description.indexOf('\r');
234 
235  if (position != -1)
236  {
237  // Subtitle present in the title, so get
238  // it and adjust the description
239  event.m_subtitle = event.m_description.left(position);
240  event.m_description = event.m_description.right(
241  event.m_description.length() - position - 2);
242  }
243 
244  // Take out the content description which is
245  // always next with a period after it
246  position = event.m_description.indexOf(".");
247  // Make sure they didn't leave it out and
248  // you come up with an odd category
249  if (position < 10)
250  {
251  }
252  else
253  {
254  event.m_category = "Unknown";
255  }
256 
257  // If the content descriptor didn't come up with anything, try parsing the category
258  // out of the description.
259  if (event.m_category.isEmpty())
260  {
261  // Take out the content description which is
262  // always next with a period after it
263  position = event.m_description.indexOf(".");
264  if ((position + 1) < event.m_description.length())
265  position = event.m_description.indexOf(". ");
266  // Make sure they didn't leave it out and
267  // you come up with an odd category
268  if ((position > -1) && position < 20)
269  {
270  const QString stmp = event.m_description;
271  event.m_description = stmp.right(stmp.length() - position - 2);
272  event.m_category = stmp.left(position);
273 
274  int position_p = event.m_category.indexOf("(");
275  if (position_p == -1)
276  event.m_description = stmp.right(stmp.length() - position - 2);
277  else
278  event.m_category = "Unknown";
279  }
280  else
281  {
282  event.m_category = "Unknown";
283  }
284 
285  // When a channel is off air the category is "-"
286  // so leave the category as blank
287  if (event.m_category == "-")
288  event.m_category = "OffAir";
289 
290  if (event.m_category.length() > 20)
291  event.m_category = "Unknown";
292  }
293  else if (event.m_categoryType)
294  {
295  QString theme = dish_theme_type_to_string(event.m_categoryType);
296  event.m_description = event.m_description.replace(theme, "");
297  if (event.m_description.startsWith("."))
298  event.m_description = event.m_description.right(event.m_description.length() - 1);
299  if (event.m_description.startsWith(" "))
300  event.m_description = event.m_description.right(event.m_description.length() - 1);
301  }
302 
303  // See if a year is present as (xxxx)
304  static const QRegularExpression bellYear { R"(\([0-9]{4}\))" };
305  position = event.m_description.indexOf(bellYear);
306  if (position != -1 && !event.m_category.isEmpty())
307  {
308  // Parse out the year
309  bool ok = false;
310  uint y = event.m_description.mid(position + 1, 4).toUInt(&ok);
311  if (ok)
312  {
313  event.m_originalairdate = QDate(y, 1, 1);
314  event.m_airdate = y;
315  event.m_previouslyshown = true;
316  }
317 
318  // Get the actors if they exist
319  if (position > 3)
320  {
321  static const QRegularExpression bellActors { R"(\set\s|,)" };
322  QString tmp = event.m_description.left(position-3);
323  QStringList actors =
324  tmp.split(bellActors, Qt::SkipEmptyParts);
325 
326  /* Possible TODO: if EIT inlcude the priority and/or character
327  * names for the actors, include them in AddPerson call. */
328  for (const auto & actor : std::as_const(actors))
329  event.AddPerson(DBPerson::kActor, actor);
330  }
331  // Remove the year and actors from the description
332  event.m_description = event.m_description.right(
333  event.m_description.length() - position - 7);
334  }
335 
336  // Check for (CC) in the decription and
337  // set the <subtitles type="teletext"> flag
338  position = event.m_description.indexOf("(CC)");
339  if (position != -1)
340  {
341  event.m_subtitleType |= SUB_HARDHEAR;
342  event.m_description = event.m_description.replace("(CC)", "");
343  }
344 
345  // Check for (Stereo) in the decription and set the <audio> tags
346  auto match = kStereo.match(event.m_description);
347  if (match.hasMatch())
348  {
349  event.m_audioProps |= AUD_STEREO;
350  event.m_description.remove(match.capturedStart(0),
351  match.capturedLength(0));
352  }
353 
354  // Check for "title (All Day, HD)" in the title
355  static const QRegularExpression bellPPVTitleAllDayHD { R"(\s*\(All Day\, HD\)\s*$)" };
356  match = bellPPVTitleAllDayHD.match(event.m_title);
357  if (match.hasMatch())
358  {
359  event.m_title.remove(match.capturedStart(), match.capturedLength());
360  event.m_videoProps |= VID_HDTV;
361  }
362 
363  // Check for "title (All Day)" in the title
364  static const QRegularExpression bellPPVTitleAllDay { R"(\s*\(All Day.*\)\s*$)" };
365  match = bellPPVTitleAllDay.match(event.m_title);
366  if (match.hasMatch())
367  event.m_title.remove(match.capturedStart(), match.capturedLength());
368 
369  // Check for "HD - title" in the title
370  static const QRegularExpression bellPPVTitleHD { R"(^HD\s?-\s?)" };
371  match = bellPPVTitleHD.match(event.m_title);
372  if (match.hasMatch())
373  {
374  event.m_title.remove(match.capturedStart(), match.capturedLength());
375  event.m_videoProps |= VID_HDTV;
376  }
377 
378  // Check for (HD) in the decription
379  position = event.m_description.indexOf("(HD)");
380  if (position != -1)
381  {
382  event.m_description = event.m_description.replace("(HD)", "");
383  event.m_videoProps |= VID_HDTV;
384  }
385 
386  // Check for (HD) in the title
387  position = event.m_title.indexOf("(HD)");
388  if (position != -1)
389  {
390  event.m_title = event.m_title.replace("(HD)", "");
391  event.m_videoProps |= VID_HDTV;
392  }
393 
394  // Check for HD at the end of the title
395  static const QRegularExpression dishPPVTitleHD { R"(\sHD\s*$)" };
396  match = dishPPVTitleHD.match(event.m_title);
397  if (match.hasMatch())
398  {
399  event.m_title.remove(match.capturedStart(), match.capturedLength());
400  event.m_videoProps |= VID_HDTV;
401  }
402 
403  // Check for (DD) at the end of the description
404  position = event.m_description.indexOf("(DD)");
405  if (position != -1)
406  {
407  event.m_description = event.m_description.replace("(DD)", "");
408  event.m_audioProps |= AUD_DOLBY;
409  event.m_audioProps |= AUD_STEREO;
410  }
411 
412  // Remove SAP from Dish descriptions
413  position = event.m_description.indexOf("(SAP)");
414  if (position != -1)
415  {
416  event.m_description = event.m_description.replace("(SAP", "");
417  event.m_subtitleType |= SUB_HARDHEAR;
418  }
419 
420  // Remove any trailing colon in title
421  static const QRegularExpression dishPPVTitleColon { R"(\:\s*$)" };
422  match = dishPPVTitleColon.match(event.m_title);
423  if (match.hasMatch())
424  event.m_title.remove(match.capturedStart(), match.capturedLength());
425 
426  // Remove New at the end of the description
427  static const QRegularExpression dishDescriptionNew { R"(\s*New\.\s*)" };
428  match = dishDescriptionNew.match(event.m_description);
429  if (match.hasMatch())
430  {
431  event.m_previouslyshown = false;
432  event.m_description.remove(match.capturedStart(), match.capturedLength());
433  }
434 
435  // Remove Series Finale at the end of the desciption
436  static const QRegularExpression dishDescriptionFinale { R"(\s*(Series|Season)\sFinale\.\s*)" };
437  match = dishDescriptionFinale.match(event.m_description);
438  if (match.hasMatch())
439  {
440  event.m_previouslyshown = false;
441  event.m_description.remove(match.capturedStart(), match.capturedLength());
442  }
443 
444  // Remove Series Finale at the end of the desciption
445  static const QRegularExpression dishDescriptionFinale2 { R"(\s*Finale\.\s*)" };
446  match = dishDescriptionFinale2.match(event.m_description);
447  if (match.hasMatch())
448  {
449  event.m_previouslyshown = false;
450  event.m_description.remove(match.capturedStart(), match.capturedLength());
451  }
452 
453  // Remove Series Premiere at the end of the description
454  static const QRegularExpression dishDescriptionPremiere { R"(\s*(Series|Season)\s(Premier|Premiere)\.\s*)" };
455  match = dishDescriptionPremiere.match(event.m_description);
456  if (match.hasMatch())
457  {
458  event.m_previouslyshown = false;
459  event.m_description.remove(match.capturedStart(), match.capturedLength());
460  }
461 
462  // Remove Series Premiere at the end of the description
463  static const QRegularExpression dishDescriptionPremiere2 { R"(\s*(Premier|Premiere)\.\s*)" };
464  match = dishDescriptionPremiere2.match(event.m_description);
465  if (match.hasMatch())
466  {
467  event.m_previouslyshown = false;
468  event.m_description.remove(match.capturedStart(), match.capturedLength());
469  }
470 
471  // Remove Dish's PPV code at the end of the description
472  static const QRegularExpression ppvcode { R"(\s*\(([A-Z]|[0-9]){5}\)\s*$)",
473  QRegularExpression::CaseInsensitiveOption };
474  match = ppvcode.match(event.m_description);
475  if (match.hasMatch())
476  event.m_description.remove(match.capturedStart(), match.capturedLength());
477 
478  // Remove trailing garbage
479  static const QRegularExpression dishPPVSpacePerenEnd { R"(\s\)\s*$)" };
480  match = dishPPVSpacePerenEnd.match(event.m_description);
481  if (match.hasMatch())
482  event.m_description.remove(match.capturedStart(), match.capturedLength());
483 
484  // Check for subtitle "All Day (... Eastern)" in the subtitle
485  static const QRegularExpression bellPPVSubtitleAllDay { R"(^All Day \(.*\sEastern\)\s*$)" };
486  match = bellPPVSubtitleAllDay.match(event.m_subtitle);
487  if (match.hasMatch())
488  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
489 
490  // Check for description "(... Eastern)" in the description
491  static const QRegularExpression bellPPVDescriptionAllDay { R"(^\(.*\sEastern\))" };
492  match = bellPPVDescriptionAllDay.match(event.m_description);
493  if (match.hasMatch())
494  event.m_description.remove(match.capturedStart(), match.capturedLength());
495 
496  // Check for description "(... ET)" in the description
497  static const QRegularExpression bellPPVDescriptionAllDay2 { R"(^\([0-9].*am-[0-9].*am\sET\))" };
498  match = bellPPVDescriptionAllDay2.match(event.m_description);
499  if (match.hasMatch())
500  event.m_description.remove(match.capturedStart(), match.capturedLength());
501 
502  // Check for description "(nnnnn)" in the description
503  static const QRegularExpression bellPPVDescriptionEventId { R"(\([0-9]{5}\))" };
504  match = bellPPVDescriptionEventId.match(event.m_description);
505  if (match.hasMatch())
506  event.m_description.remove(match.capturedStart(), match.capturedLength());
507 }
508 
513 {
 // Heuristically carve an episode subtitle off the front of
 // event.m_description.  Candidate split points are colons, a quoted
 // leading phrase, periods, question marks and exclamation marks.  The
 // chosen leading fragment is moved into event.m_subtitle unless it is too
 // long or contains one of the excluded words.
 // The k* limits used below are defined outside this function.
514  QStringList strListColon = event.m_description.split(":");
515  QStringList strListEnd;
516 
517  bool fColon = false;
518  bool fQuotedSubtitle = false;
519  QString strEnd;
 // --- Colon handling: only when the description contains a colon.
520  if (strListColon.count()>1)
521  {
522  bool fDoubleDot = false;
523  bool fSingleDot = true;
 // Length of the text in front of the first colon.
524  int nLength = strListColon[0].length();
525 
 // fDoubleDot: a ".." occurs before the first colon.
526  int nPosition1 = event.m_description.indexOf("..");
527  if ((nPosition1 < nLength) && (nPosition1 >= 0))
528  fDoubleDot = true;
 // fSingleDot stays true only if the first period lies at or before the
 // first colon and the text between the two splits into at least
 // kMaxDotToColon space separated pieces.
529  nPosition1 = event.m_description.indexOf(".");
530  if (nPosition1==-1)
531  fSingleDot = false;
532  if (nPosition1 > nLength)
533  fSingleDot = false;
534  else
535  {
536  QString strTmp = event.m_description.mid(nPosition1+1,
537  nLength-nPosition1);
538 
539  QStringList tmp = strTmp.split(" ");
540  if (((uint) tmp.size()) < kMaxDotToColon)
541  fSingleDot = false;
542  }
543 
544  if (fDoubleDot)
545  {
 // Use the colon separated pieces as they are.
546  strListEnd = strListColon;
547  fColon = true;
548  }
549  else if (!fSingleDot)
550  {
 // Collect leading colon separated pieces while the running count of
 // space separated pieces stays below kMaxToTitle.  nTitleMax records
 // the index of the piece that reached the limit (-1: none did).
551  QStringList strListTmp;
552  uint nTitle=0;
553  int nTitleMax=-1;
554  for (int i =0; (i<strListColon.count()) && (nTitleMax==-1);i++)
555  {
556  const QStringList tmp = strListColon[i].split(" ");
557 
558  nTitle += tmp.size();
559 
560  if (nTitle < kMaxToTitle)
561  strListTmp.push_back(strListColon[i]);
562  else
563  nTitleMax=i;
564  }
 // Re-join the collected pieces with ':' into the first candidate.
565  QString strPartial;
566  for (int i=0;i<(nTitleMax-1);i++)
567  strPartial+=strListTmp[i]+":";
568  if (nTitleMax>0)
569  {
570  strPartial+=strListTmp[nTitleMax-1];
571  strListEnd.push_back(strPartial);
572  }
 // Append the pieces after index nTitleMax (all pieces when it is -1).
573  for (int i=nTitleMax+1;i<strListColon.count();i++)
574  strListEnd.push_back(strListColon[i]);
575  fColon = true;
576  }
577  }
 // --- Quoted subtitle: a leading 'Some words.' followed by a space.  The
 // quoted words (capture 1) become the subtitle and the whole match is
 // removed from the description.
578  static const QRegularExpression ukQuotedSubtitle { R"(^'([\w\s\-,]+?)\.' )" };
579  auto match = ukQuotedSubtitle.match(event.m_description);
580  if (match.hasMatch())
581  {
582  event.m_subtitle = match.captured(1);
583  event.m_description.remove(match.capturedStart(0),
584  match.capturedLength(0));
585  fQuotedSubtitle = true;
586  }
587  QStringList strListPeriod;
588  QStringList strListQuestion;
589  QStringList strListExcl;
 // --- Sentence handling: only when neither of the above applied.
590  if (!(fColon || fQuotedSubtitle))
591  {
 // Split on periods, unless the description's first period is itself
 // the start of a "..".
592  strListPeriod = event.m_description.split(".");
593  if (strListPeriod.count() >1)
594  {
595  int nPosition1 = event.m_description.indexOf(".");
596  int nPosition2 = event.m_description.indexOf("..");
597  if ((nPosition1 < nPosition2) || (nPosition2==-1))
598  strListEnd = strListPeriod;
599  }
600 
 // A split on '?' or, failing that, on '!' replaces the period split
 // when it yields more than one and at most kMaxQuestionExclamation
 // pieces.  strEnd remembers the mark so it can be re-appended.
601  strListQuestion = event.m_description.split("?");
602  strListExcl = event.m_description.split("!");
603  if ((strListQuestion.size() > 1) &&
604  ((uint)strListQuestion.size() <= kMaxQuestionExclamation))
605  {
606  strListEnd = strListQuestion;
607  strEnd = "?";
608  }
609  else if ((strListExcl.size() > 1) &&
610  ((uint)strListExcl.size() <= kMaxQuestionExclamation))
611  {
612  strListEnd = strListExcl;
613  strEnd = "!";
614  }
615  else
616  {
617  strEnd.clear();
618  }
619  }
620 
 // --- Apply: the first candidate piece becomes the subtitle.
621  if (!strListEnd.empty())
622  {
 // Give up when the candidate has too many words.
623  QStringList strListSpace = strListEnd[0].split(
624  " ", Qt::SkipEmptyParts);
625  if (fColon && ((uint)strListSpace.size() > kMaxToTitle))
626  return;
627  if ((uint)strListSpace.size() > kDotToTitle)
628  return;
 // Candidates containing one of these words are not used as subtitle.
 // NOTE(review): "seres" looks like a typo for "series" -- as written it
 // does not match the word "series"; confirm before changing.
629  static const QRegularExpression ukExclusionFromSubtitle {
630  "(starring|stars\\s|drama|seres|sitcom)",
631  QRegularExpression::CaseInsensitiveOption };
632  if (strListSpace.filter(ukExclusionFromSubtitle).empty())
633  {
 // Move the candidate (plus '?'/'!' if any) into the subtitle, drop it
 // and the following separator character from the description, and
 // strip leading spaces, '|' and ':' from both.
634  event.m_subtitle = strListEnd[0]+strEnd;
635  event.m_subtitle.remove(kUKSpaceColonStart);
636  event.m_description=
637  event.m_description.mid(strListEnd[0].length()+1);
638  event.m_description.remove(kUKSpaceColonStart);
639  }
640  }
641 }
642 
643 
648 {
649  static const QRegularExpression uk24ep { R"(^\d{1,2}:00[ap]m to \d{1,2}:00[ap]m: )" };
650  static const QRegularExpression ukTime { R"(\d{1,2}[\.:]\d{1,2}\s*(am|pm|))" };
651  QString strFull;
652 
653  bool isMovie = event.m_category.startsWith("Movie",Qt::CaseInsensitive) ||
654  event.m_category.startsWith("Film",Qt::CaseInsensitive);
655  // BBC three case (could add another record here ?)
656  static const QRegularExpression ukThen { R"(\s*?(Then|Followed by) 60 Seconds\.)",
657  QRegularExpression::CaseInsensitiveOption };
658  static const QRegularExpression ukNew { R"((New\.|\s*?(Brand New|New)\s*?(Series|Episode)\s*?[:\.\-]))",
659  QRegularExpression::CaseInsensitiveOption };
660  static const QRegularExpression ukNewTitle { R"(^(Brand New|New:)\s*)",
661  QRegularExpression::CaseInsensitiveOption };
662  event.m_description = event.m_description.remove(ukThen);
663  event.m_description = event.m_description.remove(ukNew);
664  event.m_title = event.m_title.remove(ukNewTitle);
665 
666  // Removal of Class TV, CBBC and CBeebies etc..
667  static const QRegularExpression ukTitleRemove { "^(?:[tT]4:|Schools\\s*?:)" };
668  static const QRegularExpression ukDescriptionRemove { R"(^(?:CBBC\s*?\.|CBeebies\s*?\.|Class TV\s*?:|BBC Switch\.))" };
669  event.m_title = event.m_title.remove(ukTitleRemove);
670  event.m_description = event.m_description.remove(ukDescriptionRemove);
671 
672  // Removal of BBC FOUR and BBC THREE
673  static const QRegularExpression ukBBC34 { R"(BBC (?:THREE|FOUR) on BBC (?:ONE|TWO)\.)",
674  QRegularExpression::CaseInsensitiveOption };
675  event.m_description = event.m_description.remove(ukBBC34);
676 
677  // BBC 7 [Rpt of ...] case.
678  static const QRegularExpression ukBBC7rpt { R"(\[Rptd?[^]]+?\d{1,2}\.\d{1,2}[ap]m\]\.)" };
679  event.m_description = event.m_description.remove(ukBBC7rpt);
680 
681  // "All New To 4Music!
682  static const QRegularExpression ukAllNew { R"(All New To 4Music!\s?)" };
683  event.m_description = event.m_description.remove(ukAllNew);
684 
685  // Removal of 'Also in HD' text
686  static const QRegularExpression ukAlsoInHD { R"(\s*Also in HD\.)",
687  QRegularExpression::CaseInsensitiveOption };
688  event.m_description = event.m_description.remove(ukAlsoInHD);
689 
690  // Remove [AD,S] etc.
691  static const QRegularExpression ukCC { R"(\[(?:(AD|SL|S|W|HD),?)+\])" };
692  auto match = ukCC.match(event.m_description);
693  while (match.hasMatch())
694  {
695  QStringList tmpCCitems = match.captured(0).remove("[").remove("]").split(",");
696  if (tmpCCitems.contains("AD"))
697  event.m_audioProps |= AUD_VISUALIMPAIR;
698  if (tmpCCitems.contains("HD"))
699  event.m_videoProps |= VID_HDTV;
700  if (tmpCCitems.contains("S"))
701  event.m_subtitleType |= SUB_NORMAL;
702  if (tmpCCitems.contains("SL"))
703  event.m_subtitleType |= SUB_SIGNED;
704  if (tmpCCitems.contains("W"))
705  event.m_videoProps |= VID_WIDESCREEN;
706  event.m_description.remove(match.capturedStart(0),
707  match.capturedLength(0));
708  match = ukCC.match(event.m_description, match.capturedStart(0));
709  }
710 
711  event.m_title = event.m_title.trimmed();
712  event.m_description = event.m_description.trimmed();
713 
714  // Constituents of UK season regexp, decomposed for clarity
715 
716  // Matches Season 2, S 2 and "Series 2," etc but not "hits 2"
717  // cap1 = season
718  static const QString seasonStr = R"(\b(?:Season|Series|S)\s*(\d+)\s*,?)";
719 
720  // Work out the season and episode numbers (if any)
721  // Matching pattern "Season 2 Episode|Ep 3 of 14|3/14" etc
722 
723  // Matches Episode 3, Ep 3/4, Ep 3 of 4 etc but not "step 1"
724  // cap1 = ep, cap2 = total
725  static const QString longEp = R"(\b(?:Ep|Episode)\s*(\d+)\s*(?:(?:/|of)\s*(\d*))?)";
726 
727  // Matches S2 Ep 3/4, "Season 2, Ep 3 of 4", Episode 3 etc
728  // cap1 = season, cap2 = ep, cap3 = total
729  static const QString longSeasEp = QString("\\(?(?:%1)?\\s*%2").arg(seasonStr, longEp);
730 
731  // Matches long seas/ep with surrounding parenthesis & trailing period
732  // cap1 = season, cap2 = ep, cap3 = total
733  static const QString longContext = QString(R"(\(*%1\s*\)?\s*\.?)").arg(longSeasEp);
734 
735  // Matches 3/4, 3 of 4
736  // cap1 = ep, cap2 = total
737  static const QString shortEp = R"((\d+)\s*(?:/|of)\s*(\d+))";
738 
739  // Matches short ep/total, ignoring Parts and idioms such as 9/11, 24/7 etc.
740  // ie. x/y in parenthesis or has no leading or trailing text in the sentence.
741  // cap0 may include previous/anchoring period
742  // cap1 = shortEp with surrounding parenthesis & trailing period (to remove)
743  // cap2 = ep, cap3 = total,
744  static const QString shortContext =
745  QString(R"((?:^|\.)(\s*\(*\s*%1[\s)]*(?:[).:]|$)))").arg(shortEp);
746 
747  // Prefer long format resorting to short format
748  // cap0 = long match to remove, cap1 = long season, cap2 = long ep, cap3 = long total,
749  // cap4 = short match to remove, cap5 = short ep, cap6 = short total
750  static const QRegularExpression ukSeries { "(?:" + longContext + "|" + shortContext + ")",
751  QRegularExpression::CaseInsensitiveOption };
752 
753  bool series = false;
754  bool fromTitle = true;
755  match = ukSeries.match(event.m_title);
756  if (!match.hasMatch())
757  {
758  fromTitle = false;
759  match = ukSeries.match(event.m_description);
760  }
761  if (match.hasMatch())
762  {
763  if (!match.captured(1).isEmpty())
764  {
765  event.m_season = match.captured(1).toUInt();
766  series = true;
767  }
768 
769  if (!match.captured(2).isEmpty())
770  {
771  event.m_episode = match.captured(2).toUInt();
772  series = true;
773  }
774  else if (!match.captured(5).isEmpty())
775  {
776  event.m_episode = match.captured(5).toUInt();
777  series = true;
778  }
779 
780  if (!match.captured(3).isEmpty())
781  {
782  event.m_totalepisodes = match.captured(3).toUInt();
783  series = true;
784  }
785  else if (!match.captured(6).isEmpty())
786  {
787  event.m_totalepisodes = match.captured(6).toUInt();
788  series = true;
789  }
790 
791  // Remove long or short match. Short text doesn't start at position2
792  int form = match.captured(4).isEmpty() ? 0 : 4;
793 
794  if (fromTitle)
795  {
796  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from title (%4) \"%5\"")
797  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
798  .arg(event.m_title, event.m_description));
799 
800  event.m_title.remove(match.capturedStart(form),
801  match.capturedLength(form));
802  }
803  else
804  {
805  LOG(VB_EIT, LOG_DEBUG, QString("Extracted S%1E%2/%3 from description (%4) \"%5\"")
806  .arg(event.m_season).arg(event.m_episode).arg(event.m_totalepisodes)
807  .arg(event.m_title, event.m_description));
808 
809  if (match.capturedStart(form) == 0)
810  {
811  // Remove from the start of the description.
812  // Otherwise it ends up in the subtitle.
813  event.m_description.remove(match.capturedStart(form),
814  match.capturedLength(form));
815  }
816  }
817  }
818 
819  if (isMovie)
820  event.m_categoryType = ProgramInfo::kCategoryMovie;
821  else if (series)
822  event.m_categoryType = ProgramInfo::kCategorySeries;
823 
824  // Multi-part episodes, or films (e.g. ITV film split by news)
825  // Matches Part 1, Pt 1/2, Part 1 of 2 etc.
826  static const QRegularExpression ukPart { R"([-(\:,.]\s*(?:Part|Pt)\s*(\d+)\s*(?:(?:of|/)\s*(\d+))?\s*[-):,.])",
827  QRegularExpression::CaseInsensitiveOption };
828  match = ukPart.match(event.m_title);
829  auto match2 = ukPart.match(event.m_description);
830  if (match.hasMatch())
831  {
832  event.m_partnumber = match.captured(1).toUInt();
833  event.m_parttotal = match.captured(2).toUInt();
834 
835  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from title (%3)")
836  .arg(event.m_partnumber).arg(event.m_parttotal).arg(event.m_title));
837 
838  // Remove from the title
839  event.m_title.remove(match.capturedStart(0),
840  match.capturedLength(0));
841  }
842  else if (match2.hasMatch())
843  {
844  event.m_partnumber = match2.captured(1).toUInt();
845  event.m_parttotal = match2.captured(2).toUInt();
846 
847  LOG(VB_EIT, LOG_DEBUG, QString("Extracted Part %1/%2 from description (%3) \"%4\"")
848  .arg(event.m_partnumber).arg(event.m_parttotal)
849  .arg(event.m_title, event.m_description));
850 
851  // Remove from the start of the description.
852  // Otherwise it ends up in the subtitle.
853  if (match2.capturedStart(0) == 0)
854  {
855  // Retain a single colon (subtitle separator) if we remove any
856  QString sub = match2.captured(0).contains(":") ? ":" : "";
857  event.m_description = event.m_description.replace(match2.captured(0), sub);
858  }
859  }
860 
861  static const QRegularExpression ukStarring { R"((?:Western\s)?[Ss]tarring ([\w\s\-']+?)[Aa]nd\s([\w\s\-']+?)[\.|,]\s*(\d{4})?(?:\.\s)?)" };
862  match = ukStarring.match(event.m_description);
863  if (match.hasMatch())
864  {
865  // if we match this we've captured 2 actors and an (optional) airdate
866  /* Possible TODO: if EIT includes the priority and/or character
867  * names for the actors, include them in AddPerson call. */
868  event.AddPerson(DBPerson::kActor, match.captured(1));
869  event.AddPerson(DBPerson::kActor, match.captured(2));
870  if (match.captured(3).length() > 0)
871  {
872  bool ok = false;
873  uint y = match.captured(3).toUInt(&ok);
874  if (ok)
875  {
876  event.m_airdate = y;
877  event.m_originalairdate = QDate(y, 1, 1);
878  }
879  }
880  }
881 
882  static const QRegularExpression ukLaONoSplit { "^Law & Order: (?:Criminal Intent|LA|"
883  "Special Victims Unit|Trial by Jury|UK|You the Jury)" };
884  if (!event.m_title.startsWith("CSI:") && !event.m_title.startsWith("CD:") &&
885  !event.m_title.contains(ukLaONoSplit) &&
886  !event.m_title.startsWith("Mission: Impossible"))
887  {
888  static const QRegularExpression ukDoubleDotStart { R"(^\.\.+)" };
889  static const QRegularExpression ukDoubleDotEnd { R"(\.\.+$)" };
890  if ((event.m_title.indexOf(ukDoubleDotEnd) != -1) &&
891  (event.m_description.indexOf(ukDoubleDotStart) != -1))
892  {
893  QString strPart=event.m_title.remove(ukDoubleDotEnd)+" ";
894  strFull = strPart + event.m_description.remove(ukDoubleDotStart);
895  static const QRegularExpression ukCEPQ { R"([:\!\.\?]\s)" };
896  static const QRegularExpression ukSpaceStart { "^ " };
897  int position1 = strFull.indexOf(ukCEPQ,strPart.length());
898  if (isMovie && (position1 != -1))
899  {
900  if (strFull[position1] == '!' || strFull[position1] == '?'
901  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
902  position1++;
903  event.m_title = strFull.left(position1);
904  event.m_description = strFull.mid(position1 + 1);
905  event.m_description.remove(ukSpaceStart);
906  }
907  else
908  {
909  position1 = strFull.indexOf(ukCEPQ);
910  if (position1 != -1)
911  {
912  if (strFull[position1] == '!' || strFull[position1] == '?'
913  || (position1>2 && strFull[position1] == '.' && strFull[position1-2] == '.'))
914  position1++;
915  event.m_title = strFull.left(position1);
916  event.m_description = strFull.mid(position1 + 1);
917  event.m_description.remove(ukSpaceStart);
918  SetUKSubtitle(event);
919  }
920  }
921  }
922  else if (event.m_description.indexOf(uk24ep) != -1)
923  {
924  auto match24 = uk24ep.match(event.m_description);
925  if (match24.hasMatch())
926  {
927  // Special case for episodes of 24.
928  // -2 from the length cause we don't want ": " on the end
929  event.m_subtitle = event.m_description.mid(match24.capturedStart(0),
930  match24.captured(0).length() - 2);
931  event.m_description = event.m_description.remove(match24.captured(0));
932  }
933  }
934  else if (event.m_description.indexOf(ukTime) == -1)
935  {
936  static const QRegularExpression ukYearColon { R"(^[\d]{4}:)" };
937  if (!isMovie && (event.m_title.indexOf(ukYearColon) < 0))
938  {
939  int position1 = event.m_title.indexOf(":");
940  if ((position1 != -1) &&
941  (event.m_description.indexOf(":") < 0 ))
942  {
943  static const QRegularExpression ukCompleteDots { R"(^\.\.+$)" };
944  if (event.m_title.mid(position1+1).indexOf(ukCompleteDots)==0)
945  {
946  SetUKSubtitle(event);
947  QString strTmp = event.m_title.mid(position1+1);
948  event.m_title.resize(position1);
949  event.m_subtitle = strTmp+event.m_subtitle;
950  }
951  else if ((uint)position1 < kSubtitleMaxLen)
952  {
953  event.m_subtitle = event.m_title.mid(position1 + 1);
954  event.m_title = event.m_title.left(position1);
955  }
956  }
957  else
958  {
959  SetUKSubtitle(event);
960  }
961  }
962  }
963  }
964 
965  if (!isMovie && event.m_subtitle.isEmpty() &&
966  !event.m_title.startsWith("The X-Files"))
967  {
968  int position1 = event.m_description.indexOf(ukTime);
969  if (position1 != -1)
970  {
971  static const QRegularExpression ukColonPeriod { R"([:\.])" };
972  int position2 = event.m_description.indexOf(ukColonPeriod);
973  if ((position2>=0) && (position2 < (position1-2)))
974  SetUKSubtitle(event);
975  }
976  else
977  {
978  position1 = event.m_title.indexOf("-");
979  if (position1 != -1)
980  {
981  if ((uint)position1 < kSubtitleMaxLen)
982  {
983  event.m_subtitle = event.m_title.mid(position1 + 1);
984  event.m_subtitle.remove(kUKSpaceColonStart);
985  event.m_title = event.m_title.left(position1);
986  }
987  }
988  else
989  {
990  SetUKSubtitle(event);
991  }
992  }
993  }
994 
995  // Work out the year (if any)
996  static const QRegularExpression ukYear { R"([\[\(]([\d]{4})[\)\]])" };
997  match = ukYear.match(event.m_description);
998  if (match.hasMatch())
999  {
1000  event.m_description.remove(match.capturedStart(0),
1001  match.capturedLength(0));
1002  bool ok = false;
1003  uint y = match.captured(1).toUInt(&ok);
1004  if (ok)
1005  {
1006  event.m_airdate = y;
1007  event.m_originalairdate = QDate(y, 1, 1);
1008  }
1009  }
1010 
1011  // Trim leading/trailing '.'
1012  static const QRegularExpression ukDotSpaceStart { R"(^\. )" };
1013  static const QRegularExpression ukDotEnd { R"(\.$)" };
1014  event.m_subtitle.remove(ukDotSpaceStart);
1015  if (event.m_subtitle.lastIndexOf("..") != (event.m_subtitle.length()-2))
1016  event.m_subtitle.remove(ukDotEnd);
1017 
1018  // Reverse the subtitle and empty description
1019  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())
1020  {
1021  event.m_description=event.m_subtitle;
1022  event.m_subtitle.clear();
1023  }
1024 }
1025 
1030 {
1031  /* Used for PBS ATSC Subtitles are separated by a colon */
1032  int position = event.m_description.indexOf(':');
1033  if (position != -1)
1034  {
1035  const QString stmp = event.m_description;
1036  event.m_subtitle = stmp.left(position);
1037  event.m_description = stmp.right(stmp.length() - position - 2);
1038  }
1039 }
1040 
1044 void EITFixUp::FixComHem(DBEventEIT &event, bool process_subtitle)
1045 {
1046  static const QRegularExpression comHemPersSeparator { R"((, |\soch\s))" };
1047 
1048  // Reverse what EITFixUp::Fix() did
1049  if (event.m_subtitle.isEmpty() && !event.m_description.isEmpty())
1050  {
1051  event.m_subtitle = event.m_description;
1052  event.m_description = "";
1053  }
1054 
1055  // Remove subtitle, it contains the category and we already know that
1056  event.m_subtitle = "";
1057 
1058  bool isSeries = false;
1059  // Try to find episode numbers
1060  static const QRegularExpression comHemSeries1
1061  { R"(\s?(?:[dD]el|[eE]pisode)\s([0-9]+)(?:\s?(?:/|:|av)\s?([0-9]+))?\.)" };
1062  static const QRegularExpression comHemSeries2 { R"(\s?-?\s?([Dd]el\s+([0-9]+)))" };
1063  auto match = comHemSeries1.match(event.m_description);
1064  auto match2 = comHemSeries2.match(event.m_title);
1065  if (match2.hasMatch())
1066  {
1067  event.m_partnumber = match2.capturedView(2).toUInt();
1068  event.m_title.remove(match2.capturedStart(), match2.capturedLength());
1069  }
1070  else if (match.hasMatch())
1071  {
1072  if (match.capturedStart(1) != -1)
1073  event.m_partnumber = match.capturedView(1).toUInt();
1074  if (match.capturedStart(2) != -1)
1075  event.m_parttotal = match.capturedView(2).toUInt();
1076 
1077  // Remove the episode numbers, but only if it's not at the begining
1078  // of the description (subtitle code might use it)
1079  if (match.capturedStart() > 0)
1080  event.m_description.remove(match.capturedStart(),
1081  match.capturedLength());
1082  isSeries = true;
1083  }
1084 
1085  // Add partnumber/parttotal to subtitle
1086  // This will be overwritten if we find a better subtitle
1087  if (event.m_partnumber > 0)
1088  {
1089  event.m_subtitle = QString("Del %1").arg(event.m_partnumber);
1090  if (event.m_parttotal > 0)
1091  event.m_subtitle += QString(" av %1").arg(event.m_parttotal);
1092  }
1093 
1094  // Move subtitle info from title to subtitle
1095  static const QRegularExpression comHemTSub { R"(\s+-\s+([^\-]+))" };
1096  match = comHemTSub.match(event.m_title);
1097  if (match.hasMatch())
1098  {
1099  event.m_subtitle = match.captured(1);
1100  event.m_title.remove(match.capturedStart(), match.capturedLength());
1101  }
1102 
1103  // No need to continue without a description.
1104  if (event.m_description.length() <= 0)
1105  return;
1106 
1107  // Try to find country category, year and possibly other information
1108  // from the begining of the description
1109  static const QRegularExpression comHemCountry
1110  { R"(^(\(.+\))?\s?([^ ]+)\s([^\.0-9]+)\sfrån\s([0-9]{4})(?:\smed\s([^\.]+))?\.?)" };
1111  match = comHemCountry.match(event.m_description);
1112  if (match.hasMatch())
1113  {
1114  QString replacement;
1115 
1116  // Original title, usually english title
1117  // note: list[1] contains extra () around the text that needs removing
1118  if (!match.capturedView(1).isEmpty())
1119  {
1120  replacement = match.captured(1) + " ";
1121  //store it somewhere?
1122  }
1123 
1124  // Countr(y|ies)
1125  if (!match.capturedView(2).isEmpty())
1126  {
1127  replacement += match.captured(2) + " ";
1128  //store it somewhere?
1129  }
1130 
1131  // Category
1132  if (!match.capturedView(3).isEmpty())
1133  {
1134  replacement += match.captured(3) + ".";
1135  if(event.m_category.isEmpty())
1136  {
1137  event.m_category = match.captured(3);
1138  }
1139 
1140  if(match.captured(3).indexOf("serie")!=-1)
1141  {
1142  isSeries = true;
1143  }
1144  }
1145 
1146  // Year
1147  if (!match.capturedView(4).isEmpty())
1148  {
1149  bool ok = false;
1150  uint y = match.capturedView(4).trimmed().toUInt(&ok);
1151  if (ok)
1152  event.m_airdate = y;
1153  }
1154 
1155  // Actors
1156  if (!match.capturedView(5).isEmpty())
1157  {
1158  const QStringList actors =
1159  match.captured(5).split(comHemPersSeparator, Qt::SkipEmptyParts);
1160  /* Possible TODO: if EIT inlcude the priority and/or character
1161  * names for the actors, include them in AddPerson call. */
1162  for (const auto & actor : std::as_const(actors))
1163  event.AddPerson(DBPerson::kActor, actor);
1164  }
1165 
1166  // Remove year and actors.
1167  // The reason category is left in the description is because otherwise
1168  // the country would look wierd like "Amerikansk. Rest of description."
1169  event.m_description = event.m_description.replace(match.captured(0),replacement);
1170  }
1171 
1172  if (isSeries)
1173  event.m_categoryType = ProgramInfo::kCategorySeries;
1174 
1175  // Look for additional persons in the description
1176  static const QRegularExpression comHemPersons
1177  { R"(\s?([Rr]egi|[Ss]kådespelare|[Pp]rogramledare|[Ii] rollerna):\s([^\.]+)\.)" };
1178  auto iter = comHemPersons.globalMatch(event.m_description);
1179  while (iter.hasNext())
1180  {
1181  auto pmatch = iter.next();
1183 
1184  static const QRegularExpression comHemDirector { "[Rr]egi" };
1185  static const QRegularExpression comHemActor { "[Ss]kådespelare|[Ii] rollerna" };
1186  static const QRegularExpression comHemHost { "[Pp]rogramledare" };
1187 #if QT_VERSION < QT_VERSION_CHECK(6,5,0)
1188  auto dmatch = comHemDirector.match(pmatch.capturedView(1));
1189  auto amatch = comHemActor.match(pmatch.capturedView(1));
1190  auto hmatch = comHemHost.match(pmatch.capturedView(1));
1191 #else
1192  auto dmatch = comHemDirector.matchView(pmatch.capturedView(1));
1193  auto amatch = comHemActor.matchView(pmatch.capturedView(1));
1194  auto hmatch = comHemHost.matchView(pmatch.capturedView(1));
1195 #endif
1196  if (dmatch.hasMatch())
1197  role = DBPerson::kDirector;
1198  else if (amatch.hasMatch())
1199  role = DBPerson::kActor;
1200  else if (hmatch.hasMatch())
1201  role = DBPerson::kHost;
1202  else
1203  {
1204  event.m_description.remove(pmatch.capturedStart(), pmatch.capturedLength());
1205  continue;
1206  }
1207 
1208  const QStringList actors =
1209  pmatch.captured(2).split(comHemPersSeparator, Qt::SkipEmptyParts);
1210  /* Possible TODO: if EIT inlcude the priority and/or character
1211  * names for the actors, include them in AddPerson call. */
1212  for (const auto & actor : std::as_const(actors))
1213  event.AddPerson(role, actor);
1214 
1215  // Remove it
1216  event.m_description=event.m_description.replace(pmatch.captured(0),"");
1217  }
1218 
1219  // Is this event on a channel we shoud look for a subtitle?
1220  // The subtitle is the first sentence in the description, but the
1221  // subtitle can't be the only thing in the description and it must be
1222  // shorter than 55 characters or we risk picking up the wrong thing.
1223  if (process_subtitle)
1224  {
1225  static const QRegularExpression comHemSub { R"([.\?\!] )" };
1226  int pos2 = event.m_description.indexOf(comHemSub);
1227  bool pvalid = pos2 != -1 && pos2 <= 55;
1228  if (pvalid && (event.m_description.length() - (pos2 + 2)) > 0)
1229  {
1230  event.m_subtitle = event.m_description.left(
1231  pos2 + (event.m_description[pos2] == '?' ? 1 : 0));
1232  event.m_description = event.m_description.mid(pos2 + 2);
1233  }
1234  }
1235 
1236  // Teletext subtitles?
1237  static const QRegularExpression comHemTT { "[Tt]ext-[Tt][Vv]" };
1238  if (event.m_description.indexOf(comHemTT) != -1)
1239  event.m_subtitleType |= SUB_NORMAL;
1240 
1241  // Try to findout if this is a rerun and if so the date.
1242  static const QRegularExpression comHemRerun1 { R"([Rr]epris\sfrån\s([^\.]+)(?:\.|$))" };
1243  static const QRegularExpression comHemRerun2 { R"(([0-9]+)/([0-9]+)(?:\s-\s([0-9]{4}))?)" };
1244  match = comHemRerun1.match(event.m_description);
1245  if (!match.hasMatch())
1246  return;
1247 
1248  // Rerun from today
1249  if (match.captured(1) == "i dag")
1250  {
1251  event.m_originalairdate = event.m_starttime.date();
1252  return;
1253  }
1254 
1255  // Rerun from yesterday afternoon
1256  if (match.captured(1) == "eftermiddagen")
1257  {
1258  event.m_originalairdate = event.m_starttime.date().addDays(-1);
1259  return;
1260  }
1261 
1262  // Rerun with day, month and possibly year specified
1263 #if QT_VERSION < QT_VERSION_CHECK(6,5,0)
1264  match2 = comHemRerun2.match(match.capturedView(1));
1265 #else
1266  match2 = comHemRerun2.matchView(match.capturedView(1));
1267 #endif
1268  if (match2.hasMatch())
1269  {
1270  int day = match2.capturedView(1).toInt();
1271  int month = match2.capturedView(2).toInt();
1272  //int year;
1273  //if (match2.capturedLength(3) > 0)
1274  // year = match2.capturedView(3).toInt();
1275  //else
1276  // year = event.m_starttime.date().year();
1277 
1278  if (day > 0 && month > 0)
1279  {
1280  QDate date(event.m_starttime.date().year(), month, day);
1281  // it's a rerun so it must be in the past
1282  if (date > event.m_starttime.date())
1283  date = date.addYears(-1);
1284  event.m_originalairdate = date;
1285  }
1286  return;
1287  }
1288 }
1289 
1294 {
1295  event.m_category = event.m_subtitle;
1296  /* Used for DVB-S Subtitles are separated by a colon */
1297  int position = event.m_description.indexOf(':');
1298  if (position != -1)
1299  {
1300  const QString stmp = event.m_description;
1301  event.m_subtitle = stmp.left(position);
1302  event.m_description = stmp.right(stmp.length() - position - 2);
1303  }
1304 }
1305 
1310 {
1311  if (event.m_description.startsWith("[Program data ") || event.m_description.startsWith("[Program info "))//TEN
1312  {
1313  event.m_description = "";//event.m_subtitle;
1314  }
1315  if (event.m_description.endsWith("Copyright West TV Ltd. 2011)"))
1316  event.m_description.resize(event.m_description.length()-40);
1317 
1318  if (event.m_description.isEmpty() && !event.m_subtitle.isEmpty())//due to ten's copyright info, this won't be caught before
1319  {
1320  event.m_description = event.m_subtitle;
1321  event.m_subtitle.clear();
1322  }
1323  if (event.m_description.startsWith(event.m_title+" - "))
1324  event.m_description.remove(0,event.m_title.length()+3);
1325  if (event.m_title.startsWith("LIVE: ", Qt::CaseInsensitive))
1326  {
1327  event.m_title.remove(0, 6);
1328  event.m_description.prepend("(Live) ");
1329  }
1330 }
1331 
1336 {
1337  static const QRegularExpression rating { "\\((G|PG|M|MA)\\)" };
1338  auto match = rating.match(event.m_description);
1339  if (match.hasMatch())
1340  {
1341  EventRating prograting;
1342  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1343  event.m_ratings.push_back(prograting);
1344  event.m_description.remove(0,match.capturedLength()+1);
1345  }
1346  if (event.m_description.startsWith("[HD]"))
1347  {
1348  event.m_videoProps |= VID_HDTV;
1349  event.m_description.remove(0,5);
1350  }
1351  if (event.m_description.startsWith("[CC]"))
1352  {
1353  event.m_subtitleType |= SUB_NORMAL;
1354  event.m_description.remove(0,5);
1355  }
1356  if (event.m_subtitle == "Movie")
1357  {
1358  event.m_subtitle.clear();
1359  event.m_categoryType = ProgramInfo::kCategoryMovie;
1360  }
1361  if (event.m_description.startsWith(event.m_title))
1362  event.m_description.remove(0,event.m_title.length()+1);
1363 }
1364 
1369 {
1370  if (event.m_description.endsWith(" Rpt"))
1371  {
1372  event.m_previouslyshown = true;
1373  event.m_description.resize(event.m_description.size()-4);
1374  }
1375  static const QRegularExpression year { "(\\d{4})$" };
1376  auto match = year.match(event.m_description);
1377  if (match.hasMatch())
1378  {
1379  event.m_airdate = match.capturedView(1).toUInt();
1380  event.m_description.resize(event.m_description.size()-5);
1381  }
1382  if (event.m_description.endsWith(" CC"))
1383  {
1384  event.m_subtitleType |= SUB_NORMAL;
1385  event.m_description.resize(event.m_description.size()-3);
1386  }
1387  QString advisories;//store the advisories to append later
1388  static const QRegularExpression adv { "(\\([A-Z,]+\\))$" };
1389  match = adv.match(event.m_description);
1390  if (match.hasMatch())
1391  {
1392  advisories = match.captured(1);
1393  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1394  }
1395  static const QRegularExpression rating { "(C|G|PG|M|MA)$" };
1396  match = rating.match(event.m_description);
1397  if (match.hasMatch())
1398  {
1399  EventRating prograting;
1400  prograting.m_system="AU"; prograting.m_rating = match.captured(1);
1401  if (!advisories.isEmpty())
1402  prograting.m_rating.append(" ").append(advisories);
1403  event.m_ratings.push_back(prograting);
1404  event.m_description.remove(match.capturedStart()-1, match.capturedLength()+1);
1405  }
1406 }
1411 {
1412  // If the description has been truncated to fit within the
1413  // 'subtitle' eit field, none of the following will work (ABC)
1414  if (event.m_description.endsWith(".."))
1415  return;
1416  event.m_description = event.m_description.trimmed();
1417 
1418  static const QRegularExpression auFreeviewSY { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\)$)" };
1419  auto match = auFreeviewSY.match(event.m_description);
1420  if (match.hasMatch())
1421  {
1422  if (event.m_subtitle.isEmpty())//nine sometimes has an actual subtitle field and the brackets thingo)
1423  event.m_subtitle = match.captured(2);
1424  event.m_airdate = match.capturedView(3).toUInt();
1425  event.m_description = match.captured(1);
1426  return;
1427  }
1428  static const QRegularExpression auFreeviewY { "(.*) \\(([12][0-9][0-9][0-9])\\)$" };
1429  match = auFreeviewY.match(event.m_description);
1430  if (match.hasMatch())
1431  {
1432  event.m_airdate = match.capturedView(2).toUInt();
1433  event.m_description = match.captured(1);
1434  return;
1435  }
1436  static const QRegularExpression auFreeviewSYC { R"((.*) \((.+)\) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1437  match = auFreeviewSYC.match(event.m_description);
1438  if (match.hasMatch())
1439  {
1440  if (event.m_subtitle.isEmpty())
1441  event.m_subtitle = match.captured(2);
1442  event.m_airdate = match.capturedView(3).toUInt();
1443  QStringList actors = match.captured(4).split("/");
1444  /* Possible TODO: if EIT inlcude the priority and/or character
1445  * names for the actors, include them in AddPerson call. */
1446  for (const QString& actor : std::as_const(actors))
1447  event.AddPerson(DBPerson::kActor, actor);
1448  event.m_description = match.captured(1);
1449  return;
1450  }
1451  static const QRegularExpression auFreeviewYC { R"((.*) \(([12][0-9][0-9][0-9])\) \((.+)\)$)" };
1452  match = auFreeviewYC.match(event.m_description);
1453  if (match.hasMatch())
1454  {
1455  event.m_airdate = match.capturedView(2).toUInt();
1456  QStringList actors = match.captured(3).split("/");
1457  /* Possible TODO: if EIT inlcude the priority and/or character
1458  * names for the actors, include them in AddPerson call. */
1459  for (const QString& actor : std::as_const(actors))
1460  event.AddPerson(DBPerson::kActor, actor);
1461  event.m_description = match.captured(1);
1462  }
1463 }
1464 
1469 {
1470  const uint SUBTITLE_PCT = 60; // % of description to allow subtitle to
1471  const uint lSUBTITLE_MAX_LEN = 128;// max length of subtitle field in db.
1472 
1473  // Remove subtitle, it contains category information too specific to use
1474  event.m_subtitle = QString("");
1475 
1476  // No need to continue without a description.
1477  if (event.m_description.length() <= 0)
1478  return;
1479 
1480  // Replace incomplete title if the full one is in the description
1481  static const QRegularExpression mcaIncompleteTitle { R"((.*).\.\.\.$)" };
1482  auto match = mcaIncompleteTitle.match(event.m_title);
1483  if (match.hasMatch())
1484  {
1485  static const QString mcaCompleteTitlea { "^'?(" };
1486  static const QString mcaCompleteTitleb { R"([^\.\?]+[^\'])'?[\.\?]\s+(.+))" };
1487  static const QRegularExpression mcaCompleteTitle
1488  { mcaCompleteTitlea + match.captured(1) + mcaCompleteTitleb,
1489  QRegularExpression::CaseInsensitiveOption};
1490  match = mcaCompleteTitle.match(event.m_description);
1491  if (match.hasMatch())
1492  {
1493  event.m_title = match.captured(1).trimmed();
1494  event.m_description = match.captured(2).trimmed();
1495  }
1496  }
1497 
1498  // Try to find subtitle in description
1499  static const QRegularExpression mcaSubtitle { R"(^'([^\.]+)'\.\s+(.+))" };
1500  match = mcaSubtitle.match(event.m_description);
1501  if (match.hasMatch())
1502  {
1503  uint matchLen = match.capturedLength(1);
1504 #if QT_VERSION < QT_VERSION_CHECK(6,0,0)
1505  uint evDescLen = std::max(event.m_description.length(), 1);
1506 #else
1507  uint evDescLen = std::max(event.m_description.length(), 1LL);
1508 #endif
1509 
1510  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1511  ((matchLen * 100 / evDescLen) < SUBTITLE_PCT))
1512  {
1513  event.m_subtitle = match.captured(1);
1514  event.m_description = match.captured(2);
1515  }
1516  }
1517 
1518  // Try to find episode numbers in subtitle
1519  static const QRegularExpression mcaSeries { R"(^S?(\d+)\/E?(\d+)\s-\s(.*)$)" };
1520  match = mcaSeries.match(event.m_subtitle);
1521  if (match.hasMatch())
1522  {
1523  uint season = match.capturedView(1).toUInt();
1524  uint episode = match.capturedView(2).toUInt();
1525  event.m_subtitle = match.captured(3).trimmed();
1526  event.m_syndicatedepisodenumber =
1527  QString("S%1E%2").arg(season).arg(episode);
1528  event.m_season = season;
1529  event.m_episode = episode;
1530  event.m_categoryType = ProgramInfo::kCategorySeries;
1531  }
1532 
1533  // Closed captioned?
1534  static const QRegularExpression mcaCC { R"(,?\s(HI|English) Subtitles\.?)" };
1535  int position = event.m_description.indexOf(mcaCC);
1536  if (position > 0)
1537  {
1538  event.m_subtitleType |= SUB_HARDHEAR;
1539  event.m_description.remove(mcaCC);
1540  }
1541 
1542  // Dolby Digital 5.1?
1543  static const QRegularExpression mcaDD { R"(,?\sDD\.?)" };
1544  position = event.m_description.indexOf(mcaDD);
1545  if ((position > 0) && (position > event.m_description.length() - 7))
1546  {
1547  event.m_audioProps |= AUD_DOLBY;
1548  event.m_description.remove(mcaDD);
1549  }
1550 
1551  // Remove bouquet tags
1552  static const QRegularExpression mcaAvail { R"(\s(Only available on [^\.]*bouquet|Not available in RSA [^\.]*)\.?)" };
1553  event.m_description.remove(mcaAvail);
1554 
1555  // Try to find year and director from the end of the description
1556  bool isMovie = false;
1557  static const QRegularExpression mcaCredits { R"((.*)\s\((\d{4})\)\s*([^\.]+)\.?\s*$)" };
1558  match = mcaCredits.match(event.m_description);
1559  if (match.hasMatch())
1560  {
1561  isMovie = true;
1562  event.m_description = match.captured(1).trimmed();
1563  bool ok = false;
1564  uint y = match.captured(2).trimmed().toUInt(&ok);
1565  if (ok)
1566  event.m_airdate = y;
1567  event.AddPerson(DBPerson::kDirector, match.captured(3).trimmed());
1568  }
1569  else
1570  {
1571  // Try to find year only from the end of the description
1572  static const QRegularExpression mcaYear { R"((.*)\s\((\d{4})\)\s*$)" };
1573  match = mcaYear.match(event.m_description);
1574  if (match.hasMatch())
1575  {
1576  isMovie = true;
1577  event.m_description = match.captured(1).trimmed();
1578  bool ok = false;
1579  uint y = match.captured(2).trimmed().toUInt(&ok);
1580  if (ok)
1581  event.m_airdate = y;
1582  }
1583  }
1584 
1585  if (isMovie)
1586  {
1587  static const QRegularExpression mcaActors { R"((.*\.)\s+([^\.]+\s[A-Z][^\.]+)\.\s*)" };
1588  match = mcaActors.match(event.m_description);
1589  if (match.hasMatch())
1590  {
1591  static const QRegularExpression mcaActorsSeparator { "(,\\s+)" };
1592  const QStringList actors = match.captured(2).split(
1593  mcaActorsSeparator, Qt::SkipEmptyParts);
1594  /* Possible TODO: if EIT inlcude the priority and/or character
1595  * names for the actors, include them in AddPerson call. */
1596  for (const auto & actor : std::as_const(actors))
1597  event.AddPerson(DBPerson::kActor, actor.trimmed());
1598  event.m_description = match.captured(1).trimmed();
1599  }
1600  event.m_categoryType = ProgramInfo::kCategoryMovie;
1601  }
1602 }
1603 
1608 {
1609  // subtitle with episode number: "Folge *: 'subtitle'
1610  static const QRegularExpression superRTLSubtitle { R"(^Folge\s(\d{1,3}):\s'(.*)')" };
1611  auto match = superRTLSubtitle.match(event.m_subtitle);
1612  if (match.hasMatch())
1613  {
1614  event.m_season = 0;
1615  event.m_episode = match.capturedView(1).toUInt();
1616  event.m_subtitle = match.captured(2);
1617  }
1618 
1619  // No need to continue without a description or with an subtitle.
1620  if (event.m_description.length() <= 0 || event.m_subtitle.length() > 0)
1621  return;
1622 
1623  // Repeat
1624  static const QRegularExpression rtlRepeat
1625  { R"([\s\(]?Wiederholung.+vo[m|n].+(\d{2}\.\d{2}\.\d{4}|\d{2}[:\.]\d{2}\sUhr)\)?)" };
1626  match = rtlRepeat.match(event.m_description);
1627  if (match.hasMatch())
1628  {
1629  // remove '.' if it matches at the beginning of the description
1630  int pos = match.capturedStart(0);
1631  int length = match.capturedLength(0) + (pos ? 0 : 1);
1632  event.m_description = event.m_description.remove(pos, length).trimmed();
1633  }
1634 
1635  // should be (?:\x{8a}|\\.\\s*|$) but 0x8A gets replaced with 0x20
1636  static const QRegularExpression rtlSubtitle1 { R"(^Folge\s(\d{1,4})\s*:\s+'(.*)'(?:\s|\.\s*|$))" };
1637  static const QRegularExpression rtlSubtitle2 { R"(^Folge\s(\d{1,4})\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1638  static const QRegularExpression rtlSubtitle3 { R"(^(?:Folge\s)?(\d{1,4}(?:\/[IVX]+)?)\s+(.{0,5}[^\?!\.]{0,120})[\?!\.]\s*)" };
1639  static const QRegularExpression rtlSubtitle4 { R"(^Thema.{0,5}:\s([^\.]+)\.\s*)" };
1640  static const QRegularExpression rtlSubtitle5 { "^'(.+)'\\.\\s*" };
1641  static const QRegularExpression rtlEpisodeNo1 { R"(^(Folge\s\d{1,4})\.*\s*)" };
1642  static const QRegularExpression rtlEpisodeNo2 { R"(^(\d{1,2}\/[IVX]+)\.*\s*)" };
1643 
1644  auto match1 = rtlSubtitle1.match(event.m_description);
1645  auto match2 = rtlSubtitle2.match(event.m_description);
1646  auto match3 = rtlSubtitle3.match(event.m_description);
1647  auto match4 = rtlSubtitle4.match(event.m_description);
1648  auto match5 = rtlSubtitle5.match(event.m_description);
1649  auto match6 = rtlEpisodeNo1.match(event.m_description);
1650  auto match7 = rtlEpisodeNo2.match(event.m_description);
1651 
1652  // subtitle with episode number: "Folge *: 'subtitle'. description
1653  if (match1.hasMatch())
1654  {
1655  event.m_syndicatedepisodenumber = match1.captured(1);
1656  event.m_subtitle = match1.captured(2);
1657  event.m_description =
1658  event.m_description.remove(0, match1.capturedLength());
1659  }
1660  // episode number subtitle
1661  else if (match2.hasMatch())
1662  {
1663  event.m_syndicatedepisodenumber = match2.captured(1);
1664  event.m_subtitle = match2.captured(2);
1665  event.m_description =
1666  event.m_description.remove(0, match2.capturedLength());
1667  }
1668  // episode number subtitle
1669  else if (match3.hasMatch())
1670  {
1671  event.m_syndicatedepisodenumber = match3.captured(1);
1672  event.m_subtitle = match3.captured(2);
1673  event.m_description =
1674  event.m_description.remove(0, match3.capturedLength());
1675  }
1676  // "Thema..."
1677  else if (match4.hasMatch())
1678  {
1679  event.m_subtitle = match4.captured(1);
1680  event.m_description =
1681  event.m_description.remove(0, match4.capturedLength());
1682  }
1683  // "'...'"
1684  else if (match5.hasMatch())
1685  {
1686  event.m_subtitle = match5.captured(1);
1687  event.m_description =
1688  event.m_description.remove(0, match5.capturedLength());
1689  }
1690  // episode number
1691  else if (match6.hasMatch())
1692  {
1693  event.m_syndicatedepisodenumber = match6.captured(2);
1694  event.m_subtitle = match6.captured(1);
1695  event.m_description =
1696  event.m_description.remove(0, match6.capturedLength());
1697  }
1698  // episode number
1699  else if (match7.hasMatch())
1700  {
1701  event.m_syndicatedepisodenumber = match7.captured(2);
1702  event.m_subtitle = match7.captured(1);
1703  event.m_description =
1704  event.m_description.remove(0, match7.capturedLength());
1705  }
1706 
1707  /* got an episode title now? (we did not have one at the start of this function) */
1708  if (!event.m_subtitle.isEmpty())
1710 
1711  /* if we do not have an episode title by now try some guessing as last resort */
1712  if (event.m_subtitle.length() == 0)
1713  {
1714  const uint SUBTITLE_PCT = 35; // % of description to allow subtitle up to
1715  const uint lSUBTITLE_MAX_LEN = 50; // max length of subtitle field in db
1716 
1717  static const QRegularExpression rtlSubtitle { R"(^([^\.]{3,})\.\s+(.+))" };
1718  match = rtlSubtitle.match(event.m_description);
1719  if (match.hasMatch())
1720  {
1721  uint matchLen = match.capturedLength(1);
1722 #if QT_VERSION < QT_VERSION_CHECK(6,0,0)
1723  uint evDescLen = std::max(event.m_description.length(), 1);
1724 #else
1725  uint evDescLen = std::max(event.m_description.length(), 1LL);
1726 #endif
1727 
1728  if ((matchLen < lSUBTITLE_MAX_LEN) &&
1729  (matchLen * 100 / evDescLen < SUBTITLE_PCT))
1730  {
1731  event.m_subtitle = match.captured(1);
1732  event.m_description = match.captured(2);
1733  }
1734  }
1735  }
1736 }
1737 
// Lookup table from crew job-title strings to the DBPerson role stored for
// them.  "Drehbuch" and "Autor" both map to the writer role.
1738 // FIXME add more jobs
1739 static const QMap<QString,DBPerson::Role> deCrewTitle {
1740  { "Regie", DBPerson::kDirector },
1741  { "Drehbuch", DBPerson::kWriter },
1742  { "Autor", DBPerson::kWriter },
1743 };
1744 
1749 {
1750  // strip repeat info and set previouslyshown flag
1751  static const QRegularExpression pro7Repeat
1752  { R"((?<=\s|^)\(WH vom \w+, \d{2}\.\d{2}\.\d{4}, \d{2}:\d{2} Uhr\)$)" };
1753  auto match = pro7Repeat.match(event.m_subtitle);
1754  if (match.hasMatch())
1755  {
1756  event.m_previouslyshown = true;
1757  event.m_subtitle.remove(match.capturedStart(0),
1758  match.capturedLength(0));
1759  event.m_subtitle = event.m_subtitle.trimmed();
1760  }
1761 
1762  // strip "Mit Gebärdensprache (Online-Stream)"
1763  static const QRegularExpression pro7signLanguage
1764  { R"((?<=\s|^)Mit Gebärdensprache \(Online\-Stream\)$)" };
1765  match = pro7signLanguage.match(event.m_subtitle);
1766  if (match.hasMatch())
1767  {
1768  event.m_subtitle.remove(match.capturedStart(0),
1769  match.capturedLength(0));
1770  event.m_subtitle = event.m_subtitle.trimmed();
1771  }
1772 
1773  // move age ratings into metadata
1774  static const QRegularExpression pro7ratingAllAges
1775  { R"((?<=\s|^)Altersfreigabe: Ohne Altersbeschränkung$)" };
1776  match = pro7ratingAllAges.match(event.m_subtitle);
1777  if (match.hasMatch())
1778  {
1779  EventRating prograting;
1780  prograting.m_system="DE";
1781  prograting.m_rating = "0";
1782  event.m_ratings.push_back(prograting);
1783 
1784  event.m_subtitle.remove(match.capturedStart(0),
1785  match.capturedLength(0));
1786  event.m_subtitle = event.m_subtitle.trimmed();
1787  }
1788  static const QRegularExpression pro7rating
1789  { R"((?<=\s|^)Altersfreigabe: ab (\d+)$)" };
1790  match = pro7rating.match(event.m_subtitle);
1791  if (match.hasMatch())
1792  {
1793  EventRating prograting;
1794  prograting.m_system="DE";
1795  prograting.m_rating = match.captured(1);
1796  event.m_ratings.push_back(prograting);
1797 
1798  event.m_subtitle.remove(match.capturedStart(0),
1799  match.capturedLength(0));
1800  event.m_subtitle = event.m_subtitle.trimmed();
1801  }
1802 
1803  // move category and (original) airdate into metadata, add country and airdate to description
1804  static const QRegularExpression pro7CategoryOriginalairdate
1805  { R"((?<=\s|^)(Late Night Show|Live Shopping|Real Crime|Real Life Doku|Romantic Comedy|Scripted Reality|\S+), ([A-Z]+(?:\/[A-Z]+)*) (\d{4})$)" };
1806  match = pro7CategoryOriginalairdate.match(event.m_subtitle);
1807  if (match.hasMatch())
1808  {
1809  event.m_category = match.captured(1);
1810 
1811  event.m_description.append(" (").append(match.captured(2)).append(" ").append(match.captured(3)).append(")");
1812 
1813  uint y = match.captured(3).toUInt();
1814  event.m_originalairdate = QDate(y, 1, 1);
1815  if (event.m_airdate == 0)
1816  {
1817  event.m_airdate = y;
1818  }
1819 
1820  event.m_subtitle.remove(match.capturedStart(0),
1821  match.capturedLength(0));
1822  event.m_subtitle = event.m_subtitle.trimmed();
1823  }
1824 
1825  // remove subtitle if equal to title
1826  if (event.m_title == event.m_subtitle) {
1827  event.m_subtitle = "";
1828  }
1829 }
1830 
1835 {
1836  static const QRegularExpression deDisneyChannelSubtitle { R"(,([^,]+?)\s{0,1}(\d{4})$)" };
1837  auto match = deDisneyChannelSubtitle.match(event.m_subtitle);
1838  if (match.hasMatch())
1839  {
1840  if (event.m_airdate == 0)
1841  {
1842  event.m_airdate = match.captured(3).toUInt();
1843  }
1844  event.m_subtitle.remove(match.capturedStart(0),
1845  match.capturedLength(0));
1846  }
1847  static const QRegularExpression tmp { R"(\s[^\s]+?-(Serie))" };
1848  match = tmp.match(event.m_subtitle);
1849  if (match.hasMatch())
1850  {
1851  event.m_categoryType = ProgramInfo::kCategorySeries;
1852  event.m_category=match.captured(0).trimmed();
1853  event.m_subtitle.remove(match.capturedStart(0),
1854  match.capturedLength(0));
1855  }
1856 }
1857 
1862 {
1863  static const QRegularExpression atvSubtitle { R"(,{0,1}\sFolge\s(\d{1,3})$)" };
1864  event.m_subtitle.replace(atvSubtitle, "");
1865 }
1866 
1867 
1872 {
1873  static const QRegularExpression fiRerun { R"(\s?Uusinta[a-zA-Z\s]*\.?)" };
1874  auto match = fiRerun.match(event.m_description);
1875  if (match.hasMatch())
1876  {
1877  event.m_previouslyshown = true;
1878  event.m_description.remove(match.capturedStart(), match.capturedLength());
1879  }
1880 
1881  static const QRegularExpression fiRerun2 { R"(\([Uu]\))" };
1882  match = fiRerun2.match(event.m_description);
1883  if (match.hasMatch())
1884  {
1885  event.m_previouslyshown = true;
1886  event.m_description.remove(match.capturedStart(), match.capturedLength());
1887  }
1888 
1889  // Check for (Stereo) in the decription and set the <audio> tags
1890  match = kStereo.match(event.m_description);
1891  if (match.hasMatch())
1892  {
1893  event.m_audioProps |= AUD_STEREO;
1894  event.m_description.remove(match.capturedStart(), match.capturedLength());
1895  }
1896 
1897  // Remove age limit in parenthesis at end of title
1898  static const QRegularExpression fiAgeLimit { R"(\((\d{1,2}|[ST])\)$)" };
1899  match = fiAgeLimit.match(event.m_title);
1900  if (match.hasMatch())
1901  {
1902  EventRating prograting;
1903  prograting.m_system="FI"; prograting.m_rating = match.captured(1);
1904  event.m_ratings.push_back(prograting);
1905  event.m_title.remove(match.capturedStart(), match.capturedLength());
1906  }
1907 
1908  // Remove Film or Elokuva at start of title
1909  static const QRegularExpression fiFilm { "^(Film|Elokuva): " };
1910  match = fiFilm.match(event.m_title);
1911  if (match.hasMatch())
1912  {
1913  event.m_category = "Film";
1914  event.m_categoryType = ProgramInfo::kCategoryMovie;
1915  event.m_title.remove(match.capturedStart(), match.capturedLength());
1916  }
1917 }
1918 
1924 {
1925  QString country = "";
1926 
1927  static const QRegularExpression dePremiereLength { R"(\s?[0-9]+\sMin\.)" };
1928  event.m_description = event.m_description.replace(dePremiereLength, "");
1929 
1930  static const QRegularExpression dePremiereAirdate { R"(\s?([^\s^\.]+)\s((?:1|2)[0-9]{3})\.)" };
1931  auto match = dePremiereAirdate.match(event.m_description);
1932  if ( match.hasMatch())
1933  {
1934  country = match.captured(1).trimmed();
1935  bool ok = false;
1936  uint y = match.captured(2).toUInt(&ok);
1937  if (ok)
1938  event.m_airdate = y;
1939  event.m_description.remove(match.capturedStart(0),
1940  match.capturedLength(0));
1941  }
1942 
1943  static const QRegularExpression dePremiereCredits { R"(\sVon\s([^,]+)(?:,|\su\.\sa\.)\smit\s([^\.]*)\.)" };
1944  match = dePremiereCredits.match(event.m_description);
1945  if (match.hasMatch())
1946  {
1947  event.AddPerson(DBPerson::kDirector, match.captured(1));
1948  const QStringList actors = match.captured(2).split(
1949  ", ", Qt::SkipEmptyParts);
1950  /* Possible TODO: if EIT inlcude the priority and/or character
1951  * names for the actors, include them in AddPerson call. */
1952  for (const auto & actor : std::as_const(actors))
1953  event.AddPerson(DBPerson::kActor, actor);
1954  event.m_description.remove(match.capturedStart(0),
1955  match.capturedLength(0));
1956  }
1957 
1958  event.m_description = event.m_description.replace("\u000A$", "");
1959  event.m_description = event.m_description.replace("\u000A", " ");
1960 
1961  // move the original titel from the title to subtitle
1962  static const QRegularExpression dePremiereOTitle { R"(\s*\(([^\)]*)\)$)" };
1963  match = dePremiereOTitle.match(event.m_title);
1964  if (match.hasMatch())
1965  {
1966  event.m_subtitle = QString("%1, %2").arg(match.captured(1), country);
1967  event.m_title.remove(match.capturedStart(0),
1968  match.capturedLength(0));
1969  }
1970 
1971  // Find infos about season and episode number
1972  static const QRegularExpression deSkyDescriptionSeasonEpisode { R"(^(\d{1,2}).\sStaffel,\sFolge\s(\d{1,2}):\s)" };
1973  match = deSkyDescriptionSeasonEpisode.match(event.m_description);
1974  if (match.hasMatch())
1975  {
1976  event.m_season = match.captured(1).trimmed().toUInt();
1977  event.m_episode = match.captured(2).trimmed().toUInt();
1978  event.m_description.remove(match.capturedStart(0),
1979  match.capturedLength(0));
1980  }
1981 }
1982 
1983 /*
1984  * Mapping table from English category names to Dutch names and types
      *
      * Key:   the English category name as it arrives in event.m_category.
      * Value: the Dutch category name plus the ProgramInfo category type that
      *        replaces the event's category and category type on a match.
1985  */
1986 struct NLMapResult {
1987  QString name;
      // NOTE(review): the initialisers below supply two values per entry (a name
      // and a ProgramInfo category type), so a second member appears to be
      // missing from this listing - confirm against the real source.
1989 };
      // Looked up by exact key; categories without an entry are left as they are.
1990 static const QMap<QString, NLMapResult> categoryTrans = {
1991  { "Documentary", { "Documentaire", ProgramInfo::kCategoryNone } },
1992  { "News", { "Nieuws/actualiteiten", ProgramInfo::kCategoryNone } },
1993  { "Kids", { "Jeugd", ProgramInfo::kCategoryNone } },
1994  { "Show/game Show", { "Amusement", ProgramInfo::kCategoryTVShow } },
1995  { "Music/Ballet/Dance", { "Muziek", ProgramInfo::kCategoryNone } },
1996  { "News magazine", { "Informatief", ProgramInfo::kCategoryNone } },
1997  { "Movie", { "Film", ProgramInfo::kCategoryMovie } },
1998  { "Nature/animals/Environment", { "Natuur", ProgramInfo::kCategoryNone } },
1999  { "Movie - Adult", { "Erotiek", ProgramInfo::kCategoryNone } },
2000  { "Movie - Soap/melodrama/folkloric",
2001  { "Serie/soap", ProgramInfo::kCategorySeries } },
2002  { "Arts/Culture", { "Kunst/Cultuur", ProgramInfo::kCategoryNone } },
2003  { "Sports", { "Sport", ProgramInfo::kCategorySports } },
2004  { "Cartoons/Puppets", { "Animatie", ProgramInfo::kCategoryNone } },
2005  { "Movie - Comedy", { "Comedy", ProgramInfo::kCategorySeries } },
2006  { "Movie - Detective/Thriller", { "Misdaad", ProgramInfo::kCategoryNone } },
2007  { "Social/Spiritual Sciences", { "Religieus", ProgramInfo::kCategoryNone } },
2008 };
2009 
2014 {
2015  QString fullinfo = event.m_subtitle + event.m_description;
2016  event.m_subtitle = "";
2017 
2018  // Convert categories to Dutch categories Myth knows.
2019  // nog invoegen: comedy, sport, misdaad
2020 
2021  if (categoryTrans.contains(event.m_category))
2022  {
2023  auto [name, type] = categoryTrans[event.m_category];
2024  event.m_category = name;
2025  event.m_categoryType = type;
2026  }
2027 
2028  // Film - categories are usually not Films
2029  if (event.m_category.startsWith("Film -"))
2030  event.m_categoryType = ProgramInfo::kCategorySeries;
2031 
2032  // Get stereo info
2033  auto match = kStereo.match(fullinfo);
2034  if (match.hasMatch())
2035  {
2036  event.m_audioProps |= AUD_STEREO;
2037  fullinfo.remove(match.capturedStart(), match.capturedLength());
2038  }
2039 
2040  //Get widescreen info
2041  static const QRegularExpression nlWide { "breedbeeld" };
2042  match = nlWide.match(fullinfo);
2043  if (match.hasMatch())
2044  {
2045  event.m_videoProps |= VID_WIDESCREEN;
2046  fullinfo = fullinfo.replace("breedbeeld", ".");
2047  }
2048 
2049  // Get repeat info
2050  static const QRegularExpression nlRepeat { "herh." };
2051  match = nlRepeat.match(fullinfo);
2052  if (match.hasMatch())
2053  fullinfo = fullinfo.replace("herh.", ".");
2054 
2055  // Get teletext subtitle info
2056  static const QRegularExpression nlTxt { "txt" };
2057  match = nlTxt.match(fullinfo);
2058  if (match.hasMatch())
2059  {
2060  event.m_subtitleType |= SUB_NORMAL;
2061  fullinfo = fullinfo.replace("txt", ".");
2062  }
2063 
2064  // Get HDTV information
2065  static const QRegularExpression nlHD { R"(\sHD$)" };
2066  match = nlHD.match(event.m_title);
2067  if (match.hasMatch())
2068  {
2069  event.m_videoProps |= VID_HDTV;
2070  event.m_title.remove(match.capturedStart(), match.capturedLength());
2071  }
2072 
2073  // Try to make subtitle from Afl.:
2074  static const QRegularExpression nlSub { R"(\sAfl\.:\s([^\.]+)\.)" };
2075  match = nlSub.match(fullinfo);
2076  if (match.hasMatch())
2077  {
2078  QString tmpSubString = match.captured(0);
2079  tmpSubString = tmpSubString.right(match.capturedLength() - 7);
2080  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2081  fullinfo.remove(match.capturedStart(), match.capturedLength());
2082  }
2083 
2084  // Try to make subtitle from " "
2085  static const QRegularExpression nlSub2 { R"(\s\"([^\"]+)\")" };
2086  match = nlSub2.match(fullinfo);
2087  if (match.hasMatch())
2088  {
2089  QString tmpSubString = match.captured(0);
2090  tmpSubString = tmpSubString.right(match.capturedLength() - 2);
2091  event.m_subtitle = tmpSubString.left(tmpSubString.length() -1);
2092  fullinfo.remove(match.capturedStart(), match.capturedLength());
2093  }
2094 
2095 
2096  // This is trying to catch the case where the subtitle is in the main title
2097  // but avoid cases where it isn't a subtitle e.g cd:uk
2098  int position = event.m_title.indexOf(":");
2099  if ((position != -1) &&
2100  (event.m_title[position + 1].toUpper() == event.m_title[position + 1]) &&
2101  (event.m_subtitle.isEmpty()))
2102  {
2103  event.m_subtitle = event.m_title.mid(position + 1);
2104  event.m_title = event.m_title.left(position);
2105  }
2106 
2107 
2108  // Get the actors
2109  static const QRegularExpression nlActors { R"(\sMet:\s.+e\.a\.)" };
2110  static const QRegularExpression nlPersSeparator { R"((, |\sen\s))" };
2111  match = nlActors.match(fullinfo);
2112  if (match.hasMatch())
2113  {
2114  QString tmpActorsString = match.captured(0);
2115  tmpActorsString = tmpActorsString.right(tmpActorsString.length() - 6);
2116  tmpActorsString = tmpActorsString.left(tmpActorsString.length() - 5);
2117  const QStringList actors =
2118  tmpActorsString.split(nlPersSeparator, Qt::SkipEmptyParts);
2119  /* Possible TODO: if EIT inlcude the priority and/or character
2120  * names for the actors, include them in AddPerson call. */
2121  for (const auto & actor : std::as_const(actors))
2122  event.AddPerson(DBPerson::kActor, actor);
2123  fullinfo.remove(match.capturedStart(), match.capturedLength());
2124  }
2125 
2126  // Try to find presenter
2127  static const QRegularExpression nlPres { R"(\sPresentatie:\s([^\.]+)\.)" };
2128  match = nlPres.match(fullinfo);
2129  if (match.hasMatch())
2130  {
2131  QString tmpPresString = match.captured(0);
2132  tmpPresString = tmpPresString.right(tmpPresString.length() - 14);
2133  tmpPresString = tmpPresString.left(tmpPresString.length() -1);
2134  const QStringList presenters =
2135  tmpPresString.split(nlPersSeparator, Qt::SkipEmptyParts);
2136  for (const auto & presenter : std::as_const(presenters))
2137  event.AddPerson(DBPerson::kPresenter, presenter);
2138  fullinfo.remove(match.capturedStart(), match.capturedLength());
2139  }
2140 
2141  // Try to find year
2142  static const QRegularExpression nlYear1 { R"(\suit\s([1-2][0-9]{3}))" };
2143  static const QRegularExpression nlYear2 { R"((\s\([A-Z]{0,3}/?)([1-2][0-9]{3})\))",
2144  QRegularExpression::CaseInsensitiveOption };
2145  match = nlYear1.match(fullinfo);
2146  if (match.hasMatch())
2147  {
2148  bool ok = false;
2149  uint y = match.capturedView(1).toUInt(&ok);
2150  if (ok)
2151  event.m_originalairdate = QDate(y, 1, 1);
2152  }
2153 
2154  match = nlYear2.match(fullinfo);
2155  if (match.hasMatch())
2156  {
2157  bool ok = false;
2158  uint y = match.capturedView(2).toUInt(&ok);
2159  if (ok)
2160  event.m_originalairdate = QDate(y, 1, 1);
2161  }
2162 
2163  // Try to find director
2164  static const QRegularExpression nlDirector { R"(\svan\s(([A-Z][a-z]+\s)|([A-Z]\.\s)))" };
2165  match = nlDirector.match(fullinfo);
2166  if (match.hasMatch())
2167  event.AddPerson(DBPerson::kDirector, match.captured(1));
2168 
2169  // Strip leftovers
2170  static const QRegularExpression nlRub { R"(\s?\(\W+\)\s?)" };
2171  fullinfo.remove(nlRub);
2172 
2173  // Strip category info from description
2174  static const QRegularExpression nlCat { "^(Amusement|Muziek|Informatief|Nieuws/actualiteiten|Jeugd|Animatie|Sport|Serie/soap|Kunst/Cultuur|Documentaire|Film|Natuur|Erotiek|Comedy|Misdaad|Religieus)\\.\\s" };
2175  fullinfo.remove(nlCat);
2176 
2177  // Remove omroep from title
2178  static const QRegularExpression nlOmroep { R"(\s\(([A-Z]+/?)+\)$)" };
2179  event.m_title.remove(nlOmroep);
2180 
2181  // Put information back in description
2182 
2183  event.m_description = fullinfo;
2184 }
2185 
2187 {
      // Re-categorise events that are too short (less than kMinMovieDuration
      // seconds between start and end time) as an ordinary TV show.
2188  // remove category movie from short events
      // NOTE(review): the opening line of the condition is missing from this
      // listing; presumably it checks that the event is currently categorised
      // as a movie - confirm against the real source.
2190  event.m_starttime.secsTo(event.m_endtime) < kMinMovieDuration)
2191  {
2192  /* default taken from ContentDescriptor::GetMythCategory */
2193  event.m_categoryType = ProgramInfo::kCategoryTVShow;
2194  }
2195 }
2196 
2201 {
2202  // Check for "title (R)" in the title
2203  static const QRegularExpression noRerun { "\\(R\\)" };
2204  auto match = noRerun.match(event.m_title);
2205  if (match.hasMatch())
2206  {
2207  event.m_previouslyshown = true;
2208  event.m_title.remove(match.capturedStart(), match.capturedLength());
2209  }
2210  // Check for "subtitle (HD)" in the subtitle
2211  static const QRegularExpression noHD { R"([\(\[]HD[\)\]])" };
2212  match = noHD.match(event.m_subtitle);
2213  if (match.hasMatch())
2214  {
2215  event.m_videoProps |= VID_HDTV;
2216  event.m_subtitle.remove(match.capturedStart(), match.capturedLength());
2217  }
2218  // Check for "description (HD)" in the description
2219  match = noHD.match(event.m_description);
2220  if (match.hasMatch())
2221  {
2222  event.m_videoProps |= VID_HDTV;
2223  event.m_description.remove(match.capturedStart(), match.capturedLength());
2224  }
2225 }
2226 
2231 {
2232  // Check for "title (R)" in the title
2233  static const QRegularExpression noRerun { "\\(R\\)" };
2234  auto match = noRerun.match(event.m_title);
2235  if (match.hasMatch())
2236  {
2237  event.m_previouslyshown = true;
2238  event.m_title.remove(match.capturedStart(), match.capturedLength());
2239  }
2240  // Check for "(R)" in the description
2241  match = noRerun.match(event.m_description);
2242  if (match.hasMatch())
2243  {
2244  event.m_previouslyshown = true;
2245  }
2246 
2247  // Move colon separated category from program-titles into description
2248  // Have seen "NRK2s historiekveld: Film: bla-bla"
2249  static const QRegularExpression noNRKCategories
2250  { "^(Superstrek[ea]r|Supersomm[ea]r|Superjul|Barne-tv|Fantorangen|Kuraffen|Supermorg[eo]n|Julemorg[eo]n|Sommermorg[eo]n|"
2251  "Kuraffen-TV|Sport i dag|NRKs sportsl.rdag|NRKs sportss.ndag|Dagens dokumentar|"
2252  "NRK2s historiekveld|Detektimen|Nattkino|Filmklassiker|Film|Kortfilm|P.skemorg[eo]n|"
2253  "Radioteatret|Opera|P2-Akademiet|Nyhetsmorg[eo]n i P2 og Alltid Nyheter:): (.+)" };
2254  match = noNRKCategories.match(event.m_title);
2255  if (match.hasMatch() && (match.capturedLength(2) > 1))
2256  {
2257  event.m_title = match.captured(2);
2258  event.m_description = "(" + match.captured(1) + ") " + event.m_description;
2259  }
2260 
2261  // Remove season premiere markings
2262  static const QRegularExpression noPremiere { "\\s+-\\s+(Sesongpremiere|Premiere|premiere)!?$" };
2263  match = noPremiere.match(event.m_title);
2264  if (match.hasMatch() && (match.capturedStart() >= 3))
2265  event.m_title.remove(match.capturedStart(), match.capturedLength());
2266 
2267  // Try to find colon-delimited subtitle in title, only tested for NRK channels
2268  if (!event.m_title.startsWith("CSI:") &&
2269  !event.m_title.startsWith("CD:") &&
2270  !event.m_title.startsWith("Distriktsnyheter: fra"))
2271  {
2272  static const QRegularExpression noColonSubtitle { "^([^:]+): (.+)" };
2273  match = noColonSubtitle.match(event.m_title);
2274  if (match.hasMatch())
2275  {
2276  if (event.m_subtitle.length() <= 0)
2277  {
2278  event.m_title = match.captured(1);
2279  event.m_subtitle = match.captured(2);
2280  }
2281  else if (event.m_subtitle == match.captured(2))
2282  {
2283  event.m_title = match.captured(1);
2284  }
2285  }
2286  }
2287 }
2288 
2293 {
2294  // Source: YouSee Rules of Operation v1.16
2295  // url: http://yousee.dk/~/media/pdf/CPE/Rules_Operation.ashx
2296  int episode = -1;
2297  int season = -1;
2298 
2299  // Title search
2300  // episode and part/part total
2301  static const QRegularExpression dkEpisode { R"(\(([0-9]+)\))" };
2302  auto match = dkEpisode.match(event.m_title);
2303  if (match.hasMatch())
2304  {
2305  episode = match.capturedView(1).toInt();
2306  event.m_partnumber = match.capturedView(1).toInt();
2307  event.m_title.remove(match.capturedStart(), match.capturedLength());
2308  }
2309 
2310  static const QRegularExpression dkPart { R"(\(([0-9]+):([0-9]+)\))" };
2311  match = dkPart.match(event.m_title);
2312  if (match.hasMatch())
2313  {
2314  episode = match.capturedView(1).toInt();
2315  event.m_partnumber = match.capturedView(1).toInt();
2316  event.m_parttotal = match.capturedView(2).toInt();
2317  event.m_title.remove(match.capturedStart(), match.capturedLength());
2318  }
2319 
2320  // subtitle delimiters
2321  static const QRegularExpression dkSubtitle1 { "^([^:]+): (.+)" };
2322  match = dkSubtitle1.match(event.m_title);
2323  if (match.hasMatch())
2324  {
2325  event.m_title = match.captured(1);
2326  event.m_subtitle = match.captured(2);
2327  }
2328  else
2329  {
2330  static const QRegularExpression dkSubtitle2 { "^([^:]+) - (.+)" };
2331  match = dkSubtitle2.match(event.m_title);
2332  if (match.hasMatch())
2333  {
2334  event.m_title = match.captured(1);
2335  event.m_subtitle = match.captured(2);
2336  }
2337  }
2338 
2339  // Description search
2340  // Season (Sæson [:digit:]+.) => episode = season episode number
2341  // or year (- år [:digit:]+(\\)|:) ) => episode = total episode number
2342  static const QRegularExpression dkSeason1 { "Sæson ([0-9]+)\\." };
2343  match = dkSeason1.match(event.m_description);
2344  if (match.hasMatch())
2345  {
2346  season = match.capturedView(1).toInt();
2347  }
2348  else
2349  {
2350  static const QRegularExpression dkSeason2 { "- år ([0-9]+) :" };
2351  match = dkSeason2.match(event.m_description);
2352  if (match.hasMatch())
2353  {
2354  season = match.capturedView(1).toInt();
2355  }
2356  }
2357 
2358  if (episode > 0)
2359  event.m_episode = episode;
2360 
2361  if (season > 0)
2362  event.m_season = season;
2363 
2364  //Feature:
2365  static const QRegularExpression dkFeatures { "Features:(.+)" };
2366  match = dkFeatures.match(event.m_description);
2367  if (match.hasMatch())
2368  {
2369  QString features = match.captured(1);
2370  event.m_description.remove(match.capturedStart(),
2371  match.capturedLength());
2372  // 16:9
2373  static const QRegularExpression dkWidescreen { " 16:9" };
2374  if (features.indexOf(dkWidescreen) != -1)
2375  event.m_videoProps |= VID_WIDESCREEN;
2376  // HDTV
2377  static const QRegularExpression dkHD { " HD" };
2378  if (features.indexOf(dkHD) != -1)
2379  event.m_videoProps |= VID_HDTV;
2380  // Dolby Digital surround
2381  static const QRegularExpression dkDolby { " 5:1" };
2382  if (features.indexOf(dkDolby) != -1)
2383  event.m_audioProps |= AUD_DOLBY;
2384  // surround
2385  static const QRegularExpression dkSurround { R"( \(\(S\)\))" };
2386  if (features.indexOf(dkSurround) != -1)
2387  event.m_audioProps |= AUD_SURROUND;
2388  // stereo
2389  static const QRegularExpression dkStereo { " S" };
2390  if (features.indexOf(dkStereo) != -1)
2391  event.m_audioProps |= AUD_STEREO;
2392  // (G)
2393  static const QRegularExpression dkReplay { " \\(G\\)" };
2394  if (features.indexOf(dkReplay) != -1)
2395  event.m_previouslyshown = true;
2396  // TTV
2397  static const QRegularExpression dkTxt { " TTV" };
2398  if (features.indexOf(dkTxt) != -1)
2399  event.m_subtitleType |= SUB_NORMAL;
2400  }
2401 
2402  // Series and program id
2403  // programid is currently not transmitted
2404  // YouSee doesn't use a default authority but uses the first byte after
2405  // the / to indicate if the seriesid is global unique or unique on the
2406  // service id
2407  if (event.m_seriesId.length() >= 1 && event.m_seriesId[0] == '/')
2408  {
2409  QString newid;
2410  if (event.m_seriesId[1] == '1')
2411  {
2412  newid = QString("%1%2").arg(event.m_chanid).
2413  arg(event.m_seriesId.mid(2,8));
2414  }
2415  else
2416  {
2417  newid = event.m_seriesId.mid(2,8);
2418  }
2419  event.m_seriesId = newid;
2420  }
2421 
2422  if (event.m_programId.length() >= 1 && event.m_programId[0] == '/')
2423  event.m_programId[0]='_';
2424 
2425  // Add season and episode number to subtitle
2426  if (episode > 0)
2427  {
2428  event.m_subtitle = QString("%1 (%2").arg(event.m_subtitle).arg(episode);
2429  if (event.m_parttotal >0)
2430  event.m_subtitle = QString("%1:%2").arg(event.m_subtitle).
2431  arg(event.m_parttotal);
2432  if (season > 0)
2433  {
2434  event.m_season = season;
2435  event.m_episode = episode;
2436  event.m_syndicatedepisodenumber =
2437  QString("S%1E%2").arg(season).arg(episode);
2438  event.m_subtitle = QString("%1 Sæson %2").arg(event.m_subtitle).
2439  arg(season);
2440  }
2441  event.m_subtitle = QString("%1)").arg(event.m_subtitle);
2442  }
2443 
2444  // Find actors and director in description
2445  static const QRegularExpression dkDirector { "(?:Instr.: |Instrukt.r: )(.+)$" };
2446  static const QRegularExpression dkPersonsSeparator { "(, )|(og )" };
2447  QStringList directors {};
2448  match = dkDirector.match(event.m_description);
2449  if (match.hasMatch())
2450  {
2451  QString tmpDirectorsString = match.captured(1);
2452  directors = tmpDirectorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2453  for (const auto & director : std::as_const(directors))
2454  {
2455  tmpDirectorsString = director.split(":").last().trimmed().
2456  remove(kDotAtEnd);
2457  if (tmpDirectorsString != "")
2458  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2459  }
2460  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2461  }
2462 
2463  static const QRegularExpression dkActors { "(?:Medvirkende: |Medv\\.: )(.+)" };
2464  match = dkActors.match(event.m_description);
2465  if (match.hasMatch())
2466  {
2467  QString tmpActorsString = match.captured(1);
2468  const QStringList actors =
2469  tmpActorsString.split(dkPersonsSeparator, Qt::SkipEmptyParts);
2470  for (const auto & actor : std::as_const(actors))
2471  {
2472  tmpActorsString = actor.split(":").last().trimmed().remove(kDotAtEnd);
2473  if (!tmpActorsString.isEmpty() && !directors.contains(tmpActorsString))
2474  event.AddPerson(DBPerson::kActor, tmpActorsString);
2475  }
2476  //event.m_description.remove(match.capturedStart(), match.capturedLength());
2477  }
2478 
2479  //find year
2480  static const QRegularExpression dkYear { " fra ([0-9]{4})[ \\.]" };
2481  match = dkYear.match(event.m_description);
2482  if (match.hasMatch())
2483  {
2484  bool ok = false;
2485  uint y = match.capturedView(1).toUInt(&ok);
2486  if (ok)
2487  event.m_originalairdate = QDate(y, 1, 1);
2488  }
2489 }
2490 
2495 {
2496  LOG(VB_EIT, LOG_INFO, QString("Applying html strip to %1").arg(event.m_title));
2497  static const QRegularExpression html { "</?EM>", QRegularExpression::CaseInsensitiveOption };
2498  event.m_title.remove(html);
2499 }
2500 
2501 // Moves the subtitle field into the description since it's just used
2502 // as more description field. All the sort-out will happen in the description
2503 // field. Also, sometimes the description is just a repeat of the title. If so,
2504 // we remove it.
2506 {
2507  if (event.m_title == event.m_description)
2508  {
2509  event.m_description = QString("");
2510  }
2511  if (!event.m_subtitle.isEmpty())
2512  {
2513  if (event.m_subtitle.trimmed().right(1) != ".'" )
2514  event.m_subtitle = event.m_subtitle.trimmed() + ".";
2515  event.m_description = event.m_subtitle.trimmed() + QString(" ") + event.m_description;
2516  event.m_subtitle = QString("");
2517  }
2518 }
2519 
2521 {
2522  // Program ratings
2523  static const QRegularExpression grRating { R"(\[(K|Κ|8|12|16|18)\]\s*)",
2524  QRegularExpression::CaseInsensitiveOption };
2525  auto match = grRating.match(event.m_title);
2526  if (match.hasMatch())
2527  {
2528  EventRating prograting;
2529  prograting.m_system="GR"; prograting.m_rating = match.captured(1);
2530  event.m_ratings.push_back(prograting);
2531  event.m_title.remove(match.capturedStart(), match.capturedLength());
2532  event.m_title = event.m_title.trimmed();
2533  }
2534 
2535  //Live show
2536  int position = event.m_title.indexOf("(Ζ)");
2537  if (position != -1)
2538  {
2539  event.m_title = event.m_title.replace("(Ζ)", "");
2540  event.m_description.prepend("Ζωντανή Μετάδοση. ");
2541  }
2542 
2543  // Greek not previously Shown
2544  static const QRegularExpression grNotPreviouslyShown {
2545  R"(\W?(?:-\s*)*(?:\b[Α1]['΄η]?\s*(?:τηλεοπτικ[ηή]\s*)?(?:μετ[αά]δοση|προβολ[ηή]))\W?)",
2546  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2547  match = grNotPreviouslyShown.match(event.m_title);
2548  if (match.hasMatch())
2549  {
2550  event.m_previouslyshown = false;
2551  event.m_title.remove(match.capturedStart(), match.capturedLength());
2552  }
2553 
2554  // Greek Replay (Ε)
2555  // it might look redundant compared to previous check but at least it helps
2556  // remove the (Ε) From the title.
2557  static const QRegularExpression grReplay { R"(\([ΕE]\))" };
2558  match = grReplay.match(event.m_title);
2559  if (match.hasMatch())
2560  {
2561  event.m_previouslyshown = true;
2562  event.m_title.remove(match.capturedStart(), match.capturedLength());
2563  }
2564 
2565  // Check for (HD) in the decription
2566  position = event.m_description.indexOf("(HD)");
2567  if (position != -1)
2568  {
2569  event.m_description = event.m_description.replace("(HD)", "");
2570  event.m_videoProps |= VID_HDTV;
2571  }
2572 
2573  // Check for (Full HD) in the decription
2574  position = event.m_description.indexOf("(Full HD)");
2575  if (position != -1)
2576  {
2577  event.m_description = event.m_description.replace("(Full HD)", "");
2578  event.m_videoProps |= VID_HDTV;
2579  }
2580 
2581  static const QRegularExpression grFixnofullstopActors { R"(\w\s(Παίζουν:|Πρωταγων))" };
2582  match = grFixnofullstopActors.match(event.m_description);
2583  if (match.hasMatch())
2584  event.m_description.insert(match.capturedStart() + 1, ".");
2585 
2586  // If they forgot the "." at the end of the sentence before the actors/directors begin, let's insert it.
2587  static const QRegularExpression grFixnofullstopDirectors { R"(\w\s(Σκηνοθ[εέ]))" };
2588  match = grFixnofullstopDirectors.match(event.m_description);
2589  if (match.hasMatch())
2590  event.m_description.insert(match.capturedStart() + 1, ".");
2591 
2592  // Find actors and director in description
2593  // I am looking for actors first and then for directors/presenters because
2594  // sometimes punctuation is missing and the "Παίζουν:" label is mistaken
2595  // for a director's/presenter's surname (directors/presenters are shown
2596  // before actors in the description field.). So removing the text after
2597  // adding the actors AND THEN looking for dir/pres helps to clear things up.
2598  static const QRegularExpression grActors { R"((?:[Ππ]α[ιί]ζουν:|[ΜMμ]ε τους:|Πρωταγωνιστο[υύ]ν:|Πρωταγωνιστε[ιί]:?)(?:\s+στο ρόλο(?: του| της)?\s(?:\w+\s[οη]\s))?([-\w\s']+(?:,[-\w\s']+)*)(?:κ\.[αά])?\W?)" };
2599  // cap(1) actors, just names
2600  static const QRegularExpression grPeopleSeparator { R"(([,-]\s+))" };
2601  match = grActors.match(event.m_description);
2602  if (match.hasMatch())
2603  {
2604  QString tmpActorsString = match.captured(1);
2605  const QStringList actors =
2606  tmpActorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2607  for (const auto & actor : std::as_const(actors))
2608  {
2609  tmpActorsString = actor.split(":").last().trimmed().remove(kDotAtEnd);
2610  if (tmpActorsString != "")
2611  event.AddPerson(DBPerson::kActor, tmpActorsString);
2612  }
2613  event.m_description.remove(match.capturedStart(), match.capturedLength());
2614  }
2615 
2616  // Director
2617  static const QRegularExpression grDirector { R"((?:Σκηνοθεσία: |Σκηνοθέτης: |Σκηνοθέτης - Επιμέλεια: )(\w+\s\w+\s?)(?:\W?))" };
2618  match = grDirector.match(event.m_description);
2619  if (match.hasMatch())
2620  {
2621  QString tmpDirectorsString = match.captured(1);
2622  const QStringList directors =
2623  tmpDirectorsString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2624  for (const auto & director : std::as_const(directors))
2625  {
2626  tmpDirectorsString = director.split(":").last().trimmed().
2627  remove(kDotAtEnd);
2628  if (tmpDirectorsString != "")
2629  {
2630  event.AddPerson(DBPerson::kDirector, tmpDirectorsString);
2631  }
2632  }
2633  event.m_description.remove(match.capturedStart(), match.capturedLength());
2634  }
2635 
2636  //Try to find presenter
2637  static const QRegularExpression grPres { R"((?:Παρουσ[ιί]αση:(?:\b)*|Παρουσι[αά]ζ(?:ουν|ει)(?::|\sο|\sη)|Παρουσι[αά]στ(?:[ηή]ς|ρια|ριες|[εέ]ς)(?::|\sο|\sη)|Με τ(?:ον |ην )(?:[\s|:|ο|η])*(?:\b)*)([-\w\s]+(?:,[-\w\s]+)*)\W?)",
2638  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2639  match = grPres.match(event.m_description);
2640  if (match.hasMatch())
2641  {
2642  QString tmpPresentersString = match.captured(1);
2643  const QStringList presenters =
2644  tmpPresentersString.split(grPeopleSeparator, Qt::SkipEmptyParts);
2645  for (const auto & presenter : std::as_const(presenters))
2646  {
2647  tmpPresentersString = presenter.split(":").last().trimmed().
2648  remove(kDotAtEnd);
2649  if (tmpPresentersString != "")
2650  {
2651  event.AddPerson(DBPerson::kPresenter, tmpPresentersString);
2652  }
2653  }
2654  event.m_description.remove(match.capturedStart(), match.capturedLength());
2655  }
2656 
2657  //find year e.g Παραγωγής 1966 ή ΝΤΟΚΙΜΑΝΤΕΡ - 1998 Κατάλληλο για όλους
2658  // Used in Private channels (not 'secret', just not owned by Government!)
2659  static const QRegularExpression grYear { R"(\W?(?:\s?παραγωγ[ηή]ς|\s?-|,)\s*([1-2][0-9]{3})(?:-\d{1,4})?)",
2660  QRegularExpression::CaseInsensitiveOption };
2661  match = grYear.match(event.m_description);
2662  if (match.hasMatch())
2663  {
2664  bool ok = false;
2665  uint y = match.capturedView(1).toUInt(&ok);
2666  if (ok)
2667  {
2668  event.m_originalairdate = QDate(y, 1, 1);
2669  event.m_description.remove(match.capturedStart(), match.capturedLength());
2670  }
2671  }
2672  // Remove " ."
2673  event.m_description = event.m_description.replace(" .",".").trimmed();
2674 
2675  //find country of origin and remove it from description.
2676  static const QRegularExpression grCountry {
2677  R"((?:\W|\b)(?:(ελλην|τουρκ|αμερικ[αά]ν|γαλλ|αγγλ|βρεττ?αν|γερμαν|ρωσσ?|ιταλ|ελβετ|σουηδ|ισπαν|πορτογαλ|μεξικ[αά]ν|κιν[εέ]ζικ|ιαπων|καναδ|βραζιλι[αά]ν)(ικ[ηή][ςσ])))",
2678  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2679  match = grCountry.match(event.m_description);
2680  if (match.hasMatch())
2681  event.m_description.remove(match.capturedStart(), match.capturedLength());
2682 
2683  // Work out the season and episode numbers (if any)
2684  // Matching pattern "Επεισ[όο]διο:?|Επ 3 από 14|3/14" etc
2685  bool series = false;
2686  static const QRegularExpression grSeason {
2687  R"((?:\W-?)*(?:\(-\s*)?\b(([Α-Ω|A|B|E|Z|H|I|K|M|N]{1,2})(?:'|΄)?|(\d{1,2})(?:ος|ου|oς|os)?)(?:\s*[ΚκKk][υύ]κλο(?:[σς]|υ))\s?)",
2688  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2689  // cap(2) is the season for ΑΒΓΔ
2690  // cap(3) is the season for 1234
2691  match = grSeason.match(event.m_title);
2692  if (match.hasMatch())
2693  {
2694  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2695  {
2696  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2697  //must convert them to numbers.
2698  int tmpinteger = match.capturedView(2).toUInt();
2699  if (tmpinteger < 1)
2700  {
2701  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2702  event.m_season = 6;
2703  else
2704  {
2705  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2706  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2707  if (tmpinteger != -1)
2708  event.m_season = tmpinteger;
2709  else
2710  //sometimes they use english letters instead of greek. Compensating:
2711  {
2712  static const QString LettToNumber2 = "0ABΓΔE6ZHΘIKΛMN";
2713  tmpinteger = LettToNumber2.indexOf(match.capturedView(2));
2714  if (tmpinteger != -1)
2715  event.m_season = tmpinteger;
2716  }
2717  }
2718  }
2719  }
2720  else if (!match.capturedView(3).isEmpty()) //number
2721  {
2722  event.m_season = match.capturedView(3).toUInt();
2723  }
2724  series = true;
2725  event.m_title.remove(match.capturedStart(), match.capturedLength());
2726  }
2727 
2728  // I have to search separately for season in title and description because it wouldn't work when in both.
2729  match = grSeason.match(event.m_description);
2730  if (match.hasMatch())
2731  {
2732  if (!match.capturedView(2).isEmpty()) // we found a letter representing a number
2733  {
2734  //sometimes Nat. TV writes numbers as letters, i.e Α=1, Β=2, Γ=3, etc
2735  //must convert them to numbers.
2736  int tmpinteger = match.capturedView(2).toUInt();
2737  if (tmpinteger < 1)
2738  {
2739  if (match.captured(2) == "ΣΤ") // 6, don't ask!
2740  event.m_season = 6;
2741  else
2742  {
2743  static const QString LettToNumber = "0ΑΒΓΔΕ6ΖΗΘΙΚΛΜΝ";
2744  tmpinteger = LettToNumber.indexOf(match.capturedView(2));
2745  if (tmpinteger != -1)
2746  event.m_season = tmpinteger;
2747  }
2748  }
2749  }
2750  else if (!match.capturedView(3).isEmpty()) //number
2751  {
2752  event.m_season = match.capturedView(3).toUInt();
2753  }
2754  series = true;
2755  event.m_description.remove(match.capturedStart(), match.capturedLength());
2756  }
2757 
2758 
2759  // If Season is in Roman Numerals (I,II,etc)
2760  static const QRegularExpression grSeasonAsRomanNumerals { ",\\s*([MDCLXVIΙΧ]+)$",
2761  QRegularExpression::CaseInsensitiveOption };
2762  match = grSeasonAsRomanNumerals.match(event.m_title);
2763  auto match2 = grSeasonAsRomanNumerals.match(event.m_description);
2764  if (match.hasMatch())
2765  {
2766  if (!match.capturedView(1).isEmpty()) //number
2767  event.m_season = parseRoman(match.captured(1).toUpper());
2768  series = true;
2769  event.m_title.remove(match.capturedStart(), match.capturedLength());
2770  event.m_title = event.m_title.trimmed();
2771  if (event.m_title.right(1) == ",")
2772  event.m_title.chop(1);
2773  }
2774  else if (match2.hasMatch())
2775  {
2776  if (!match2.capturedView(1).isEmpty()) //number
2777  event.m_season = parseRoman(match2.captured(1).toUpper());
2778  series = true;
2779  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2780  event.m_description = event.m_description.trimmed();
2781  if (event.m_description.right(1) == ",")
2782  event.m_description.chop(1);
2783  }
2784 
2785  static const QRegularExpression grlongEp { R"(\b(?:Επ.|επεισ[οό]διο:?)\s*(\d+)\W?)",
2786  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2787  // cap(1) is the Episode No.
2788  match = grlongEp.match(event.m_title);
2789  match2 = grlongEp.match(event.m_description);
2790  if (match.hasMatch() || match2.hasMatch())
2791  {
2792  if (!match.capturedView(1).isEmpty())
2793  {
2794  event.m_episode = match.capturedView(1).toUInt();
2795  series = true;
2796  event.m_title.remove(match.capturedStart(), match.capturedLength());
2797  }
2798  else if (!match2.capturedView(1).isEmpty())
2799  {
2800  event.m_episode = match2.capturedView(1).toUInt();
2801  series = true;
2802  event.m_description.remove(match2.capturedStart(), match2.capturedLength());
2803  }
2804  // Sometimes description omits Season if it's 1. We fix this
2805  if (0 == event.m_season)
2806  event.m_season = 1;
2807  }
2808 
2809  // Sometimes, especially on greek national tv, they include comments in the
2810  // title, e.g "connection to ert1", "ert archives".
2811  // Because they obscure the real title, I'll isolate and remove them.
2812 
2813  static const QRegularExpression grCommentsinTitle { R"(\(([Α-Ωα-ω\s\d-]+)\)(?:\s*$)*)" };
2814  // cap1 = real title
2815  // cap0 = real title in parentheses.
2816  match = grCommentsinTitle.match(event.m_title);
2817  if (match.hasMatch()) // found in title instead
2818  event.m_title.remove(match.capturedStart(), match.capturedLength());
2819 
2820  // Sometimes the real (mostly English) title of a movie or series is
2821  // enclosed in parentheses in the event title, subtitle or description.
2822  // Since the subtitle has been moved to the description field by
2823  // EITFixUp::FixGreekSubtitle, I will search for it only in the description.
2824  // It will replace the translated one to get better chances of metadata
2825  // retrieval. The old title will be moved in the description.
2826  static const QRegularExpression grRealTitleInDescription { R"(^\(([A-Za-z\s\d-]+)\)\s*)" };
2827  // cap1 = real title
2828  // cap0 = real title in parentheses.
2829  match = grRealTitleInDescription.match(event.m_description);
2830  if (match.hasMatch())
2831  {
2832  event.m_description.remove(0, match.capturedLength());
2833  if (match.captured(0) != event.m_title.trimmed())
2834  {
2835  event.m_description = "(" + event.m_title.trimmed() + "). " + event.m_description;
2836  }
2837  event.m_title = match.captured(1);
2838  // Remove the real title from the description
2839  }
2840  else // search in title
2841  {
2842  static const QRegularExpression grRealTitleInTitle { R"(\(([A-Za-z\s\d-]+)\)(?:\s*$)?)" };
2843  // cap1 = real title
2844  // cap0 = real title in parentheses.
2845  match = grRealTitleInTitle.match(event.m_title);
2846  if (match.hasMatch()) // found in title instead
2847  {
2848  event.m_title.remove(match.capturedStart(), match.capturedLength());
2849  QString tmpTranslTitle = event.m_title;
2850  //QString tmpTranslTitle = event.m_title.replace(tmptitle.cap(0),"");
2851  event.m_title = match.captured(1);
2852  event.m_description = "(" + tmpTranslTitle.trimmed() + "). " + event.m_description;
2853  }
2854  }
2855 
2856  // Description field: "^Episode: Lion in the cage. (Description follows)"
2857  static const QRegularExpression grEpisodeAsSubtitle { R"(^Επεισ[οό]διο:\s?([\w\s\-,']+)\.\s?)" };
2858  match = grEpisodeAsSubtitle.match(event.m_description);
2859  if (match.hasMatch())
2860  {
2861  event.m_subtitle = match.captured(1).trimmed();
2862  event.m_description.remove(match.capturedStart(), match.capturedLength());
2863  }
2864  static const QRegularExpression grMovie { R"(\bταιν[ιί]α\b)",
2865  QRegularExpression::CaseInsensitiveOption|QRegularExpression::UseUnicodePropertiesOption };
2866  bool isMovie = (event.m_description.indexOf(grMovie) !=-1) ;
2867  if (isMovie)
2868  event.m_categoryType = ProgramInfo::kCategoryMovie;
2869  else if (series)
2870  event.m_categoryType = ProgramInfo::kCategorySeries;
2871  // clear double commas.
2872  event.m_description.replace(",,", ",");
2873 
2874 // να σβήσω τα κομμάτια που περισσεύουν από την περιγραφή πχ παραγωγής χχχχ
2875 }
2876 
2878 {
2879  struct grCategoryEntry {
2880  QRegularExpression expr;
2881  QString category;
2882  };
2883  static const QRegularExpression grCategFood { "\\W?(?:εκπομπ[ηή]\\W)?(Γαστρονομ[ιί]α[σς]?|μαγειρικ[ηή][σς]?|chef|συνταγ[εέηή]|διατροφ|wine|μ[αά]γειρα[σς]?)\\W?",
2884  QRegularExpression::CaseInsensitiveOption };
2885  static const QRegularExpression grCategDrama { "\\W?(κοινωνικ[ηήό]|δραματικ[ηή]|δρ[αά]μα)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2886  QRegularExpression::CaseInsensitiveOption};
2887  static const QRegularExpression grCategComedy { "\\W?(κωμικ[ηήοό]|χιουμοριστικ[ηήοό]|κωμωδ[ιί]α)\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2888  QRegularExpression::CaseInsensitiveOption};
2889  static const QRegularExpression grCategChildren { "\\W?(παιδικ[ηήοό]|κινο[υύ]μ[εέ]ν(ων|α)\\sσχ[εέ]δ[ιί](ων|α))\\W(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2890  QRegularExpression::CaseInsensitiveOption};
2891  static const QRegularExpression grCategMystery { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(μυστηρ[ιί]ου)\\W?",
2892  QRegularExpression::CaseInsensitiveOption};
2893  static const QRegularExpression grCategFantasy { "(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?\\W?(φαντασ[ιί]ας)\\W?",
2894  QRegularExpression::CaseInsensitiveOption};
2895  static const QRegularExpression grCategHistory { "\\W?(ιστορικ[ηήοό])\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2896  QRegularExpression::CaseInsensitiveOption};
2897  static const QRegularExpression grCategTeleMag { "\\W?(ενημερωτικ[ηή]|ψυχαγωγικ[ηή]|τηλεπεριοδικ[οό]|μαγκαζ[ιί]νο)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2898  QRegularExpression::CaseInsensitiveOption};
2899  static const QRegularExpression grCategTeleShop { "\\W?(οδηγ[οό][σς]?\\sαγορ[ωώ]ν|τηλεπ[ωώ]λ[ηή]σ|τηλεαγορ|τηλεμ[αά]ρκετ|telemarket)\\W?(?:(?:εκπομπ[ηή]|σειρ[αά]|ταιν[ιί]α)\\W)?",
2900  QRegularExpression::CaseInsensitiveOption};
2901  static const QRegularExpression grCategGameShow { "\\W?(τηλεπαιχν[ιί]δι|quiz)\\W?",
2902  QRegularExpression::CaseInsensitiveOption};
2903  static const QRegularExpression grCategDocumentary { "\\W?(ντοκ[ιυ]μαντ[εέ]ρ)\\W?",
2904  QRegularExpression::CaseInsensitiveOption};
2905  static const QRegularExpression grCategBiography { "\\W?(βιογραφ[ιί]α|βιογραφικ[οό][σς]?)\\W?",
2906  QRegularExpression::CaseInsensitiveOption};
2907  static const QRegularExpression grCategNews { "\\W?(δελτ[ιί]ο\\W?|ειδ[ηή]σε(ι[σς]|ων))\\W?",
2908  QRegularExpression::CaseInsensitiveOption};
2909  static const QRegularExpression grCategSports { "\\W?(champion|αθλητικ[αάοόηή]|πρωτ[αά]θλημα|ποδ[οό]σφαιρο(ου)?|κολ[υύ]μβηση|πατιν[αά]ζ|formula|μπ[αά]σκετ|β[οό]λε[ιϊ])\\W?",
2910  QRegularExpression::CaseInsensitiveOption};
2911  static const QRegularExpression grCategMusic { "\\W?(μουσικ[οόηή]|eurovision|τραγο[υύ]δι)\\W?",
2912  QRegularExpression::CaseInsensitiveOption};
2913  static const QRegularExpression grCategReality { "\\W?(ρι[αά]λιτι|reality)\\W?",
2914  QRegularExpression::CaseInsensitiveOption};
2915  static const QRegularExpression grCategReligion { "\\W?(θρησκε[ιί]α|θρησκευτικ|να[οό][σς]?|θε[ιί]α λειτουργ[ιί]α)\\W?",
2916  QRegularExpression::CaseInsensitiveOption};
2917  static const QRegularExpression grCategCulture { "\\W?(τ[εέ]χν(η|ε[σς])|πολιτισμ)\\W?",
2918  QRegularExpression::CaseInsensitiveOption};
2919  static const QRegularExpression grCategNature { "\\W?(φ[υύ]ση|περιβ[αά]λλο|κατασκευ|επιστ[ηή]μ(?!ονικ[ηή]ς φαντασ[ιί]ας))\\W?",
2920  QRegularExpression::CaseInsensitiveOption};
2921  static const QRegularExpression grCategSciFi { "\\W?(επιστ(.|ημονικ[ηή]ς)\\s?φαντασ[ιί]ας)\\W?",
2922  QRegularExpression::CaseInsensitiveOption};
2923  static const QRegularExpression grCategHealth { "\\W?(υγε[ιί]α|υγειιν|ιατρικ|διατροφ)\\W?",
2924  QRegularExpression::CaseInsensitiveOption};
2925  static const QRegularExpression grCategSpecial { "\\W?(αφι[εέ]ρωμα)\\W?",
2926  QRegularExpression::CaseInsensitiveOption};
2927  static const QList<grCategoryEntry> grCategoryDescData = {
2928  { grCategComedy, "Κωμωδία" },
2929  { grCategTeleMag, "Τηλεπεριοδικό" },
2930  { grCategNature, "Επιστήμη/Φύση" },
2931  { grCategHealth, "Υγεία" },
2932  { grCategReality, "Ριάλιτι" },
2933  { grCategDrama, "Κοινωνικό" },
2934  { grCategChildren, "Παιδικό" },
2935  { grCategSciFi, "Επιστ.Φαντασίας" },
2936  { grCategMystery, "Μυστηρίου" },
2937  { grCategFantasy, "Φαντασίας" },
2938  { grCategHistory, "Ιστορικό" },
2939  { grCategTeleShop, "Τηλεπωλήσεις" },
2940  { grCategFood, "Γαστρονομία" },
2941  { grCategGameShow, "Τηλεπαιχνίδι" },
2942  { grCategBiography, "Βιογραφία" },
2943  { grCategSports, "Αθλητικά" },
2944  { grCategMusic, "Μουσική" },
2945  { grCategDocumentary, "Ντοκιμαντέρ" },
2946  { grCategReligion, "Θρησκεία" },
2947  { grCategCulture, "Τέχνες/Πολιτισμός" },
2948  { grCategSpecial, "Αφιέρωμα" },
2949  };
2950  static const QList<grCategoryEntry> grCategoryTitleData = {
2951  { grCategTeleShop, "Τηλεπωλήσεις" },
2952  { grCategGameShow, "Τηλεπαιχνίδι" },
2953  { grCategMusic, "Μουσική" },
2954  { grCategNews, "Ειδήσεις" },
2955  };
2956 
2957  // Handle special cases
2958  if ((event.m_description.indexOf(grCategFantasy) != -1)
2959  && (event.m_description.indexOf(grCategMystery) != -1))
2960  {
2961  event.m_category = "Φαντασίας/Μυστηρίου";
2962  return;
2963  }
2964 
2965  // Find categories in the description
2966  for (const auto& [expression, category] : grCategoryDescData)
2967  {
2968  if (event.m_description.indexOf(expression) != -1) {
2969  event.m_category = category;
2970  return;
2971  }
2972  }
2973 
2974  // Find categories in the title
2975  for (const auto& [expression, category] : grCategoryTitleData)
2976  {
2977  if (event.m_title.indexOf(expression) != -1) {
2978  event.m_category = category;
2979  return;
2980  }
2981  }
2982 }
2983 
2985 {
2986  // TODO handle scraping the category and category_type from localized text in the short/long description
2987  // TODO remove short description (stored as episode title) which is just the beginning of the long description (actual description)
2988 
2989  // drop the short description if its copy the start of the long description
2990  if (event.m_description.startsWith (event.m_subtitle))
2991  {
2992  event.m_subtitle = "";
2993  }
2994 
2995  // handle cast and crew in items in the DVB Extended Event Descriptor
2996  // remove handled items from the map, so the left overs can be reported
2997  auto i = event.m_items.begin();
2998  while (i != event.m_items.end())
2999  {
3000  /* Possible TODO: if EIT inlcude the priority and/or character
3001  * names for the actors, include them in AddPerson call. */
3002  if ((QString::compare (i.key(), "Role Player") == 0) ||
3003  (QString::compare (i.key(), "Performing Artist") == 0))
3004  {
3005  event.AddPerson (DBPerson::kActor, i.value());
3006  i = event.m_items.erase (i);
3007  }
3008  else if (QString::compare (i.key(), "Director") == 0)
3009  {
3010  event.AddPerson (DBPerson::kDirector, i.value());
3011  i = event.m_items.erase (i);
3012  }
3013  else if (QString::compare (i.key(), "Commentary or Commentator") == 0)
3014  {
3015  event.AddPerson (DBPerson::kCommentator, i.value());
3016  i = event.m_items.erase (i);
3017  }
3018  else if (QString::compare (i.key(), "Presenter") == 0)
3019  {
3020  event.AddPerson (DBPerson::kPresenter, i.value());
3021  i = event.m_items.erase (i);
3022  }
3023  else if (QString::compare (i.key(), "Producer") == 0)
3024  {
3025  event.AddPerson (DBPerson::kProducer, i.value());
3026  i = event.m_items.erase (i);
3027  }
3028  else if (QString::compare (i.key(), "Scriptwriter") == 0)
3029  {
3030  event.AddPerson (DBPerson::kWriter, i.value());
3031  i = event.m_items.erase (i);
3032  }
3033  else
3034  {
3035  ++i;
3036  }
3037  }
3038 
3039  // handle star rating in the description
3040  static const QRegularExpression unitymediaImdbrating { R"(\s*IMDb Rating: (\d\.\d)\s?/10$)" };
3041  auto match = unitymediaImdbrating.match(event.m_description);
3042  if (match.hasMatch())
3043  {
3044  float stars = match.captured(1).toFloat();
3045  event.m_stars = stars / 10.0F;
3046  event.m_description.remove(match.capturedStart(0),
3047  match.capturedLength(0));
3048  }
3049 }
EITFixUp::FixGreekEIT
static void FixGreekEIT(DBEventEIT &event)
Definition: eitfixup.cpp:2520
DBEvent::m_season
uint m_season
Definition: programdata.h:172
EITFixUp::FixDK
static void FixDK(DBEventEIT &event)
Use this to clean YouSee's DVB-C guide in Denmark.
Definition: eitfixup.cpp:2292
EITFixUp::kFixUK
@ kFixUK
Definition: eitfixup.h:35
EITFixUp::kFixBell
@ kFixBell
Definition: eitfixup.h:34
NLMapResult::type
ProgramInfo::CategoryType type
Definition: eitfixup.cpp:1988
EITFixUp::kFixAUDescription
@ kFixAUDescription
Definition: eitfixup.h:52
EITFixUp::FixAUNine
static void FixAUNine(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1335
EventRating::m_system
QString m_system
Definition: programdata.h:78
kUKSpaceColonStart
static const QRegularExpression kUKSpaceColonStart
Definition: eitfixup.cpp:21
EITFixUp::kFixNO
@ kFixNO
Definition: eitfixup.h:47
DBPerson::kDirector
@ kDirector
Definition: programdata.h:32
DBEvent::m_totalepisodes
uint m_totalepisodes
Definition: programdata.h:174
EITFixUp::kFixNRK_DVBT
@ kFixNRK_DVBT
Definition: eitfixup.h:48
EITFixUp::FixBellExpressVu
static void FixBellExpressVu(DBEventEIT &event)
Use this for the Canadian BellExpressVu to standardize DVB-S guide.
Definition: eitfixup.cpp:229
ProgramInfo::kCategorySeries
@ kCategorySeries
Definition: programinfo.h:77
EITFixUp::Fix
static void Fix(DBEventEIT &event)
Definition: eitfixup.cpp:46
EITFixUp::FixFI
static void FixFI(DBEventEIT &event)
Use this to clean DVB-T guide in Finland.
Definition: eitfixup.cpp:1871
EITFixUp::kFixAUStar
@ kFixAUStar
Definition: eitfixup.h:39
EITFixUp::kFixPremiere
@ kFixPremiere
Definition: eitfixup.h:43
EITFixUp::kFixATV
@ kFixATV
Definition: eitfixup.h:58
DBEventEIT::m_fixup
FixupValue m_fixup
Definition: programdata.h:222
EITFixUp::kFixHTML
@ kFixHTML
Definition: eitfixup.h:56
EventRating
Definition: programdata.h:75
EITFixUp::FixATV
static void FixATV(DBEventEIT &event)
Use this to standardise the ATV/ATV2 guide in Germany.
Definition: eitfixup.cpp:1861
DBPerson::kProducer
@ kProducer
Definition: programdata.h:33
ProgramInfo::CategoryType
CategoryType
Definition: programinfo.h:76
DBEvent::m_starttime
QDateTime m_starttime
Definition: programdata.h:152
DBPerson::kActor
@ kActor
Definition: programdata.h:31
EITFixUp::kDotToTitle
static const uint kDotToTitle
Definition: eitfixup.h:19
EITFixUp::FixComHem
static void FixComHem(DBEventEIT &event, bool process_subtitle)
Use this to standardize ComHem DVB-C service in Sweden.
Definition: eitfixup.cpp:1044
ProgramInfo::kCategorySports
@ kCategorySports
Definition: programinfo.h:78
DBEvent::m_partnumber
uint16_t m_partnumber
Definition: programdata.h:157
LOG
#define LOG(_MASK_, _LEVEL_, _QSTRING_)
Definition: mythlogging.h:39
EITFixUp::kFixSubtitle
@ kFixSubtitle
Definition: eitfixup.h:38
EITFixUp::kMaxToTitle
static const uint kMaxToTitle
Definition: eitfixup.h:17
dish_theme_type_to_string
QString dish_theme_type_to_string(uint theme_type)
Definition: dishdescriptors.cpp:304
EITFixUp::FixGreekCategories
static void FixGreekCategories(DBEventEIT &event)
Definition: eitfixup.cpp:2877
DBPerson::kWriter
@ kWriter
Definition: programdata.h:35
EITFixUp::FixAUStar
static void FixAUStar(DBEventEIT &event)
Use this to standardize DVB-S guide in Australia.
Definition: eitfixup.cpp:1293
DBEvent::m_category
QString m_category
Definition: programdata.h:151
NLMapResult::name
QString name
Definition: eitfixup.cpp:1987
kStereo
static const QRegularExpression kStereo
Definition: eitfixup.cpp:20
EITFixUp::parseRoman
static int parseRoman(QString roman)
Definition: eitfixup.cpp:30
EITFixUp::FixNRK_DVBT
static void FixNRK_DVBT(DBEventEIT &event)
Use this to clean DVB-T guide in Norway (NRK)
Definition: eitfixup.cpp:2230
tmp
static guint32 * tmp
Definition: goom_core.cpp:26
r2v
static const QMap< QChar, quint16 > r2v
Definition: eitfixup.cpp:24
EITFixUp::FixCategory
static void FixCategory(DBEventEIT &event)
Definition: eitfixup.cpp:2186
DBEvent::m_seriesId
QString m_seriesId
Definition: programdata.h:165
ProgramInfo::kCategoryNone
@ kCategoryNone
Definition: programinfo.h:77
DBEvent::m_parttotal
uint16_t m_parttotal
Definition: programdata.h:158
EITFixUp::kFixCategory
@ kFixCategory
Definition: eitfixup.h:46
DBEvent::m_programId
QString m_programId
Definition: programdata.h:166
programinfo.h
DBEventEIT::m_chanid
uint32_t m_chanid
Definition: programdata.h:221
mythlogging.h
DBEvent::m_categoryType
ProgramInfo::CategoryType m_categoryType
Definition: programdata.h:164
DBPerson::kHost
@ kHost
Definition: programdata.h:37
EITFixUp::SetUKSubtitle
static void SetUKSubtitle(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:512
deCrewTitle
static const QMap< QString, DBPerson::Role > deCrewTitle
Definition: eitfixup.cpp:1739
DBEvent::m_title
QString m_title
Definition: programdata.h:148
EITFixUp::FixAUSeven
static void FixAUSeven(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1368
EITFixUp::kMinMovieDuration
static const int kMinMovieDuration
Definition: eitfixup.h:25
DBEvent::m_subtitle
QString m_subtitle
Definition: programdata.h:149
EITFixUp::kFixNL
@ kFixNL
Definition: eitfixup.h:45
EITFixUp::kFixGreekEIT
@ kFixGreekEIT
Definition: eitfixup.h:69
EITFixUp::kFixDisneyChannel
@ kFixDisneyChannel
Definition: eitfixup.h:59
hardwareprofile.scan.rating
def rating(profile, smoonURL, gate)
Definition: scan.py:37
ProgramInfo::kCategoryMovie
@ kCategoryMovie
Definition: programinfo.h:77
EITFixUp::kSubtitleMaxLen
static const uint kSubtitleMaxLen
Definition: eitfixup.h:15
EITFixUp::FixNO
static void FixNO(DBEventEIT &event)
Use this to clean DVB-S guide in Norway.
Definition: eitfixup.cpp:2200
EITFixUp::FixNL
static void FixNL(DBEventEIT &event)
Use this to standardize @Home DVB-C guide in the Netherlands.
Definition: eitfixup.cpp:2013
EITFixUp::kFixDK
@ kFixDK
Definition: eitfixup.h:50
EITFixUp::kFixGreekCategories
@ kFixGreekCategories
Definition: eitfixup.h:70
EITFixUp::FixPremiere
static void FixPremiere(DBEventEIT &event)
Use this to standardize DVB-C guide in Germany for the providers Kabel Deutschland and Premiere.
Definition: eitfixup.cpp:1923
categoryTrans
static const QMap< QString, NLMapResult > categoryTrans
Definition: eitfixup.cpp:1990
DBEvent::m_episode
uint m_episode
Definition: programdata.h:173
EITFixUp::kFixAUFreeview
@ kFixAUFreeview
Definition: eitfixup.h:51
channelutil.h
EITFixUp::kFixFI
@ kFixFI
Definition: eitfixup.h:42
EITFixUp::kFixComHem
@ kFixComHem
Definition: eitfixup.h:37
EITFixUp::kFixRTL
@ kFixRTL
Definition: eitfixup.h:41
EITFixUp::kFixDish
@ kFixDish
Definition: eitfixup.h:49
EITFixUp::FixAUDescription
static void FixAUDescription(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1309
EITFixUp::kFixGenericDVB
@ kFixGenericDVB
Definition: eitfixup.h:33
eitfixup.h
DBEventEIT
Definition: programdata.h:177
EITFixUp::kFixPBS
@ kFixPBS
Definition: eitfixup.h:36
DBEvent::m_description
QString m_description
Definition: programdata.h:150
EITFixUp::kFixAUSeven
@ kFixAUSeven
Definition: eitfixup.h:54
DBEventEIT::m_items
QMultiMap< QString, QString > m_items
Definition: programdata.h:223
EITFixUp::FixAUFreeview
static void FixAUFreeview(DBEventEIT &event)
Use this to standardize DVB-T guide in Australia.
Definition: eitfixup.cpp:1410
EITFixUp::AddDVBEITAuthority
static QString AddDVBEITAuthority(uint chanid, const QString &id)
This adds a DVB EIT default authority to series id or program id if one exists in the DB for that cha...
Definition: eitfixup.cpp:202
EITFixUp::FixPBS
static void FixPBS(DBEventEIT &event)
Use this to standardize PBS ATSC guide in the USA.
Definition: eitfixup.cpp:1029
EITFixUp::FixDisneyChannel
static void FixDisneyChannel(DBEventEIT &event)
Use this to standardise the Disney Channel guide in Germany.
Definition: eitfixup.cpp:1834
DBPerson::kUnknown
@ kUnknown
Definition: programdata.h:30
EITFixUp::kMaxDotToColon
static const uint kMaxDotToColon
Definition: eitfixup.h:23
EITFixUp::kFixHDTV
@ kFixHDTV
Definition: eitfixup.h:44
EITFixUp::kFixGreekSubtitle
@ kFixGreekSubtitle
Definition: eitfixup.h:68
DBEvent::m_endtime
QDateTime m_endtime
Definition: programdata.h:153
EITFixUp::kFixMCA
@ kFixMCA
Definition: eitfixup.h:40
NLMapResult
Definition: eitfixup.cpp:1986
EITFixUp::kFixP7S1
@ kFixP7S1
Definition: eitfixup.h:55
DBPerson::kPresenter
@ kPresenter
Definition: programdata.h:39
EITFixUp::FixUK
static void FixUK(DBEventEIT &event)
Use this in the United Kingdom to standardize DVB-T guide.
Definition: eitfixup.cpp:647
dishdescriptors.h
EITFixUp::kFixUnitymedia
@ kFixUnitymedia
Definition: eitfixup.h:57
EITFixUp::FixStripHTML
static void FixStripHTML(DBEventEIT &event)
Use this to clean HTML Tags from EIT Data.
Definition: eitfixup.cpp:2494
kDotAtEnd
static const QRegularExpression kDotAtEnd
Definition: eitfixup.cpp:22
EITFixUp::FixGreekSubtitle
static void FixGreekSubtitle(DBEventEIT &event)
Definition: eitfixup.cpp:2505
EITFixUp::kFixAUNine
@ kFixAUNine
Definition: eitfixup.h:53
DBPerson::kCommentator
@ kCommentator
Definition: programdata.h:40
DBEvent::m_airdate
uint16_t m_airdate
movie year / production year
Definition: programdata.h:154
EventRating::m_rating
QString m_rating
Definition: programdata.h:79
EITFixUp::FixRTL
static void FixRTL(DBEventEIT &event)
Use this to standardise the RTL group guide in Germany.
Definition: eitfixup.cpp:1607
ChannelUtil::GetDefaultAuthority
static QString GetDefaultAuthority(uint chanid)
Returns the DVB default authority for the chanid given.
Definition: channelutil.cpp:1202
EITFixUp::FixMCA
static void FixMCA(DBEventEIT &event)
Use this to standardise the MultiChoice Africa DVB-S guide.
Definition: eitfixup.cpp:1468
ProgramInfo::kCategoryTVShow
@ kCategoryTVShow
Definition: programinfo.h:78
DBPerson::Role
Role
Definition: programdata.h:28
EITFixUp::kMaxQuestionExclamation
static const uint kMaxQuestionExclamation
Definition: eitfixup.h:21
uint
unsigned int uint
Definition: freesurround.h:24
EITFixUp::FixUnitymedia
static void FixUnitymedia(DBEventEIT &event)
Definition: eitfixup.cpp:2984
EITFixUp::FixPRO7
static void FixPRO7(DBEventEIT &event)
Use this to standardise the PRO7/Sat1 group guide in Germany.
Definition: eitfixup.cpp:1748