MythTV  0.27pre
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Properties Friends Groups Pages
xmltvparser.cpp
Go to the documentation of this file.
1 #include "xmltvparser.h"
2 
3 // Qt headers
4 #include <QFile>
5 #include <QStringList>
6 #include <QDateTime>
7 #include <QDomDocument>
8 #include <QUrl>
9 
10 // C++ headers
11 #include <iostream>
12 #include <cstdlib>
13 
14 // libmyth headers
15 #include "exitcodes.h"
16 #include "mythcorecontext.h"
17 #include "mythdate.h"
18 
19 // libmythtv headers
20 #include "programinfo.h"
21 #include "programdata.h"
22 #include "dvbdescriptors.h"
23 #include "channelinfo.h"
24 
25 // filldata headers
26 #include "channeldata.h"
27 #include "fillutil.h"
28 
29 XMLTVParser::XMLTVParser() : current_year(0)
30 {
31  current_year = MythDate::current().date().toString("yyyy").toUInt();
32 }
33 
34 static uint ELFHash(const QByteArray &ba)
35 {
36  const uchar *k = (const uchar *)ba.data();
37  uint h = 0;
38  uint g;
39 
40  if (k)
41  {
42  while (*k)
43  {
44  h = (h << 4) + *k++;
45  if ((g = (h & 0xf0000000)) != 0)
46  h ^= g >> 24;
47  h &= ~g;
48  }
49  }
50 
51  return h;
52 }
53 
54 static QString getFirstText(QDomElement element)
55 {
56  for (QDomNode dname = element.firstChild(); !dname.isNull();
57  dname = dname.nextSibling())
58  {
59  QDomText t = dname.toText();
60  if (!t.isNull())
61  return t.data();
62  }
63  return QString();
64 }
65 
66 ChannelInfo *XMLTVParser::parseChannel(QDomElement &element, QUrl &baseUrl)
67 {
68  ChannelInfo *chaninfo = new ChannelInfo;
69 
70  QString xmltvid = element.attribute("id", "");
71 
72  chaninfo->xmltvid = xmltvid;
73  chaninfo->tvformat = "Default";
74 
75  for (QDomNode child = element.firstChild(); !child.isNull();
76  child = child.nextSibling())
77  {
78  QDomElement info = child.toElement();
79  if (!info.isNull())
80  {
81  if (info.tagName() == "icon")
82  {
83  QString path = info.attribute("src", "");
84  if (!path.isEmpty() && !path.contains("://"))
85  {
86  QString base = baseUrl.toString(QUrl::StripTrailingSlash);
87  chaninfo->icon = base +
88  ((path.startsWith("/")) ? path : QString("/") + path);
89  }
90  else if (!path.isEmpty())
91  {
92  QUrl url(path);
93  if (url.isValid())
94  chaninfo->icon = url.toString();
95  }
96  }
97  else if (info.tagName() == "display-name")
98  {
99  if (chaninfo->name.isEmpty())
100  {
101  chaninfo->name = info.text();
102  }
103  else if (chaninfo->callsign.isEmpty())
104  {
105  chaninfo->callsign = info.text();
106  }
107  else if (chaninfo->channum.isEmpty())
108  {
109  chaninfo->channum = info.text();
110  }
111  }
112  }
113  }
114 
115  chaninfo->freqid = chaninfo->channum;
116  return chaninfo;
117 }
118 
// Parses an XMLTV timestamp string into dt and flags the result as UTC.
//
// timestr - text of the form "<digits>[Z][ <zone>]"; the part before the
//           first space is the date/time, the optional second part a zone.
// dt      - receives the parsed value. It is left untouched when timestr is
//           empty or the digit part has an unsupported length/format.
//
// NOTE(review): listing line 203 (between the final setTimeSpec() call and
// the closing brace) is missing from this extract, so the function may do
// more than is documented here -- check the real source.
119 static void fromXMLTVDate(QString &timestr, QDateTime &dt)
120 {
121  // The XMLTV spec requires dates to either be in UTC/GMT or to specify a
122  // valid timezone. We are sticking to the spec and require all grabbers
123  // to comply.
124 
125  if (timestr.isEmpty())
126  {
127  LOG(VB_XMLTV, LOG_ERR, "Found empty Date/Time in XMLTV data, ignoring");
128  return;
129  }
130 
     // split(" ") always yields at least one entry, so split[0] is safe here.
131  QStringList split = timestr.split(" ");
132  QString ts = split[0];
133  QDateTime tmpDT;
134  tmpDT.setTimeSpec(Qt::LocalTime);
135 
136  // UTC/GMT, just strip
137  if (ts.endsWith('Z'))
138  ts.truncate(ts.length()-1);
139 
     // Choose the parse format from the number of characters; any other
     // length leaves tmpDT unassigned and is rejected by the isValid()
     // check below.
140  if (ts.length() == 14)
141  {
142  tmpDT = QDateTime::fromString(ts, "yyyyMMddHHmmss");
143  }
144  else if (ts.length() == 12)
145  {
146  tmpDT = QDateTime::fromString(ts, "yyyyMMddHHmm");
147  }
148  else if (ts.length() == 8)
149  {
150  tmpDT = QDateTime::fromString(ts, "yyyyMMdd");
151  }
152  else if (ts.length() == 6)
153  {
154  tmpDT = QDateTime::fromString(ts, "yyyyMM");
155  }
156  else if (ts.length() == 4)
157  {
158  tmpDT = QDateTime::fromString(ts, "yyyy");
159  }
160 
161  if (!tmpDT.isValid())
162  {
163  LOG(VB_GENERAL, LOG_ERR,
164  QString("Ignoring unknown timestamp format: %1")
165  .arg(ts));
166  return;
167  }
168 
     // A second space-separated field is treated as the time zone.
169  if (split.size() > 1)
170  {
171  QString tmp = split[1].trimmed();
172 
173  // These shouldn't be required and they aren't ISO 8601 but the
174  // xmltv spec mentions these and just these so handle them just in
175  // case
176  if (tmp == "GMT" || tmp == "UTC")
177  tmp = "+0000";
178  else if (tmp == "BST")
179  tmp = "+0100";
180 
181  // While this seems like a hack, it's better than what was done before
     // Re-parse "<ISO date> <offset>" so the offset is applied, then
     // convert the result to UTC.
182  QString isoDateString = QString("%1 %2").arg(tmpDT.toString(Qt::ISODate))
183  .arg(tmp);
184  dt = QDateTime::fromString(isoDateString, Qt::ISODate).toUTC();
185  }
186 
     // Reached with an invalid dt when no zone was given or the zone text
     // could not be parsed: fall back to the zone-less value.
     // NOTE(review): this tests dt, not a local flag, so a dt that was
     // already valid on entry is kept as-is when timestr has no zone field.
     // The callers visible in this file pass fresh QDateTime objects --
     // confirm that holds everywhere.
187  if (!dt.isValid())
188  {
189  static bool warned_once_on_implicit_utc = false;
190  if (!warned_once_on_implicit_utc)
191  {
     // NOTE(review): "provice" in the message below looks like a typo for
     // "provide"; it is a runtime string and is left unchanged here.
192  LOG(VB_XMLTV, LOG_ERR, "No explicit time zone found, "
193  "guessing implicit UTC! Please consider enhancing "
194  "the guide source to provice explicit UTC or local "
195  "time instead.");
196  warned_once_on_implicit_utc = true;
197  }
198  dt = tmpDT;
199  }
200 
     // Relabel the value as UTC.
201  dt.setTimeSpec(Qt::UTC);
202 
     // NOTE(review): listing line 203 is missing from this extract.
204 }
205 
206 static void parseCredits(QDomElement &element, ProgInfo *pginfo)
207 {
208  for (QDomNode child = element.firstChild(); !child.isNull();
209  child = child.nextSibling())
210  {
211  QDomElement info = child.toElement();
212  if (!info.isNull())
213  pginfo->AddPerson(info.tagName(), getFirstText(info));
214  }
215 }
216 
217 static void parseVideo(QDomElement &element, ProgInfo *pginfo)
218 {
219  for (QDomNode child = element.firstChild(); !child.isNull();
220  child = child.nextSibling())
221  {
222  QDomElement info = child.toElement();
223  if (!info.isNull())
224  {
225  if (info.tagName() == "quality")
226  {
227  if (getFirstText(info) == "HDTV")
228  pginfo->videoProps |= VID_HDTV;
229  }
230  else if (info.tagName() == "aspect")
231  {
232  if (getFirstText(info) == "16:9")
233  pginfo->videoProps |= VID_WIDESCREEN;
234  }
235  }
236  }
237 }
238 
239 static void parseAudio(QDomElement &element, ProgInfo *pginfo)
240 {
241  for (QDomNode child = element.firstChild(); !child.isNull();
242  child = child.nextSibling())
243  {
244  QDomElement info = child.toElement();
245  if (!info.isNull())
246  {
247  if (info.tagName() == "stereo")
248  {
249  if (getFirstText(info) == "mono")
250  {
251  pginfo->audioProps |= AUD_MONO;
252  }
253  else if (getFirstText(info) == "stereo")
254  {
255  pginfo->audioProps |= AUD_STEREO;
256  }
257  else if (getFirstText(info) == "dolby" ||
258  getFirstText(info) == "dolby digital")
259  {
260  pginfo->audioProps |= AUD_DOLBY;
261  }
262  else if (getFirstText(info) == "surround")
263  {
264  pginfo->audioProps |= AUD_SURROUND;
265  }
266  }
267  }
268  }
269 }
270 
/*
 * Builds a heap-allocated ProgInfo from one XMLTV programme element.
 *
 * Reads the start/stop/channel/clumpidx attributes, walks the child
 * elements (title, sub-title, desc, category, date, star-rating, rating,
 * previously-shown, credits, subtitles, audio, video, episode-num) and
 * finally synthesises a programId unless an episode-num with system
 * "dd_progid" already supplied one.
 *
 * element - the programme element to read.
 * Returns the object created with new; the caller must delete it.
 *
 * NOTE(review): this text is an extract of a numbered listing and several
 * listing lines inside this function are missing (333, 335, 345, 390, 451,
 * 489, 497-498, 506, 534, 552). The comments below describe only what is
 * visible; check the real source before changing behaviour.
 */
271 ProgInfo *XMLTVParser::parseProgram(QDomElement &element)
272 {
     // NOTE(review): no visible line ever assigns uniqueid, so the
     // "!uniqueid.isEmpty()" branch further down looks unreachable here.
273  QString uniqueid, season, episode;
     // Set to 1 once a dd_progid episode-num has provided programId.
274  int dd_progid_done = 0;
275  ProgInfo *pginfo = new ProgInfo();
276 
     // text is passed by non-const reference, so startts/endts hold
     // whatever fromXMLTVDate leaves in it.
277  QString text = element.attribute("start", "");
278  fromXMLTVDate(text, pginfo->starttime);
279  pginfo->startts = text;
280 
281  text = element.attribute("stop", "");
282  fromXMLTVDate(text, pginfo->endtime);
283  pginfo->endts = text;
284 
     // Only the part of the channel attribute before the first space is kept.
285  text = element.attribute("channel", "");
286  QStringList split = text.split(" ");
287 
288  pginfo->channel = split[0];
289 
290  text = element.attribute("clumpidx", "");
291  if (!text.isEmpty())
292  {
     // NOTE(review): split[1] is read without checking that the attribute
     // actually contained a '/'; a value such as "0" would index past the
     // end of the list -- confirm and guard.
293  split = text.split('/');
294  pginfo->clumpidx = split[0];
295  pginfo->clumpmax = split[1];
296  }
297 
298  for (QDomNode child = element.firstChild(); !child.isNull();
299  child = child.nextSibling())
300  {
301  QDomElement info = child.toElement();
302  if (!info.isNull())
303  {
304  if (info.tagName() == "title")
305  {
     // A lang="ja_JP" title always overwrites; "ja_JP@kana" fills the
     // pronunciation field; otherwise the first title seen is kept.
306  if (info.attribute("lang") == "ja_JP")
307  {
308  pginfo->title = getFirstText(info);
309  }
310  else if (info.attribute("lang") == "ja_JP@kana")
311  {
312  pginfo->title_pronounce = getFirstText(info);
313  }
314  else if (pginfo->title.isEmpty())
315  {
316  pginfo->title = getFirstText(info);
317  }
318  }
319  else if (info.tagName() == "sub-title" &&
320  pginfo->subtitle.isEmpty())
321  {
322  pginfo->subtitle = getFirstText(info);
323  }
324  else if (info.tagName() == "desc" && pginfo->description.isEmpty())
325  {
326  pginfo->description = getFirstText(info);
327  }
328  else if (info.tagName() == "category")
329  {
     // Category text is compared and stored lower-cased.
330  const QString cat = getFirstText(info).toLower();
331 
332  if (ProgramInfo::kCategoryNone == pginfo->categoryType &&
     // NOTE(review): listing line 333 (the rest of this condition) is
     // missing from this extract.
334  {
     // NOTE(review): listing line 335 (the body of this branch) is missing
     // from this extract.
336  }
337  else if (pginfo->category.isEmpty())
338  {
339  pginfo->category = cat;
340  }
341 
342  if (cat == QObject::tr("movie") || cat == QObject::tr("film"))
343  {
344  // Hack for tv_grab_uk_rt
     // NOTE(review): listing line 345 (the statement of this branch) is
     // missing from this extract.
346  }
347  }
348  else if (info.tagName() == "date" && !pginfo->airdate)
349  {
350  // Movie production year
     // Only the first four characters are interpreted, as an unsigned year.
351  QString date = getFirstText(info);
352  pginfo->airdate = date.left(4).toUInt();
353  }
354  else if (info.tagName() == "star-rating" && pginfo->stars.isEmpty())
355  {
356  QDomNodeList values = info.elementsByTagName("value");
357  QDomElement item;
358  QString stars, num, den;
359  float rating = 0.0;
360 
361  // Use the first rating to appear in the xml, this should be
362  // the most important one.
363  //
364  // Averaging is not a good idea here, any subsequent ratings
365  // are likely to represent that day's recommended programmes
366  // which on a bad night could be given to an average programme.
367  // In the case of uk_rt it's not unknown for a recommendation
368  // to be given to programmes which are 'so bad, you have to
369  // watch!'
370  item = values.item(0).toElement();
371  if (!item.isNull())
372  {
     // The value text is read as "num/den"; the stored rating is the
     // quotient, or 0 when the denominator is not positive.
373  stars = getFirstText(item);
374  num = stars.section('/', 0, 0);
375  den = stars.section('/', 1, 1);
376  if (0.0 < den.toFloat())
377  rating = num.toFloat()/den.toFloat();
378  }
379 
380  pginfo->stars.setNum(rating);
381  }
382  else if (info.tagName() == "rating")
383  {
384  // again, the structure of ratings seems poorly represented
385  // in the XML. no idea what we'd do with multiple values.
386  QDomNodeList values = info.elementsByTagName("value");
387  QDomElement item = values.item(0).toElement();
388  if (item.isNull())
389  continue;
     // NOTE(review): listing line 390, which presumably declares the local
     // "rating" used below, is missing from this extract.
391  rating.system = info.attribute("system", "");
392  rating.rating = getFirstText(item);
393  pginfo->ratings.append(rating);
394  }
395  else if (info.tagName() == "previously-shown")
396  {
397  pginfo->previouslyshown = true;
398 
     // An optional start attribute supplies the original air date.
399  QString prevdate = info.attribute("start");
400  if (!prevdate.isEmpty())
401  {
402  QDateTime date;
403  fromXMLTVDate(prevdate, date);
404  pginfo->originalairdate = date.date();
405  }
406  }
407  else if (info.tagName() == "credits")
408  {
409  parseCredits(info, pginfo);
410  }
411  else if (info.tagName() == "subtitles")
412  {
413  if (info.attribute("type") == "teletext")
414  pginfo->subtitleType |= SUB_NORMAL;
415  else if (info.attribute("type") == "onscreen")
416  pginfo->subtitleType |= SUB_ONSCREEN;
417  else if (info.attribute("type") == "deaf-signed")
418  pginfo->subtitleType |= SUB_SIGNED;
419  }
420  else if (info.tagName() == "audio")
421  {
422  parseAudio(info, pginfo);
423  }
424  else if (info.tagName() == "video")
425  {
426  parseVideo(info, pginfo);
427  }
428  else if (info.tagName() == "episode-num")
429  {
430  if (info.attribute("system") == "dd_progid")
431  {
432  QString episodenum(getFirstText(info));
433  // if this field includes a dot, strip it out
     // (only the first '.' is removed)
434  int idx = episodenum.indexOf('.');
435  if (idx != -1)
436  episodenum.remove(idx, 1);
437  pginfo->programId = episodenum;
438  dd_progid_done = 1;
439  }
440  else if (info.attribute("system") == "xmltv_ns")
441  {
     // The text is split as "season.episode.part", where each field may
     // carry a "/total" suffix; only the part field's total is used.
442  int tmp;
443  QString episodenum(getFirstText(info));
444  episode = episodenum.section('.',1,1);
445  episode = episode.section('/',0,0).trimmed();
446  season = episodenum.section('.',0,0).trimmed();
447  QString part(episodenum.section('.',2,2));
448  QString partnumber(part.section('/',0,0).trimmed());
449  QString parttotal(part.section('/',1,1).trimmed());
450 
     // NOTE(review): listing line 451 is missing from this extract.
452 
     // Episode and season are incremented by one before being stored --
     // presumably because xmltv_ns numbering is zero-based; confirm against
     // the XMLTV DTD.
453  if (!episode.isEmpty())
454  {
455  tmp = episode.toInt() + 1;
456  episode = QString::number(tmp);
457  pginfo->syndicatedepisodenumber = QString('E' + episode);
458  }
459 
460  if (!season.isEmpty())
461  {
462  tmp = season.toInt() + 1;
463  season = QString::number(tmp);
464  pginfo->syndicatedepisodenumber.append(QString('S' + season));
465  }
466 
     // partno stays 0 when the part number is absent or not a valid
     // unsigned integer.
467  uint partno = 0;
468  if (!partnumber.isEmpty())
469  {
470  bool ok;
471  partno = partnumber.toUInt(&ok) + 1;
472  partno = (ok) ? partno : 0;
473  }
474 
475  if (!parttotal.isEmpty() && partno > 0)
476  {
477  bool ok;
478  uint partto = parttotal.toUInt(&ok);
     // NOTE(review): the comparison below is between the two QStrings
     // (partnumber, parttotal), not the parsed numbers partno/partto, so
     // e.g. "9" vs "10" compares as text -- looks unintended; confirm.
479  if (ok && partnumber <= parttotal)
480  {
481  pginfo->parttotal = partto;
482  pginfo->partnumber = partno;
483  }
484  }
485  }
486  else if (info.attribute("system") == "onscreen" &&
487  pginfo->subtitle.isEmpty())
488  {
     // NOTE(review): listing line 489 is missing from this extract.
490  pginfo->subtitle = getFirstText(info);
491  }
492  }
493  }
494  }
495 
496  if (pginfo->category.isEmpty() &&
     // NOTE(review): listing lines 497-498 (the rest of this statement) are
     // missing from this extract.
499 
     // No <date> element seen: fall back to the year cached in the constructor.
500  if (!pginfo->airdate)
501  pginfo->airdate = current_year;
502 
503  /* Let's build ourself a programid */
504  QString programid;
505 
     // Two-letter prefix chosen from the category type.
     // NOTE(review): listing line 506, presumably the "if" that opens this
     // chain, is missing from this extract.
507  programid = "MV";
508  else if (ProgramInfo::kCategorySeries == pginfo->categoryType)
509  programid = "EP";
510  else if (ProgramInfo::kCategorySports == pginfo->categoryType)
511  programid = "SP";
512  else
513  programid = "SH";
514 
515  if (!uniqueid.isEmpty()) // we already have a unique id ready for use
516  programid.append(uniqueid);
517  else
518  {
     // The series id is the decimal ELFHash of the UTF-8 encoded title.
519  QString seriesid = QString::number(ELFHash(pginfo->title.toUtf8()));
520  pginfo->seriesId = seriesid;
521  programid.append(seriesid);
522 
523  if (!episode.isEmpty() && !season.isEmpty())
524  {
525  /* Append unpadded episode and season number to the seriesid (to
526  maintain consistency with historical encoding), but limit the
527  season number representation to a single base-36 character to
528  ensure unique programid generation. */
529  int season_int = season.toInt();
530  if (season_int > 35)
531  {
532  // Cannot represent season as a single base-36 character, so
533  // remove the programid and fall back to normal dup matching.
     // NOTE(review): listing line 534 is missing from this extract.
535  programid.clear();
536  }
537  else
538  {
539  programid.append(episode);
540  programid.append(QString::number(season_int, 36));
     // Part information is appended only when both values are non-zero.
541  if (pginfo->partnumber && pginfo->parttotal)
542  {
543  programid += QString::number(pginfo->partnumber);
544  programid += QString::number(pginfo->parttotal);
545  }
546  }
547  }
548  else
549  {
550  /* No ep/season info? Well then remove the programid and rely on
551  normal dupchecking methods instead. */
     // NOTE(review): listing line 552 is missing from this extract.
553  programid.clear();
554  }
555  }
     // A programId taken from a dd_progid episode-num is never overwritten.
556  if (dd_progid_done == 0)
557  pginfo->programId = programid;
558 
559  return pginfo;
560 }
561 
563  QString filename, ChannelInfoList *chanlist,
564  QMap<QString, QList<ProgInfo> > *proglist)
565 {
     // Loads the XMLTV document in filename and fills the two output
     // containers: every channel element with a non-empty xmltvid is appended
     // to *chanlist, and programme entries are appended to *proglist keyed by
     // their channel id.
     //
     // Returns false when the file cannot be opened, true otherwise.
     //
     // NOTE(review): the first line of this definition (listing line 562,
     // carrying the return type and function name) is missing from this
     // extract.
566  QDomDocument doc;
567  QFile f;
568 
569  if (!dash_open(f, filename, QIODevice::ReadOnly))
570  {
571  LOG(VB_GENERAL, LOG_ERR,
572  QString("Error unable to open '%1' for reading.") .arg(filename));
573  return false;
574  }
575 
576  QString errorMsg = "unknown";
577  int errorLine = 0;
578  int errorColumn = 0;
579 
580  if (!doc.setContent(&f, &errorMsg, &errorLine, &errorColumn))
581  {
     // The message reports line:column and the parser's text; the file name
     // is not part of it.
582  LOG(VB_GENERAL, LOG_ERR, QString("Error in %1:%2: %3")
583  .arg(errorLine).arg(errorColumn).arg(errorMsg));
584 
585  f.close();
     // NOTE(review): returns true although the XML failed to parse and
     // nothing was added to the outputs -- confirm whether callers depend
     // on this before changing it.
586  return true;
587  }
588 
589  f.close();
590 
591  QDomElement docElem = doc.documentElement();
592 
     // Base URL handed to parseChannel for icon paths without a scheme.
593  QUrl baseUrl(docElem.attribute("source-data-url", ""));
594  //QUrl sourceUrl(docElem.attribute("source-info-url", ""));
595 
     // Accumulators for the titles/descriptions of a clump of programmes.
596  QString aggregatedTitle;
597  QString aggregatedDesc;
598 
599  QDomNode n = docElem.firstChild();
600  while (!n.isNull())
601  {
602  QDomElement e = n.toElement();
603  if (!e.isNull())
604  {
605  if (e.tagName() == "channel")
606  {
     // parseChannel returns a new object: copy it into the list if it has
     // an id, then free it.
607  ChannelInfo *chinfo = parseChannel(e, baseUrl);
608  if (!chinfo->xmltvid.isEmpty())
609  chanlist->push_back(*chinfo);
610  delete chinfo;
611  }
612  else if (e.tagName() == "programme")
613  {
614  ProgInfo *pginfo = parseProgram(e);
615 
     // Entries whose start and stop strings are identical are dropped.
616  if (pginfo->startts == pginfo->endts)
617  {
618  LOG(VB_GENERAL, LOG_WARNING, QString("Invalid programme (%1), "
619  "identical start and end "
620  "times, skipping")
621  .arg(pginfo->title));
622  }
623  else
624  {
625  if (pginfo->clumpidx.isEmpty())
626  (*proglist)[pginfo->channel].push_back(*pginfo);
627  else
628  {
629  /* append all titles/descriptions from one clump */
     // Index 0 starts a new clump; only the entry with index clumpmax - 1
     // is stored, carrying the " | "-joined titles and descriptions.
630  if (pginfo->clumpidx.toInt() == 0)
631  {
632  aggregatedTitle.clear();
633  aggregatedDesc.clear();
634  }
635 
636  if (!pginfo->title.isEmpty())
637  {
638  if (!aggregatedTitle.isEmpty())
639  aggregatedTitle.append(" | ");
640  aggregatedTitle.append(pginfo->title);
641  }
642 
643  if (!pginfo->description.isEmpty())
644  {
645  if (!aggregatedDesc.isEmpty())
646  aggregatedDesc.append(" | ");
647  aggregatedDesc.append(pginfo->description);
648  }
649  if (pginfo->clumpidx.toInt() ==
650  pginfo->clumpmax.toInt() - 1)
651  {
652  pginfo->title = aggregatedTitle;
653  pginfo->description = aggregatedDesc;
654  (*proglist)[pginfo->channel].push_back(*pginfo);
655  }
656  }
657  }
     // The list stores copies, so the parsed object is always freed here.
658  delete pginfo;
659  }
660  }
661  n = n.nextSibling();
662  }
663 
664  return true;
665 }
666