| 127 | |
| 128 | // |
| 129 | // This is an implementation of JaroWinkler : http://en.wikipedia.org/wiki/Jaro-Winkler |
| 130 | // |
| 131 | // Jaro uses a formula based on: |
| 132 | // Number of matching characters : characters that are in both t1 & t2 with an index position diff < (max(t1.length, t2.length)/2) -1 |
| 133 | // Number of transposed characters : number of matching characters that are not in the same place in t1 & t2 |
| 134 | // JaroWinkler is an extension of Jaro where we apply a coefficient to the start of the string |
| 135 | // |
| 136 | // The code is adapted from : http://www.cppfrance.com/codes/DISTANCE-JARO-WINKLER_49753.aspx |
| 137 | // |
| 138 | |
| 139 | QString TrouverMatches(const QString &txt,int bl[]) |
| 140 | { |
| 141 | QString res; |
| 142 | QChar ctmp='a'; |
| 143 | |
| 144 | int i=0; |
| 145 | while (i<txt.length()) |
| 146 | { |
| 147 | ctmp=txt[i]; |
| 148 | if (bl[i]==true) |
| 149 | { |
| 150 | res.append(ctmp); |
| 151 | } |
| 152 | i++; |
| 153 | } |
| 154 | return res; |
| 155 | } |
| 156 | |
| 157 | double JaroWinkler(const QString &t1,const QString &t2) |
| 158 | { |
| 159 | int ecartMax,l1,l2,compteMatching,compteTransposition,longueurPrefix,i,j; |
| 160 | QString t1Matche,t2Matche; |
| 161 | double distanceJaro; |
| 162 | |
| 163 | l1=t1.length(); |
| 164 | l2=t2.length(); |
| 165 | ecartMax=(int)(((double)max(l1,l2))/2.00); |
| 166 | compteMatching=0; |
| 167 | |
| 168 | VERBOSE(VB_XMLTV, QString("input (%1) (%2)").arg(t1).arg(t2)); |
| 169 | VERBOSE(VB_XMLTV, QString("ecartMax (%1)").arg(ecartMax)); |
| 170 | |
| 171 | int b1[l1+2],b2[l2+2]; |
| 172 | |
| 173 | for (i=0;i<l1;i++) |
| 174 | b1[i]=false; |
| 175 | for (i=0;i<l2;i++) |
| 176 | b2[i]=false; |
| 177 | |
| 178 | for (i=0;i<l1;i++) |
| 179 | { |
| 180 | for (j=max(i-ecartMax,0);j<min(i+ecartMax,l2);j++) |
| 181 | { |
| 182 | if (t1[i]==t2[j]) |
| 183 | { |
| 184 | b1[i]=true; |
| 185 | b2[i]=true; |
| 186 | compteMatching++; |
| 187 | break; |
| 188 | } |
| 189 | } |
| 190 | } |
| 191 | |
| 192 | VERBOSE(VB_XMLTV, QString("compteMatching (%1)").arg(compteMatching)); |
| 193 | if (compteMatching==0) |
| 194 | return 0.0; |
| 195 | |
| 196 | t1Matche=TrouverMatches(t1,b1); |
| 197 | t2Matche=TrouverMatches(t2,b2); |
| 198 | |
| 199 | VERBOSE(VB_XMLTV, QString("t1Matche (%1)").arg(t1Matche)); |
| 200 | VERBOSE(VB_XMLTV, QString("t2Matche (%1)").arg(t2Matche)); |
| 201 | |
| 202 | compteTransposition=0; |
| 203 | if (strcmp(t1Matche,t2Matche)!=0) |
| 204 | { |
| 205 | for (i=0;i<t1Matche.length();i++) |
| 206 | if (t1Matche[i]!=t2Matche[i]) |
| 207 | compteTransposition++; |
| 208 | } |
| 209 | else |
| 210 | compteTransposition=0; |
| 211 | |
| 212 | |
| 213 | VERBOSE(VB_XMLTV, QString("compteTransposition (%1)").arg(compteTransposition)); |
| 214 | |
| 215 | distanceJaro=(((double)compteMatching/l1)+((double)compteMatching/l2)+((compteMatching-compteTransposition/2.0)/compteMatching))/3.0; |
| 216 | |
| 217 | VERBOSE(VB_XMLTV, QString("dist JARO: (%1)").arg(distanceJaro)); |
| 218 | longueurPrefix=0; |
| 219 | for (i=0;i<min(3,min(l1,l2))+1;i++) //longueur max : 4 |
| 220 | { |
| 221 | if (i<t1.length() && i<t2.length() && t1[i]==t2[i]) |
| 222 | longueurPrefix++; |
| 223 | else |
| 224 | break; |
| 225 | } |
| 226 | |
| 227 | VERBOSE(VB_XMLTV, QString("longueurPrefix (%1)").arg(longueurPrefix)); |
| 228 | |
| 229 | return distanceJaro+(longueurPrefix*0.1*(1-distanceJaro)); |
| 230 | } |
| 231 | |
| 232 | |
| 233 | ChanInfo* _findChanMatch(QList<ChanInfo> *oldchanlist, ChanInfo chan) |
| 234 | { |
| 235 | QList<ChanInfo>::iterator i = oldchanlist->begin(); |
| 236 | QString chanNameUp = chan.name.toUpper(); |
| 237 | double maxJaro = 0.0; |
| 238 | ChanInfo *maxJaroChanInfo; |
| 239 | |
| 240 | VERBOSE(VB_XMLTV,QString("============================================")); |
| 241 | VERBOSE(VB_XMLTV,QString("searching for (%1)").arg(chanNameUp)); |
| 242 | |
| 243 | for (; i != oldchanlist->end(); i++) |
| 244 | { |
| 245 | double d1 = 0; |
| 246 | double d2 = 0; |
| 247 | d1 = JaroWinkler(i->name, chanNameUp); |
| 248 | d2 = JaroWinkler(i->callsign, chanNameUp); |
| 249 | |
| 250 | if (d1>0.9 && d2>0.9) |
| 251 | { |
| 252 | VERBOSE(VB_XMLTV, |
| 253 | QString("Looking for a match (%1), (%2), (%3) <==> (%4) = d1 (%5), d2 (%6)") |
| 254 | .arg((*i).name).arg((*i).callsign).arg((*i).chanstr).arg(chanNameUp).arg(d1).arg(d2)); |
| 255 | } |
| 256 | |
| 257 | // Look to the best match |
| 258 | if (d1>0.5 && d1>maxJaro) |
| 259 | { |
| 260 | maxJaro = d1; |
| 261 | maxJaroChanInfo = &(*i); |
| 262 | } |
| 263 | |
| 264 | if (d2>0.5 && d2>maxJaro) |
| 265 | { |
| 266 | maxJaro = d2; |
| 267 | maxJaroChanInfo = &(*i); |
| 268 | } |
| 269 | } |
| 270 | |
| 271 | // only keep match with score > 0.93, under may be a false positive |
| 272 | if (maxJaro>0.93) |
| 273 | { |
| 274 | VERBOSE(VB_XMLTV, |
| 275 | QString("match is (%1), (%2), (%3) <==> (%4) = d1 (%5)") |
| 276 | .arg(maxJaroChanInfo->name).arg(maxJaroChanInfo->callsign).arg(maxJaroChanInfo->chanstr).arg(chanNameUp).arg(maxJaro)); |
| 277 | return maxJaroChanInfo; |
| 278 | } |
| 279 | else |
| 280 | { |
| 281 | VERBOSE(VB_XMLTV, |
| 282 | QString("no match")); |
| 283 | |
| 284 | return NULL; |
| 285 | } |
| 286 | } |
| 287 | |