| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| org.apache.commons.net.nntp.Threader |
|
|
| 1 | /* |
|
| 2 | * Copyright 2001-2005 The Apache Software Foundation |
|
| 3 | * |
|
| 4 | * Licensed under the Apache License, Version 2.0 (the "License"); |
|
| 5 | * you may not use this file except in compliance with the License. |
|
| 6 | * You may obtain a copy of the License at |
|
| 7 | * |
|
| 8 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 9 | * |
|
| 10 | * Unless required by applicable law or agreed to in writing, software |
|
| 11 | * distributed under the License is distributed on an "AS IS" BASIS, |
|
| 12 | * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
|
| 13 | * See the License for the specific language governing permissions and |
|
| 14 | * limitations under the License. |
|
| 15 | */ |
|
| 16 | ||
| 17 | package org.apache.commons.net.nntp; |
|
| 18 | ||
| 19 | /** |
|
| 20 | * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. |
|
| 21 | * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. |
|
| 22 | * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> |
|
| 23 | * |
|
| 24 | * @author rwinston <rwinston@checkfree.com> |
|
| 25 | * |
|
| 26 | */ |
|
| 27 | ||
| 28 | import java.util.HashMap; |
|
| 29 | import java.util.Iterator; |
|
| 30 | ||
| 31 | 0 | public class Threader { |
| 32 | private ThreadContainer root; |
|
| 33 | private HashMap idTable; |
|
| 34 | 0 | private int bogusIdCount = 0; |
| 35 | ||
| 36 | /** |
|
| 37 | * The main threader entry point - The client passes in an array of Threadable objects, and |
|
| 38 | * the Threader constructs a connected 'graph' of messages |
|
| 39 | * @param messages |
|
| 40 | * @return |
|
| 41 | */ |
|
| 42 | public Threadable thread(Threadable[] messages) { |
|
| 43 | 0 | if (messages == null) |
| 44 | 0 | return null; |
| 45 | ||
| 46 | 0 | idTable = new HashMap(); |
| 47 | ||
| 48 | // walk through each Threadable element |
|
| 49 | 0 | for (int i = 0; i < messages.length; ++i) { |
| 50 | 0 | if (!messages[i].isDummy()) |
| 51 | 0 | buildContainer(messages[i]); |
| 52 | } |
|
| 53 | ||
| 54 | 0 | root = findRootSet(); |
| 55 | 0 | idTable.clear(); |
| 56 | 0 | idTable = null; |
| 57 | ||
| 58 | 0 | pruneEmptyContainers(root); |
| 59 | ||
| 60 | 0 | root.reverseChildren(); |
| 61 | 0 | gatherSubjects(); |
| 62 | ||
| 63 | 0 | if (root.next != null) |
| 64 | 0 | throw new RuntimeException("root node has a next:" + root); |
| 65 | ||
| 66 | 0 | for (ThreadContainer r = root.child; r != null; r = r.next) { |
| 67 | 0 | if (r.threadable == null) |
| 68 | 0 | r.threadable = r.child.threadable.makeDummy(); |
| 69 | } |
|
| 70 | ||
| 71 | 0 | Threadable result = (root.child == null ? class="keyword">null : root.child.threadable); |
| 72 | 0 | root.flush(); |
| 73 | 0 | root = null; |
| 74 | ||
| 75 | 0 | return result; |
| 76 | } |
|
| 77 | ||
| 78 | /** |
|
| 79 | * |
|
| 80 | * @param threadable |
|
| 81 | */ |
|
| 82 | private void buildContainer(Threadable threadable) { |
|
| 83 | 0 | String id = threadable.messageThreadId(); |
| 84 | 0 | ThreadContainer container = (ThreadContainer) idTable.get(id); |
| 85 | ||
| 86 | // A ThreadContainer exists for this id already. This should be a forward reference, but may |
|
| 87 | // be a duplicate id, in which case we will need to generate a bogus placeholder id |
|
| 88 | 0 | if (container != null) { |
| 89 | 0 | if (container.threadable != null) { // oops! duplicate ids... |
| 90 | 0 | id = "<Bogus-id:" + (bogusIdCount++) + ">"; |
| 91 | 0 | container = null; |
| 92 | } else { |
|
| 93 | // The container just contained a forward reference to this message, so let's |
|
| 94 | // fill in the threadable field of the container with this message |
|
| 95 | 0 | container.threadable = threadable; |
| 96 | } |
|
| 97 | } |
|
| 98 | ||
| 99 | // No container exists for that message Id. Create one and insert it into the hash table. |
|
| 100 | 0 | if (container == null) { |
| 101 | 0 | container = new ThreadContainer(); |
| 102 | 0 | container.threadable = threadable; |
| 103 | 0 | idTable.put(id, container); |
| 104 | } |
|
| 105 | ||
| 106 | // Iterate through all of the references and create ThreadContainers for any references that |
|
| 107 | // don't have them. |
|
| 108 | 0 | ThreadContainer parentRef = null; |
| 109 | { |
|
| 110 | 0 | String[] references = threadable.messageThreadReferences(); |
| 111 | 0 | for (int i = 0; i < references.length; ++i) { |
| 112 | 0 | String refString = references[i]; |
| 113 | 0 | ThreadContainer ref = (ThreadContainer) idTable.get(refString); |
| 114 | ||
| 115 | // if this id doesnt have a container, create one |
|
| 116 | 0 | if (ref == null) { |
| 117 | 0 | ref = new ThreadContainer(); |
| 118 | 0 | idTable.put(refString, ref); |
| 119 | } |
|
| 120 | ||
| 121 | // Link references together in the order they appear in the References: header, |
|
| 122 | // IF they dont have a have a parent already && |
|
| 123 | // IF it will not cause a circular reference |
|
| 124 | 0 | if ((parentRef != null) |
| 125 | && (ref.parent == null) |
|
| 126 | && (parentRef != ref) |
|
| 127 | && !(parentRef.findChild(ref))) { |
|
| 128 | // Link ref into the parent's child list |
|
| 129 | 0 | ref.parent = parentRef; |
| 130 | 0 | ref.next = parentRef.child; |
| 131 | 0 | parentRef.child = ref; |
| 132 | } |
|
| 133 | 0 | parentRef = ref; |
| 134 | } |
|
| 135 | } |
|
| 136 | ||
| 137 | // parentRef is now set to the container of the last element in the references field. make that |
|
| 138 | // be the parent of this container, unless doing so causes a circular reference |
|
| 139 | 0 | if (parentRef != null |
| 140 | && (parentRef == container || container.findChild(parentRef))) |
|
| 141 | 0 | parentRef = null; |
| 142 | ||
| 143 | // if it has a parent already, its because we saw this message in a References: field, and presumed |
|
| 144 | // a parent based on the other entries in that field. Now that we have the actual message, we can |
|
| 145 | // throw away the old parent and use this new one |
|
| 146 | 0 | if (container.parent != null) { |
| 147 | ThreadContainer rest, prev; |
|
| 148 | ||
| 149 | 0 | for (prev = null, rest = container.parent.child; |
| 150 | 0 | rest != null; |
| 151 | 0 | prev = rest, class="keyword">rest = class="keyword">rest.next) { |
| 152 | 0 | if (rest == container) |
| 153 | 0 | break; |
| 154 | } |
|
| 155 | ||
| 156 | 0 | if (rest == null) { |
| 157 | 0 | throw new RuntimeException( |
| 158 | "Didnt find " |
|
| 159 | + container |
|
| 160 | + " in parent" |
|
| 161 | + container.parent); |
|
| 162 | } |
|
| 163 | ||
| 164 | // Unlink this container from the parent's child list |
|
| 165 | 0 | if (prev == null) |
| 166 | 0 | container.parent.child = container.next; |
| 167 | else |
|
| 168 | 0 | prev.next = container.next; |
| 169 | ||
| 170 | 0 | container.next = null; |
| 171 | 0 | container.parent = null; |
| 172 | } |
|
| 173 | ||
| 174 | // If we have a parent, link container into the parents child list |
|
| 175 | 0 | if (parentRef != null) { |
| 176 | 0 | container.parent = parentRef; |
| 177 | 0 | container.next = parentRef.child; |
| 178 | 0 | parentRef.child = container; |
| 179 | } |
|
| 180 | 0 | } |
| 181 | ||
| 182 | /** |
|
| 183 | * Find the root set of all existing ThreadContainers |
|
| 184 | * @return root the ThreadContainer representing the root node |
|
| 185 | */ |
|
| 186 | private ThreadContainer findRootSet() { |
|
| 187 | 0 | ThreadContainer root = new ThreadContainer(); |
| 188 | 0 | Iterator iter = idTable.keySet().iterator(); |
| 189 | ||
| 190 | 0 | while (iter.hasNext()) { |
| 191 | 0 | Object key = iter.next(); |
| 192 | 0 | ThreadContainer c = (ThreadContainer) idTable.get(key); |
| 193 | 0 | if (c.parent == null) { |
| 194 | 0 | if (c.next != null) |
| 195 | 0 | throw new RuntimeException( |
| 196 | "c.next is " + c.next.toString()); |
|
| 197 | 0 | c.next = root.child; |
| 198 | 0 | root.child = c; |
| 199 | } |
|
| 200 | } |
|
| 201 | 0 | return root; |
| 202 | } |
|
| 203 | ||
| 204 | /** |
|
| 205 | * Delete any empty or dummy ThreadContainers |
|
| 206 | * @param parent |
|
| 207 | */ |
|
| 208 | private void pruneEmptyContainers(ThreadContainer parent) { |
|
| 209 | ThreadContainer container, prev, next; |
|
| 210 | 0 | for (prev = null, container = parent.child, next = container.next; |
| 211 | 0 | container != null; |
| 212 | 0 | prev = container, |
| 213 | 0 | container = next, |
| 214 | 0 | next = (container == null ? class="keyword">null : container.next)) { |
| 215 | ||
| 216 | // Is it empty and without any children? If so,delete it |
|
| 217 | 0 | if (container.threadable == null && container.child == class="keyword">null) { |
| 218 | 0 | if (prev == null) |
| 219 | 0 | parent.child = container.next; |
| 220 | else |
|
| 221 | 0 | prev.next = container.next; |
| 222 | ||
| 223 | // Set container to prev so that prev keeps its same value the next time through the loop |
|
| 224 | 0 | container = prev; |
| 225 | } |
|
| 226 | ||
| 227 | // Else if empty, with kids, and (not at root or only one kid) |
|
| 228 | 0 | else if ( |
| 229 | container.threadable == null |
|
| 230 | && container.child != null |
|
| 231 | && (container.parent != null |
|
| 232 | || container.child.next == null)) { |
|
| 233 | // We have an invalid/expired message with kids. Promote the kids to this level. |
|
| 234 | ThreadContainer tail; |
|
| 235 | 0 | ThreadContainer kids = container.child; |
| 236 | ||
| 237 | // Remove this container and replace with 'kids'. |
|
| 238 | 0 | if (prev == null) |
| 239 | 0 | parent.child = kids; |
| 240 | else |
|
| 241 | 0 | prev.next = kids; |
| 242 | ||
| 243 | // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next |
|
| 244 | // i.e. splice kids into the list in place of container |
|
| 245 | 0 | for (tail = kids; tail.next != null; tail = tail.next) |
| 246 | 0 | tail.parent = container.parent; |
| 247 | ||
| 248 | 0 | tail.parent = container.parent; |
| 249 | 0 | tail.next = container.next; |
| 250 | ||
| 251 | // next currently points to the item after the inserted items in the chain - reset that so we process the newly |
|
| 252 | // promoted items next time round |
|
| 253 | 0 | next = kids; |
| 254 | ||
| 255 | // Set container to prev so that prev keeps its same value the next time through the loop |
|
| 256 | 0 | container = prev; |
| 257 | 0 | } else if (container.child != null) { |
| 258 | // A real message , with kids |
|
| 259 | // Iterate over the children |
|
| 260 | 0 | pruneEmptyContainers(container); |
| 261 | } |
|
| 262 | } |
|
| 263 | 0 | } |
| 264 | ||
| 265 | /** |
|
| 266 | * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. |
|
| 267 | */ |
|
| 268 | private void gatherSubjects() { |
|
| 269 | ||
| 270 | 0 | int count = 0; |
| 271 | ||
| 272 | 0 | for (ThreadContainer c = root.child; c != null; c = c.next) |
| 273 | 0 | count++; |
| 274 | ||
| 275 | // TODO verify this will avoid rehashing |
|
| 276 | 0 | HashMap subjectTable = new HashMap((int) (count * 1.2), (float) 0.9); |
| 277 | 0 | count = 0; |
| 278 | ||
| 279 | 0 | for (ThreadContainer c = root.child; c != null; c = c.next) { |
| 280 | 0 | Threadable threadable = c.threadable; |
| 281 | ||
| 282 | // No threadable? If so, it is a dummy node in the root set. |
|
| 283 | // Only root set members may be dummies, and they alway have at least 2 kids |
|
| 284 | // Take the first kid as representative of the subject |
|
| 285 | 0 | if (threadable == null) |
| 286 | 0 | threadable = c.child.threadable; |
| 287 | ||
| 288 | 0 | String subj = threadable.simplifiedSubject(); |
| 289 | ||
| 290 | 0 | if (subj == null || subj == "") |
| 291 | 0 | continue; |
| 292 | ||
| 293 | 0 | ThreadContainer old = (ThreadContainer) subjectTable.get(subj); |
| 294 | ||
| 295 | // Add this container to the table iff: |
|
| 296 | // - There exists no container with this subject |
|
| 297 | // - or this is a dummy container and the old one is not - the dummy one is |
|
| 298 | // more interesting as a root, so put it in the table instead |
|
| 299 | // - The container in the table has a "Re:" version of this subject, and |
|
| 300 | // this container has a non-"Re:" version of this subject. The non-"Re:" version |
|
| 301 | // is the more interesting of the two. |
|
| 302 | 0 | if (old == null |
| 303 | || (c.threadable == null && old.threadable != class="keyword">null) |
|
| 304 | || (old.threadable != null |
|
| 305 | && old.threadable.subjectIsReply() |
|
| 306 | && c.threadable != null |
|
| 307 | && !c.threadable.subjectIsReply())) { |
|
| 308 | 0 | subjectTable.put(subj, c); |
| 309 | 0 | count++; |
| 310 | } |
|
| 311 | } |
|
| 312 | ||
| 313 | // If the table is empty, we're done |
|
| 314 | 0 | if (count == 0) |
| 315 | 0 | return; |
| 316 | ||
| 317 | // subjectTable is now populated with one entry for each subject which occurs in the |
|
| 318 | // root set. Iterate over the root set, and gather together the difference. |
|
| 319 | ThreadContainer prev, c, rest; |
|
| 320 | 0 | for (prev = null, c = root.child, rest = c.next; |
| 321 | 0 | c != null; |
| 322 | 0 | prev = c, c = rest, class="keyword">rest = (class="keyword">rest == null ? null : class="keyword">rest.next)) { |
| 323 | 0 | Threadable threadable = c.threadable; |
| 324 | ||
| 325 | // is it a dummy node? |
|
| 326 | 0 | if (threadable == null) |
| 327 | 0 | threadable = c.child.threadable; |
| 328 | ||
| 329 | 0 | String subj = threadable.simplifiedSubject(); |
| 330 | ||
| 331 | // Dont thread together all subjectless messages |
|
| 332 | 0 | if (subj == null || subj == "") |
| 333 | 0 | continue; |
| 334 | ||
| 335 | 0 | ThreadContainer old = (ThreadContainer) subjectTable.get(subj); |
| 336 | ||
| 337 | 0 | if (old == c) // That's us |
| 338 | 0 | continue; |
| 339 | ||
| 340 | // We have now found another container in the root set with the same subject |
|
| 341 | // Remove the "second" message from the root set |
|
| 342 | 0 | if (prev == null) |
| 343 | 0 | root.child = c.next; |
| 344 | else |
|
| 345 | 0 | prev.next = c.next; |
| 346 | 0 | c.next = null; |
| 347 | ||
| 348 | 0 | if (old.threadable == null && c.threadable == class="keyword">null) { |
| 349 | // both dummies - merge them |
|
| 350 | ThreadContainer tail; |
|
| 351 | 0 | for (tail = old.child; |
| 352 | 0 | tail != null && tail.next != class="keyword">null; |
| 353 | 0 | tail = tail.next); |
| 354 | ||
| 355 | 0 | tail.next = c.child; |
| 356 | ||
| 357 | 0 | for (tail = c.child; tail != null; tail = tail.next) |
| 358 | 0 | tail.parent = old; |
| 359 | ||
| 360 | 0 | c.child = null; |
| 361 | 0 | } else if ( |
| 362 | old.threadable == null |
|
| 363 | || (c.threadable != null |
|
| 364 | && c.threadable.subjectIsReply() |
|
| 365 | && !old.threadable.subjectIsReply())) { |
|
| 366 | // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old |
|
| 367 | 0 | c.parent = old; |
| 368 | 0 | c.next = old.child; |
| 369 | 0 | old.child = c; |
| 370 | } else { |
|
| 371 | // else make the old and new messages be children of a new dummy container. |
|
| 372 | // We create a new container object for old.msg and empty the old container |
|
| 373 | 0 | ThreadContainer newc = new ThreadContainer(); |
| 374 | 0 | newc.threadable = old.threadable; |
| 375 | 0 | newc.child = old.child; |
| 376 | ||
| 377 | 0 | for (ThreadContainer tail = newc.child; |
| 378 | 0 | tail != null; |
| 379 | 0 | tail = tail.next) |
| 380 | 0 | tail.parent = newc; |
| 381 | ||
| 382 | 0 | old.threadable = null; |
| 383 | 0 | old.child = null; |
| 384 | ||
| 385 | 0 | c.parent = old; |
| 386 | 0 | newc.parent = old; |
| 387 | ||
| 388 | // Old is now a dummy- give it 2 kids , c and newc |
|
| 389 | 0 | old.child = c; |
| 390 | 0 | c.next = newc; |
| 391 | } |
|
| 392 | // We've done a merge, so keep the same prev |
|
| 393 | 0 | c = prev; |
| 394 | } |
|
| 395 | ||
| 396 | 0 | subjectTable.clear(); |
| 397 | 0 | subjectTable = null; |
| 398 | ||
| 399 | 0 | } |
| 400 | } |
|
| 401 | ||
| 402 | /** |
|
| 403 | * A placeholder utility class, used for constructing a tree of Threadables |
|
| 404 | * Originall implementation by Jamie Zawinski. |
|
| 405 | * See the Grendel source for more details <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java#511">here</a> |
|
| 406 | * Threadable objects |
|
| 407 | * @author Rory Winston <rwinston@checkfree.com> |
|
| 408 | */ |
|
| 409 | class ThreadContainer { |
|
| 410 | Threadable threadable; |
|
| 411 | ThreadContainer parent; |
|
| 412 | ThreadContainer prev; |
|
| 413 | ThreadContainer next; |
|
| 414 | ThreadContainer child; |
|
| 415 | ||
| 416 | /** |
|
| 417 | * |
|
| 418 | * @param container |
|
| 419 | * @return true if child is under self's tree. Detects circular references |
|
| 420 | */ |
|
| 421 | boolean findChild(ThreadContainer target) { |
|
| 422 | if (child == null) |
|
| 423 | return false; |
|
| 424 | ||
| 425 | else if (child == target) |
|
| 426 | return true; |
|
| 427 | else |
|
| 428 | return child.findChild(target); |
|
| 429 | } |
|
| 430 | ||
| 431 | // Copy the ThreadContainer tree structure down into the underlying Threadable objects |
|
| 432 | // (Make the Threadable tree look like the ThreadContainer tree) |
|
| 433 | void flush() { |
|
| 434 | if (parent != null && threadable == class="keyword">null) |
|
| 435 | throw new RuntimeException("no threadable in " + this.toString()); |
|
| 436 | ||
| 437 | parent = null; |
|
| 438 | ||
| 439 | if (threadable != null) |
|
| 440 | threadable.setChild(child == null ? class="keyword">null : child.threadable); |
|
| 441 | ||
| 442 | if (child != null) { |
|
| 443 | child.flush(); |
|
| 444 | child = null; |
|
| 445 | } |
|
| 446 | ||
| 447 | if (threadable != null) |
|
| 448 | threadable.setNext(next == null ? class="keyword">null : next.threadable); |
|
| 449 | ||
| 450 | if (next != null) { |
|
| 451 | next.flush(); |
|
| 452 | next = null; |
|
| 453 | } |
|
| 454 | ||
| 455 | threadable = null; |
|
| 456 | } |
|
| 457 | ||
| 458 | /** |
|
| 459 | * Reverse the entire set of children |
|
| 460 | * |
|
| 461 | */ |
|
| 462 | void reverseChildren() { |
|
| 463 | if (child != null) { |
|
| 464 | ThreadContainer kid, prev, rest; |
|
| 465 | for (prev = null, kid = child, rest = kid.next; |
|
| 466 | kid != null; |
|
| 467 | prev = kid, |
|
| 468 | kid = rest, |
|
| 469 | rest = (class="keyword">rest == null ? null : class="keyword">rest.next)) |
|
| 470 | kid.next = prev; |
|
| 471 | ||
| 472 | child = prev; |
|
| 473 | ||
| 474 | // Do it for the kids |
|
| 475 | for (kid = child; kid != null; kid = kid.next) |
|
| 476 | kid.reverseChildren(); |
|
| 477 | } |
|
| 478 | } |
|
| 479 | } |
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |