001 /*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements. See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License. You may obtain a copy of the License at
008 *
009 * http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018
019 package org.apache.commons.net.nntp;
020
021 /**
 * This is an implementation of a message threading algorithm, as originally devised by Jamie Zawinski.
023 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
024 * For his Java implementation, see <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
025 *
026 * @author rwinston <rwinston@checkfree.com>
027 *
028 */
029
030 import java.util.HashMap;
031 import java.util.Iterator;
032 import java.util.List;
033
034 public class Threader {
035 private ThreadContainer root;
036 private HashMap<String,ThreadContainer> idTable;
037 private int bogusIdCount = 0;
038
039 /**
040 * The client passes in a list of Threadable objects, and
041 * the Threader constructs a connected 'graph' of messages
042 * @param messages list of messages to thread
043 * @return null if messages == null or root.child == null
044 * @since 2.2
045 */
046 public Threadable thread(List<? extends Threadable> messages) {
047 return thread((Iterable<? extends Threadable>)messages);
048 }
049
050 /**
051 * The client passes in a list of Iterable objects, and
052 * the Threader constructs a connected 'graph' of messages
053 * @param messages iterable of messages to thread
054 * @return null if messages == null or root.child == null
055 * @since 3.0
056 */
057 public Threadable thread(Iterable<? extends Threadable> messages) {
058 if (messages == null)
059 return null;
060
061 idTable = new HashMap<String,ThreadContainer>();
062
063 // walk through each Threadable element
064 for (Threadable t : messages) {
065 if (!t.isDummy())
066 buildContainer(t);
067 }
068
069 root = findRootSet();
070 idTable.clear();
071 idTable = null;
072
073 pruneEmptyContainers(root);
074
075 root.reverseChildren();
076 gatherSubjects();
077
078 if (root.next != null)
079 throw new RuntimeException("root node has a next:" + root);
080
081 for (ThreadContainer r = root.child; r != null; r = r.next) {
082 if (r.threadable == null)
083 r.threadable = r.child.threadable.makeDummy();
084 }
085
086 Threadable result = (root.child == null ? null : root.child.threadable);
087 root.flush();
088 root = null;
089
090 return result;
091 }
092
093 /**
094 *
095 * @param threadable
096 */
097 private void buildContainer(Threadable threadable) {
098 String id = threadable.messageThreadId();
099 ThreadContainer container = idTable.get(id);
100
101 // A ThreadContainer exists for this id already. This should be a forward reference, but may
102 // be a duplicate id, in which case we will need to generate a bogus placeholder id
103 if (container != null) {
104 if (container.threadable != null) { // oops! duplicate ids...
105 id = "<Bogus-id:" + (bogusIdCount++) + ">";
106 container = null;
107 } else {
108 // The container just contained a forward reference to this message, so let's
109 // fill in the threadable field of the container with this message
110 container.threadable = threadable;
111 }
112 }
113
114 // No container exists for that message Id. Create one and insert it into the hash table.
115 if (container == null) {
116 container = new ThreadContainer();
117 container.threadable = threadable;
118 idTable.put(id, container);
119 }
120
121 // Iterate through all of the references and create ThreadContainers for any references that
122 // don't have them.
123 ThreadContainer parentRef = null;
124 {
125 String[] references = threadable.messageThreadReferences();
126 for (int i = 0; i < references.length; ++i) {
127 String refString = references[i];
128 ThreadContainer ref = idTable.get(refString);
129
130 // if this id doesnt have a container, create one
131 if (ref == null) {
132 ref = new ThreadContainer();
133 idTable.put(refString, ref);
134 }
135
136 // Link references together in the order they appear in the References: header,
137 // IF they dont have a have a parent already &&
138 // IF it will not cause a circular reference
139 if ((parentRef != null)
140 && (ref.parent == null)
141 && (parentRef != ref)
142 && !(ref.findChild(parentRef))) {
143 // Link ref into the parent's child list
144 ref.parent = parentRef;
145 ref.next = parentRef.child;
146 parentRef.child = ref;
147 }
148 parentRef = ref;
149 }
150 }
151
152 // parentRef is now set to the container of the last element in the references field. make that
153 // be the parent of this container, unless doing so causes a circular reference
154 if (parentRef != null
155 && (parentRef == container || container.findChild(parentRef)))
156 parentRef = null;
157
158 // if it has a parent already, its because we saw this message in a References: field, and presumed
159 // a parent based on the other entries in that field. Now that we have the actual message, we can
160 // throw away the old parent and use this new one
161 if (container.parent != null) {
162 ThreadContainer rest, prev;
163
164 for (prev = null, rest = container.parent.child;
165 rest != null;
166 prev = rest, rest = rest.next) {
167 if (rest == container)
168 break;
169 }
170
171 if (rest == null) {
172 throw new RuntimeException(
173 "Didnt find "
174 + container
175 + " in parent"
176 + container.parent);
177 }
178
179 // Unlink this container from the parent's child list
180 if (prev == null)
181 container.parent.child = container.next;
182 else
183 prev.next = container.next;
184
185 container.next = null;
186 container.parent = null;
187 }
188
189 // If we have a parent, link container into the parents child list
190 if (parentRef != null) {
191 container.parent = parentRef;
192 container.next = parentRef.child;
193 parentRef.child = container;
194 }
195 }
196
197 /**
198 * Find the root set of all existing ThreadContainers
199 * @return root the ThreadContainer representing the root node
200 */
201 private ThreadContainer findRootSet() {
202 ThreadContainer root = new ThreadContainer();
203 Iterator<String> iter = idTable.keySet().iterator();
204
205 while (iter.hasNext()) {
206 Object key = iter.next();
207 ThreadContainer c = idTable.get(key);
208 if (c.parent == null) {
209 if (c.next != null)
210 throw new RuntimeException(
211 "c.next is " + c.next.toString());
212 c.next = root.child;
213 root.child = c;
214 }
215 }
216 return root;
217 }
218
219 /**
220 * Delete any empty or dummy ThreadContainers
221 * @param parent
222 */
223 private void pruneEmptyContainers(ThreadContainer parent) {
224 ThreadContainer container, prev, next;
225 for (prev = null, container = parent.child, next = container.next;
226 container != null;
227 prev = container,
228 container = next,
229 next = (container == null ? null : container.next)) {
230
231 // Is it empty and without any children? If so,delete it
232 if (container.threadable == null && container.child == null) {
233 if (prev == null)
234 parent.child = container.next;
235 else
236 prev.next = container.next;
237
238 // Set container to prev so that prev keeps its same value the next time through the loop
239 container = prev;
240 }
241
242 // Else if empty, with kids, and (not at root or only one kid)
243 else if (
244 container.threadable == null
245 && container.child != null
246 && (container.parent != null
247 || container.child.next == null)) {
248 // We have an invalid/expired message with kids. Promote the kids to this level.
249 ThreadContainer tail;
250 ThreadContainer kids = container.child;
251
252 // Remove this container and replace with 'kids'.
253 if (prev == null)
254 parent.child = kids;
255 else
256 prev.next = kids;
257
258 // Make each child's parent be this level's parent -> i.e. promote the children. Make the last child's next point to this container's next
259 // i.e. splice kids into the list in place of container
260 for (tail = kids; tail.next != null; tail = tail.next)
261 tail.parent = container.parent;
262
263 tail.parent = container.parent;
264 tail.next = container.next;
265
266 // next currently points to the item after the inserted items in the chain - reset that so we process the newly
267 // promoted items next time round
268 next = kids;
269
270 // Set container to prev so that prev keeps its same value the next time through the loop
271 container = prev;
272 } else if (container.child != null) {
273 // A real message , with kids
274 // Iterate over the children
275 pruneEmptyContainers(container);
276 }
277 }
278 }
279
280 /**
281 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
282 */
283 private void gatherSubjects() {
284
285 int count = 0;
286
287 for (ThreadContainer c = root.child; c != null; c = c.next)
288 count++;
289
290 // TODO verify this will avoid rehashing
291 HashMap<String, ThreadContainer> subjectTable = new HashMap<String, ThreadContainer>((int) (count * 1.2), (float) 0.9);
292 count = 0;
293
294 for (ThreadContainer c = root.child; c != null; c = c.next) {
295 Threadable threadable = c.threadable;
296
297 // No threadable? If so, it is a dummy node in the root set.
298 // Only root set members may be dummies, and they alway have at least 2 kids
299 // Take the first kid as representative of the subject
300 if (threadable == null)
301 threadable = c.child.threadable;
302
303 String subj = threadable.simplifiedSubject();
304
305 if (subj == null || subj == "")
306 continue;
307
308 ThreadContainer old = subjectTable.get(subj);
309
310 // Add this container to the table iff:
311 // - There exists no container with this subject
312 // - or this is a dummy container and the old one is not - the dummy one is
313 // more interesting as a root, so put it in the table instead
314 // - The container in the table has a "Re:" version of this subject, and
315 // this container has a non-"Re:" version of this subject. The non-"Re:" version
316 // is the more interesting of the two.
317 if (old == null
318 || (c.threadable == null && old.threadable != null)
319 || (old.threadable != null
320 && old.threadable.subjectIsReply()
321 && c.threadable != null
322 && !c.threadable.subjectIsReply())) {
323 subjectTable.put(subj, c);
324 count++;
325 }
326 }
327
328 // If the table is empty, we're done
329 if (count == 0)
330 return;
331
332 // subjectTable is now populated with one entry for each subject which occurs in the
333 // root set. Iterate over the root set, and gather together the difference.
334 ThreadContainer prev, c, rest;
335 for (prev = null, c = root.child, rest = c.next;
336 c != null;
337 prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
338 Threadable threadable = c.threadable;
339
340 // is it a dummy node?
341 if (threadable == null)
342 threadable = c.child.threadable;
343
344 String subj = threadable.simplifiedSubject();
345
346 // Dont thread together all subjectless messages
347 if (subj == null || subj == "")
348 continue;
349
350 ThreadContainer old = subjectTable.get(subj);
351
352 if (old == c) // That's us
353 continue;
354
355 // We have now found another container in the root set with the same subject
356 // Remove the "second" message from the root set
357 if (prev == null)
358 root.child = c.next;
359 else
360 prev.next = c.next;
361 c.next = null;
362
363 if (old.threadable == null && c.threadable == null) {
364 // both dummies - merge them
365 ThreadContainer tail;
366 for (tail = old.child;
367 tail != null && tail.next != null;
368 tail = tail.next){}
369
370 if (tail != null) { // protect against possible NPE
371 tail.next = c.child;
372 }
373
374 for (tail = c.child; tail != null; tail = tail.next)
375 tail.parent = old;
376
377 c.child = null;
378 } else if (
379 old.threadable == null
380 || (c.threadable != null
381 && c.threadable.subjectIsReply()
382 && !old.threadable.subjectIsReply())) {
383 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
384 c.parent = old;
385 c.next = old.child;
386 old.child = c;
387 } else {
388 // else make the old and new messages be children of a new dummy container.
389 // We create a new container object for old.msg and empty the old container
390 ThreadContainer newc = new ThreadContainer();
391 newc.threadable = old.threadable;
392 newc.child = old.child;
393
394 for (ThreadContainer tail = newc.child;
395 tail != null;
396 tail = tail.next)
397 tail.parent = newc;
398
399 old.threadable = null;
400 old.child = null;
401
402 c.parent = old;
403 newc.parent = old;
404
405 // Old is now a dummy- give it 2 kids , c and newc
406 old.child = c;
407 c.next = newc;
408 }
409 // We've done a merge, so keep the same prev
410 c = prev;
411 }
412
413 subjectTable.clear();
414 subjectTable = null;
415
416 }
417
418
419 // DEPRECATED METHODS - for API compatibility only - DO NOT USE
420
421 /**
422 * The client passes in an array of Threadable objects, and
423 * the Threader constructs a connected 'graph' of messages
424 * @param messages array of messages to thread
425 * @return null if messages == null or root.child == null
426 * @deprecated (2.2) prefer {@link #thread(List)}
427 */
428 @Deprecated
429 public Threadable thread(Threadable[] messages) {
430 return thread(java.util.Arrays.asList(messages));
431 }
432
433 }