1 package org.apache.jcs.auxiliary.lateral;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.util.Iterator;
23
24 import org.apache.commons.logging.Log;
25 import org.apache.commons.logging.LogFactory;
26 import org.apache.jcs.auxiliary.lateral.behavior.ILateralCacheManager;
27 import org.apache.jcs.engine.CacheConstants;
28
29 /***
30 * Used to monitor and repair any failed connection for the lateral cache
31 * service. By default the monitor operates in a failure driven mode. That is,
32 * it goes into a wait state until there is an error. Upon the notification of a
33 * connection error, the monitor changes to operate in a time driven mode. That
34 * is, it attempts to recover the connections on a periodic basis. When all
35 * failed connections are restored, it changes back to the failure driven mode.
36 *
37 */
38 public class LateralCacheMonitor
39 implements Runnable
40 {
41 private final static Log log = LogFactory.getLog( LateralCacheMonitor.class );
42
43 private static long idlePeriod = 20 * 1000;
44
45 // minimum 20 seconds.
46 //private static long idlePeriod = 3*1000; // for debugging.
47
48 // Must make sure LateralCacheMonitor is started before any lateral error
49 // can be detected!
50 private boolean alright = true;
51
52 private final static int ERROR = 1;
53
54 private static int mode = ERROR;
55
56 private ILateralCacheManager manager;
57
58 /***
59 * Configures the idle period between repairs.
60 *
61 * @param idlePeriod
62 * The new idlePeriod value
63 */
64 public static void setIdlePeriod( long idlePeriod )
65 {
66 if ( idlePeriod > LateralCacheMonitor.idlePeriod )
67 {
68 LateralCacheMonitor.idlePeriod = idlePeriod;
69 }
70 }
71
72 /***
73 * Allows close classes, ie testers to set the idle period to something
74 * testable.
75 *
76 * @param idlePeriod
77 */
78 protected static void forceShortIdlePeriod( long idlePeriod )
79 {
80 LateralCacheMonitor.idlePeriod = idlePeriod;
81 }
82
83 /*** Constructor for the LateralCacheMonitor object
84 * <p>
85 * It's the clients responsibility to decide how many
86 * of these there will be.
87 *
88 * @param manager
89 */
90 public LateralCacheMonitor( ILateralCacheManager manager )
91 {
92 this.manager = manager;
93 }
94
95 /***
96 * Notifies the cache monitor that an error occurred, and kicks off the
97 * error recovery process.
98 */
99 public void notifyError()
100 {
101 bad();
102 synchronized ( this )
103 {
104 notify();
105 }
106 }
107
108 /***
109 * Main processing method for the LateralCacheMonitor object
110 */
111 public void run()
112 {
113 do
114 {
115 if ( mode == ERROR )
116 {
117 if ( log.isDebugEnabled() )
118 {
119 if ( alright )
120 {
121 log.debug( "ERROR DRIVEN MODE: alright = " + alright
122 + ", connection monitor will wait for an error." );
123 }
124 else
125 {
126 log.debug( "ERROR DRIVEN MODE: alright = " + alright + " connection monitor running." );
127 }
128 }
129
130 if ( alright )
131 {
132 synchronized ( this )
133 {
134 if ( alright )
135 {
136 // Failure driven mode.
137 try
138 {
139 wait();
140 // wake up only if there is an error.
141 }
142 catch ( InterruptedException ignore )
143 {
144 //no op, this is expected
145 }
146 }
147 }
148 }
149 }
150 else
151 {
152 log.debug( "TIME DRIVEN MODE: connection monitor will sleep for " + idlePeriod + " after this run." );
153 // Time driven mode: sleep between each round of recovery
154 // attempt.
155 // will need to test not just check status
156 }
157
158 // The "alright" flag must be false here.
159 // Simply presume we can fix all the errors until proven otherwise.
160 synchronized ( this )
161 {
162 alright = true;
163 }
164
165 if ( log.isDebugEnabled() )
166 {
167 log.debug( "Cache monitor running." );
168 }
169
170 // Monitor each LateralCacheManager instance one after the other.
171 // Each LateralCacheManager corresponds to one lateral connection.
172 log.info( "LateralCacheManager.instances.size() = " + manager.getInstances().size() );
173 //for
174 int cnt = 0;
175 Iterator itr = manager.getInstances().values().iterator();
176 while ( itr.hasNext() )
177 {
178 cnt++;
179 ILateralCacheManager mgr = (ILateralCacheManager) itr.next();
180 try
181 {
182 // If any cache is in error, it strongly suggests all caches
183 // managed by the
184 // same LateralCacheManager instance are in error. So we fix
185 // them once and for all.
186 //for
187 //log.info( "\n " + cnt + "- mgr.lca.getTcpServer() = " + mgr.lca.getTcpServer() + " mgr = " + mgr );
188 log.info( "\n " + cnt + "- mgr.getCaches().size() = " + mgr.getCaches().size() );
189
190 if ( mgr.getCaches().size() == 0 )
191 {
192 // there is probably a problem.
193 // monitor may be running when we just started up and
194 // there
195 // is not a cache yet.
196 // if this is error driven mode, mark as bad,
197 // otherwise we will come back around argain.
198 if ( mode == ERROR )
199 {
200 bad();
201 }
202 }
203
204 Iterator itr2 = mgr.getCaches().values().iterator();
205
206 while ( itr2.hasNext() )
207 {
208 LateralCacheNoWait c = (LateralCacheNoWait) itr2.next();
209 if ( c.getStatus() == CacheConstants.STATUS_ERROR )
210 {
211 log.info( "found LateralCacheNoWait in error, " + c.toString() );
212
213 LateralCacheRestore repairer = new LateralCacheRestore( mgr );
214 // If we can't fix them, just skip and re-try in the
215 // next round.
216 if ( repairer.canFix() )
217 {
218 repairer.fix();
219 }
220 else
221 {
222 bad();
223 }
224 //break;
225 }
226 else
227 {
228 log.info( "Lateral Cache No Wait not in error" );
229 }
230 }
231 }
232 catch ( Exception ex )
233 {
234 bad();
235 // Problem encountered in fixing the caches managed by a
236 // LateralCacheManager instance.
237 // Soldier on to the next LateralCacheManager instance.
238 log.error( "Problem encountered in fixing the caches", ex );
239 }
240 }
241
242 try
243 {
244 // don't want to sleep after waking from an error
245 // run immediately and sleep here.
246 if ( log.isDebugEnabled() )
247 {
248 log.debug( "Lateral cache monitor sleeping for " + idlePeriod + " between runs." );
249 }
250
251 Thread.sleep( idlePeriod );
252 }
253 catch ( InterruptedException ex )
254 {
255 // ignore;
256 }
257 }
258 while ( true );
259 }
260
261 /***
262 * Sets the "alright" flag to false in a critial section.
263 */
264 private void bad()
265 {
266 if ( alright )
267 {
268 synchronized ( this )
269 {
270 alright = false;
271 }
272 }
273 }
274 }