1 package org.apache.jcs.auxiliary.remote;
2
3 /*
4 * Licensed to the Apache Software Foundation (ASF) under one
5 * or more contributor license agreements. See the NOTICE file
6 * distributed with this work for additional information
7 * regarding copyright ownership. The ASF licenses this file
8 * to you under the Apache License, Version 2.0 (the
9 * "License"); you may not use this file except in compliance
10 * with the License. You may obtain a copy of the License at
11 *
12 * http://www.apache.org/licenses/LICENSE-2.0
13 *
14 * Unless required by applicable law or agreed to in writing,
15 * software distributed under the License is distributed on an
16 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
17 * KIND, either express or implied. See the License for the
18 * specific language governing permissions and limitations
19 * under the License.
20 */
21
22 import java.util.Iterator;
23
24 import org.apache.jcs.engine.CacheConstants;
25
26 import org.apache.commons.logging.Log;
27 import org.apache.commons.logging.LogFactory;
28
/**
 * Used to monitor and repair any failed connection for the remote cache
 * service. By default the monitor operates in a failure driven mode; that is,
 * it goes into a wait state until there is an error. Upon the notification of
 * a connection error, the monitor changes to operate in a time driven mode;
 * that is, it attempts to recover the connections on a periodic basis. When
 * all failed connections are restored, it changes back to the failure driven
 * mode.
 * <p>
 * TODO consider moving this into an active monitoring mode.
 */
39 public class RemoteCacheMonitor
40 implements Runnable
41 {
42 /*** The logger */
43 private final static Log log = LogFactory.getLog( RemoteCacheMonitor.class );
44
45 /*** The remote cache that we are monitoring */
46 private static RemoteCacheMonitor instance;
47
48 /*** Time between checks */
49 private static long idlePeriod = 30 * 1000;
50
51 // minimum 30 seconds.
52 //private static long idlePeriod = 3*1000; // for debugging.
53
54 /*** Must make sure RemoteCacheMonitor is started before any remote error can
55 * be detected! */
56 private boolean alright = true;
57
58 /*** Time driven mode */
59 final static int TIME = 0;
60
61 /*** Error driven mode -- only check on health if there is an error */
62 final static int ERROR = 1;
63
64 /*** The mode to use */
65 static int mode = ERROR;
66
67 /***
68 * Configures the idle period between repairs.
69 *
70 * @param idlePeriod
71 * The new idlePeriod value
72 */
73 public static void setIdlePeriod( long idlePeriod )
74 {
75 if ( idlePeriod > RemoteCacheMonitor.idlePeriod )
76 {
77 RemoteCacheMonitor.idlePeriod = idlePeriod;
78 }
79 }
80
81 /*** Constructor for the RemoteCacheMonitor object */
82 private RemoteCacheMonitor()
83 {
84 super();
85 }
86
87 /***
88 * Returns the singleton instance;
89 *
90 * @return The instance value
91 */
92 static RemoteCacheMonitor getInstance()
93 {
94 synchronized ( RemoteCacheMonitor.class )
95 {
96 if ( instance == null )
97 {
98 return instance = new RemoteCacheMonitor();
99 }
100 }
101 return instance;
102 }
103
104 /***
105 * Notifies the cache monitor that an error occurred, and kicks off the
106 * error recovery process.
107 */
108 public void notifyError()
109 {
110 log.debug( "Notified of an error." );
111 bad();
112 synchronized ( this )
113 {
114 notify();
115 }
116 }
117
118 // Run forever.
119
120 // Avoid the use of any synchronization in the process of monitoring for
121 // performance reason.
122 // If exception is thrown owing to synchronization,
123 // just skip the monitoring until the next round.
124 /*** Main processing method for the RemoteCacheMonitor object */
125 public void run()
126 {
127 log.debug( "Monitoring daemon started" );
128 do
129 {
130 if ( mode == ERROR )
131 {
132 synchronized ( this )
133 {
134 if ( alright )
135 {
136 // make this configurable, comment out wait to enter
137 // time driven mode
138 // Failure driven mode.
139 try
140 {
141 if ( log.isDebugEnabled() )
142 {
143 log.debug( "FAILURE DRIVEN MODE: cache monitor waiting for error" );
144 }
145 wait();
146 // wake up only if there is an error.
147 }
148 catch ( InterruptedException ignore )
149 {
150 // swallow
151 }
152 }
153 }
154 }
155 else
156 {
157 if ( log.isDebugEnabled() )
158 {
159 log.debug( "TIME DRIVEN MODE: cache monitor sleeping for " + idlePeriod );
160 }
161 // Time driven mode: sleep between each round of recovery
162 // attempt.
163 // will need to test not just check status
164 }
165
166 try
167 {
168 Thread.sleep( idlePeriod );
169 }
170 catch ( InterruptedException ex )
171 {
172 // ignore;
173 }
174
175 // The "alright" flag must be false here.
176 // Simply presume we can fix all the errors until proven otherwise.
177 synchronized ( this )
178 {
179 alright = true;
180 }
181 //p("cache monitor running.");
182 // Monitor each RemoteCacheManager instance one after the other.
183 // Each RemoteCacheManager corresponds to one remote connection.
184 for ( Iterator itr = RemoteCacheManager.instances.values().iterator(); itr.hasNext(); )
185 {
186 RemoteCacheManager mgr = (RemoteCacheManager) itr.next();
187 try
188 {
189 // If any cache is in error, it strongly suggests all caches
190 // managed by the
191 // same RmicCacheManager instance are in error. So we fix
192 // them once and for all.
193 for ( Iterator itr2 = mgr.caches.values().iterator(); itr2.hasNext(); )
194 {
195 if ( itr2.hasNext() )
196 {
197 RemoteCacheNoWait c = (RemoteCacheNoWait) itr2.next();
198 if ( c.getStatus() == CacheConstants.STATUS_ERROR )
199 {
200 RemoteCacheRestore repairer = new RemoteCacheRestore( mgr );
201 // If we can't fix them, just skip and re-try in
202 // the next round.
203 if ( repairer.canFix() )
204 {
205 repairer.fix();
206 }
207 else
208 {
209 bad();
210 }
211 break;
212 }
213 }
214 }
215 }
216 catch ( Exception ex )
217 {
218 bad();
219 // Problem encountered in fixing the caches managed by a
220 // RemoteCacheManager instance.
221 // Soldier on to the next RemoteCacheManager instance.
222 log.error( ex );
223 }
224 }
225 }
226 while ( true );
227 }
228
229 /*** Sets the "alright" flag to false in a critial section. */
230 private synchronized void bad()
231 {
232 alright = false;
233 }
234 }