| %line | %branch | |||||||||
|---|---|---|---|---|---|---|---|---|---|---|
| org.apache.jcs.auxiliary.remote.RemoteCacheFailoverRunner |
|
|
| 1 | package org.apache.jcs.auxiliary.remote; |
|
| 2 | ||
| 3 | /* |
|
| 4 | * Licensed to the Apache Software Foundation (ASF) under one |
|
| 5 | * or more contributor license agreements. See the NOTICE file |
|
| 6 | * distributed with this work for additional information |
|
| 7 | * regarding copyright ownership. The ASF licenses this file |
|
| 8 | * to you under the Apache License, Version 2.0 (the |
|
| 9 | * "License"); you may not use this file except in compliance |
|
| 10 | * with the License. You may obtain a copy of the License at |
|
| 11 | * |
|
| 12 | * http://www.apache.org/licenses/LICENSE-2.0 |
|
| 13 | * |
|
| 14 | * Unless required by applicable law or agreed to in writing, |
|
| 15 | * software distributed under the License is distributed on an |
|
| 16 | * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
|
| 17 | * KIND, either express or implied. See the License for the |
|
| 18 | * specific language governing permissions and limitations |
|
| 19 | * under the License. |
|
| 20 | */ |
|
| 21 | ||
| 22 | import org.apache.commons.logging.Log; |
|
| 23 | import org.apache.commons.logging.LogFactory; |
|
| 24 | import org.apache.jcs.engine.CacheConstants; |
|
| 25 | import org.apache.jcs.engine.behavior.ICache; |
|
| 26 | import org.apache.jcs.engine.behavior.ICompositeCacheManager; |
|
| 27 | ||
| 28 | /** |
|
| 29 | * The RemoteCacheFailoverRunner tries to establish a connection with a failover |
|
| 30 | * server, if any are defined. Once a failover connection is made, it will |
|
| 31 | * attempt to replace the failover with the primary remote server. |
|
| 32 | * <p> |
|
| 33 | * It works by switching out the RemoteCacheNoWait inside the Facade. |
|
| 34 | * <p> |
|
| 35 | * The client (i.e., the CompositeCache) has a reference to a RemoteCacheNoWaitFacade. |
|
| 36 | * This facade is created by the RemoteCacheFactory. The factory maintains a set |
|
| 37 | * of managers, one for each remote server. Typically, there will only be one |
|
| 38 | * manager. |
|
| 39 | * <p> |
|
| 40 | * If you use multiple remote servers, you may want to set one or more as |
|
| 41 | * failovers. If a local cache cannot connect to the primary server, or loses |
|
| 42 | * its connection to the primary server, it will attempt to restore that |
|
| 43 | * connection in the background. If failovers are defined, the Failover runner |
|
| 44 | * will try to connect to a failover until the primary is restored. |
|
| 45 | * |
|
| 46 | */ |
|
| 47 | public class RemoteCacheFailoverRunner |
|
| 48 | implements Runnable |
|
| 49 | { |
|
| 50 | 0 | private final static Log log = LogFactory.getLog( RemoteCacheFailoverRunner.class ); |
| 51 | ||
| 52 | private RemoteCacheNoWaitFacade facade; |
|
| 53 | ||
| 54 | 0 | private static long idlePeriod = 20 * 1000; |
| 55 | ||
| 56 | 0 | private boolean alright = true; |
| 57 | ||
| 58 | private ICompositeCacheManager cacheMgr; |
|
| 59 | ||
| 60 | /** |
|
| 61 | * Constructor for the RemoteCacheFailoverRunner object. This allows the |
|
| 62 | * FailoverRunner to modify the facade that the CompositeCache references. |
|
| 63 | * |
|
| 64 | * @param facade |
|
| 65 | * the facade the CompositeCache talks to. |
|
| 66 | * @param cacheMgr |
|
| 67 | */ |
|
| 68 | public RemoteCacheFailoverRunner( RemoteCacheNoWaitFacade facade, ICompositeCacheManager cacheMgr ) |
|
| 69 | 0 | { |
| 70 | 0 | this.facade = facade; |
| 71 | 0 | this.cacheMgr = cacheMgr; |
| 72 | 0 | } |
| 73 | ||
| 74 | /** |
|
| 75 | * Notifies the cache monitor that an error occurred, and kicks off the |
|
| 76 | * error recovery process. |
|
| 77 | */ |
|
| 78 | public void notifyError() |
|
| 79 | { |
|
| 80 | 0 | bad(); |
| 81 | 0 | synchronized ( this ) |
| 82 | { |
|
| 83 | 0 | notify(); |
| 84 | 0 | } |
| 85 | 0 | } |
| 86 | ||
| 87 | /** |
|
| 88 | * Main processing method for the RemoteCacheFailoverRunner object. |
|
| 89 | * <p> |
|
| 90 | * If we do not have a connection with any failover server, this will try to |
|
| 91 | * connect one at a time. If no connection can be made, it goes to sleep for |
|
| 92 | * a while (20 seconds). |
|
| 93 | * <p> |
|
| 94 | * Once a connection with a failover is made, we will try to reconnect to |
|
| 95 | * the primary server. |
|
| 96 | * <p> |
|
| 97 | * The primary server is the first server defined in the FailoverServers |
|
| 98 | * list. |
|
| 99 | */ |
|
| 100 | public void run() |
|
| 101 | { |
|
| 102 | // start the main work of connecting to a failover and then restoring |
|
| 103 | // the primary. |
|
| 104 | 0 | connectAndRestore(); |
| 105 | ||
| 106 | 0 | if ( log.isInfoEnabled() ) |
| 107 | { |
|
| 108 | 0 | log.info( "Exiting failover runner. Failover index = " + facade.remoteCacheAttributes.getFailoverIndex() ); |
| 109 | 0 | if ( facade.remoteCacheAttributes.getFailoverIndex() <= 0 ) |
| 110 | { |
|
| 111 | 0 | log.info( "Failover index is <= 0, meaning we are not " + "connected to a failover server." ); |
| 112 | 0 | } |
| 113 | 0 | else if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 ) |
| 114 | { |
|
| 115 | 0 | log.info( "Failover index is > 0, meaning we are " + "connected to a failover server." ); |
| 116 | } |
|
| 117 | // log if we are alright or not. |
|
| 118 | } |
|
| 119 | 0 | return; |
| 120 | } |
|
| 121 | ||
| 122 | /** |
|
| 123 | * This is the main loop. If there are failovers defined, then this will |
|
| 124 | * continue until the primary is re-connected. If no failovers are defined, |
|
| 125 | * this will exit automatically. |
|
| 126 | */ |
|
| 127 | private void connectAndRestore() |
|
| 128 | { |
|
| 129 | do |
|
| 130 | { |
|
| 131 | 0 | log.info( "Remote cache FAILOVER RUNNING." ); |
| 132 | ||
| 133 | // there is no active listener |
|
| 134 | 0 | if ( !alright ) |
| 135 | { |
|
| 136 | // Monitor each RemoteCacheManager instance one after the other. |
|
| 137 | // Each RemoteCacheManager corresponds to one remote connection. |
|
| 138 | 0 | String[] failovers = facade.remoteCacheAttributes.getFailovers(); |
| 139 | // we should probably check to see if there are any failovers, |
|
| 140 | // even though the caller |
|
| 141 | // should have already. |
|
| 142 | ||
| 143 | 0 | if ( failovers == null ) |
| 144 | { |
|
| 145 | 0 | log.warn( "Remote is misconfigured, failovers was null." ); |
| 146 | 0 | return; |
| 147 | } |
|
| 148 | 0 | else if ( failovers.length == 1 ) |
| 149 | { |
|
| 150 | // if there is only the primary, return out of this |
|
| 151 | 0 | if ( log.isInfoEnabled() ) |
| 152 | { |
|
| 153 | 0 | log.info( "No failovers defined, exiting failover runner." ); |
| 154 | 0 | return; |
| 155 | } |
|
| 156 | } |
|
| 157 | ||
| 158 | 0 | int fidx = facade.remoteCacheAttributes.getFailoverIndex(); |
| 159 | 0 | log.debug( "fidx = " + fidx + " failovers.length = " + failovers.length ); |
| 160 | ||
| 161 | // shouldn't we see if the primary is back up? |
|
| 162 | // If we don't check the primary, if it gets connected in the |
|
| 163 | // background, |
|
| 164 | // we will disconnect it only to put it right back |
|
| 165 | 0 | int i = fidx; // + 1; // +1 skips the primary |
| 166 | 0 | if ( log.isDebugEnabled() ) |
| 167 | { |
|
| 168 | 0 | log.debug( "stating at failover i = " + i ); |
| 169 | } |
|
| 170 | ||
| 171 | // try them one at a time until successful |
|
| 172 | 0 | for ( ; i < failovers.length && !alright; i++ ) |
| 173 | { |
|
| 174 | 0 | String server = failovers[i]; |
| 175 | 0 | if ( log.isDebugEnabled() ) |
| 176 | { |
|
| 177 | 0 | log.debug( "Trying server [" + server + "] at failover index i = " + i ); |
| 178 | } |
|
| 179 | ||
| 180 | 0 | RemoteCacheAttributes rca = null; |
| 181 | try |
|
| 182 | { |
|
| 183 | 0 | rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy(); |
| 184 | 0 | rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) ); |
| 185 | 0 | rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) ); |
| 186 | 0 | RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr ); |
| 187 | ||
| 188 | 0 | if ( log.isDebugEnabled() ) |
| 189 | { |
|
| 190 | 0 | log.debug( "RemoteCacheAttributes for failover = " + rca.toString() ); |
| 191 | } |
|
| 192 | ||
| 193 | // add a listener if there are none, need to tell rca |
|
| 194 | // what number it is at |
|
| 195 | 0 | ICache ic = rcm.getCache( rca.getCacheName() ); |
| 196 | 0 | if ( ic != null ) |
| 197 | { |
|
| 198 | 0 | if ( ic.getStatus() == CacheConstants.STATUS_ALIVE ) |
| 199 | { |
|
| 200 | // may need to do this more gracefully |
|
| 201 | 0 | log.debug( "reseting no wait" ); |
| 202 | 0 | facade.noWaits = new RemoteCacheNoWait[1]; |
| 203 | 0 | facade.noWaits[0] = (RemoteCacheNoWait) ic; |
| 204 | 0 | facade.remoteCacheAttributes.setFailoverIndex( i ); |
| 205 | ||
| 206 | 0 | synchronized ( this ) |
| 207 | { |
|
| 208 | 0 | if ( log.isDebugEnabled() ) |
| 209 | { |
|
| 210 | 0 | log.debug( "setting ALRIGHT to true" ); |
| 211 | 0 | if ( i > 0 ) |
| 212 | { |
|
| 213 | 0 | log.debug( "Moving to Primary Recovery Mode, failover index = " + i ); |
| 214 | 0 | } |
| 215 | else |
|
| 216 | { |
|
| 217 | 0 | if ( log.isInfoEnabled() ) |
| 218 | { |
|
| 219 | 0 | String message = "No need to connect to failover, the primary server is back up."; |
| 220 | 0 | log.info( message ); |
| 221 | } |
|
| 222 | } |
|
| 223 | } |
|
| 224 | ||
| 225 | 0 | alright = true; |
| 226 | ||
| 227 | 0 | if ( log.isInfoEnabled() ) |
| 228 | { |
|
| 229 | 0 | log.info( "CONNECTED to host = [" + rca.getRemoteHost() + "] port = [" |
| 230 | + rca.getRemotePort() + "]" ); |
|
| 231 | } |
|
| 232 | 0 | } |
| 233 | 0 | } |
| 234 | } |
|
| 235 | else |
|
| 236 | { |
|
| 237 | 0 | log.info( "noWait is null" ); |
| 238 | } |
|
| 239 | } |
|
| 240 | 0 | catch ( Exception ex ) |
| 241 | { |
|
| 242 | 0 | bad(); |
| 243 | // Problem encountered in fixing the caches managed by a |
|
| 244 | // RemoteCacheManager instance. |
|
| 245 | // Soldier on to the next RemoteCacheManager instance. |
|
| 246 | 0 | if ( i == 0 ) |
| 247 | { |
|
| 248 | 0 | log.warn( "FAILED to connect, as expected, to primary" + rca.getRemoteHost() + ":" |
| 249 | + rca.getRemotePort(), ex ); |
|
| 250 | 0 | } |
| 251 | else |
|
| 252 | { |
|
| 253 | 0 | log.error( "FAILED to connect to failover [" + rca.getRemoteHost() + ":" |
| 254 | + rca.getRemotePort() + "]", ex ); |
|
| 255 | } |
|
| 256 | 0 | } |
| 257 | } |
|
| 258 | 0 | } |
| 259 | // end if !alright |
|
| 260 | // get here if while index >0 and alright, meaning that we are |
|
| 261 | // connected to some backup server. |
|
| 262 | else |
|
| 263 | { |
|
| 264 | 0 | if ( log.isDebugEnabled() ) |
| 265 | { |
|
| 266 | 0 | log.debug( "ALRIGHT is true " ); |
| 267 | } |
|
| 268 | 0 | if ( log.isInfoEnabled() ) |
| 269 | { |
|
| 270 | 0 | log.info( "Failover runner is in primary recovery mode. Failover index = " |
| 271 | + facade.remoteCacheAttributes.getFailoverIndex() + "\n" + "Will now try to reconnect to primary server." ); |
|
| 272 | } |
|
| 273 | } |
|
| 274 | ||
| 275 | 0 | boolean primaryRestoredSuccessfully = false; |
| 276 | // if we are not connected to the primary, try. |
|
| 277 | 0 | if ( facade.remoteCacheAttributes.getFailoverIndex() > 0 ) |
| 278 | { |
|
| 279 | 0 | primaryRestoredSuccessfully = restorePrimary(); |
| 280 | 0 | if ( log.isDebugEnabled() ) |
| 281 | { |
|
| 282 | 0 | log.debug( "Primary recovery success state = " + primaryRestoredSuccessfully ); |
| 283 | } |
|
| 284 | } |
|
| 285 | ||
| 286 | 0 | if ( !primaryRestoredSuccessfully ) |
| 287 | { |
|
| 288 | // Time driven mode: sleep between each round of recovery |
|
| 289 | // attempt. |
|
| 290 | try |
|
| 291 | { |
|
| 292 | 0 | log.warn( "Failed to reconnect to primary server. Cache failover runner is going to sleep for " |
| 293 | + idlePeriod + " milliseconds." ); |
|
| 294 | 0 | Thread.sleep( idlePeriod ); |
| 295 | } |
|
| 296 | 0 | catch ( InterruptedException ex ) |
| 297 | { |
|
| 298 | // ignore; |
|
| 299 | 0 | } |
| 300 | } |
|
| 301 | ||
| 302 | // try to bring the listener back to the primary |
|
| 303 | } |
|
| 304 | 0 | while ( facade.remoteCacheAttributes.getFailoverIndex() > 0 || !alright ); |
| 305 | // continue if the primary is not restored or if things are not alright. |
|
| 306 | ||
| 307 | 0 | } |
| 308 | ||
| 309 | /** |
|
| 310 | * Try to restore the primary server. |
|
| 311 | * <p> |
|
| 312 | * Once primary is restored the failover listener must be deregistered. |
|
| 313 | * <p> |
|
| 314 | * The primary server is the first server defined in the FailoverServers |
|
| 315 | * list. |
|
| 316 | * |
|
| 317 | * @return boolean value indicating whether the restoration was successful |
|
| 318 | */ |
|
| 319 | private boolean restorePrimary() |
|
| 320 | { |
|
| 321 | // try to move back to the primary |
|
| 322 | 0 | String[] failovers = facade.remoteCacheAttributes.getFailovers(); |
| 323 | 0 | String server = failovers[0]; |
| 324 | ||
| 325 | 0 | if ( log.isInfoEnabled() ) |
| 326 | { |
|
| 327 | 0 | log.info( "Trying to restore connection to primary remote server [" + server + "]" ); |
| 328 | } |
|
| 329 | ||
| 330 | try |
|
| 331 | { |
|
| 332 | 0 | RemoteCacheAttributes rca = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy(); |
| 333 | 0 | rca.setRemoteHost( server.substring( 0, server.indexOf( ":" ) ) ); |
| 334 | 0 | rca.setRemotePort( Integer.parseInt( server.substring( server.indexOf( ":" ) + 1 ) ) ); |
| 335 | 0 | RemoteCacheManager rcm = RemoteCacheManager.getInstance( rca, cacheMgr ); |
| 336 | ||
| 337 | // add a listener if there are none, need to tell rca what number it |
|
| 338 | // is at |
|
| 339 | 0 | ICache ic = rcm.getCache( rca.getCacheName() ); |
| 340 | // by default the listener id should be 0, else it will be the |
|
| 341 | // listener |
|
| 342 | // originally associated with the remote cache. either way is fine. |
|
| 343 | // We just don't want the listener id from a failover being used. |
|
| 344 | // If the remote server was rebooted this could be a problem if new |
|
| 345 | // locals were also added. |
|
| 346 | ||
| 347 | 0 | if ( ic != null ) |
| 348 | { |
|
| 349 | 0 | if ( ic.getStatus() == CacheConstants.STATUS_ALIVE ) |
| 350 | { |
|
| 351 | try |
|
| 352 | { |
|
| 353 | // we could have more than one listener registered right |
|
| 354 | // now. |
|
| 355 | // this will not result in a loop, only duplication |
|
| 356 | // stop duplicate listening. |
|
| 357 | 0 | if ( facade.noWaits[0] != null && facade.noWaits[0].getStatus() == CacheConstants.STATUS_ALIVE ) |
| 358 | { |
|
| 359 | 0 | int fidx = facade.remoteCacheAttributes.getFailoverIndex(); |
| 360 | ||
| 361 | 0 | if ( fidx > 0 ) |
| 362 | { |
|
| 363 | 0 | String serverOld = failovers[fidx]; |
| 364 | ||
| 365 | 0 | if ( log.isDebugEnabled() ) |
| 366 | { |
|
| 367 | 0 | log.debug( "Failover Index = " + fidx + " the server at that index is [" |
| 368 | + serverOld + "]" ); |
|
| 369 | } |
|
| 370 | ||
| 371 | 0 | if ( serverOld != null ) |
| 372 | { |
|
| 373 | // create attributes that reflect the |
|
| 374 | // previous failed over configuration. |
|
| 375 | 0 | RemoteCacheAttributes rcaOld = (RemoteCacheAttributes) facade.remoteCacheAttributes.copy(); |
| 376 | 0 | rcaOld.setRemoteHost( serverOld.substring( 0, serverOld.indexOf( ":" ) ) ); |
| 377 | 0 | rcaOld.setRemotePort( Integer.parseInt( serverOld.substring( serverOld |
| 378 | .indexOf( ":" ) + 1 ) ) ); |
|
| 379 | 0 | RemoteCacheManager rcmOld = RemoteCacheManager.getInstance( rcaOld, cacheMgr ); |
| 380 | ||
| 381 | 0 | if ( rcmOld != null ) |
| 382 | { |
|
| 383 | // manager can remove by name if |
|
| 384 | // necessary |
|
| 385 | 0 | rcmOld.removeRemoteCacheListener( rcaOld ); |
| 386 | } |
|
| 387 | 0 | if ( log.isInfoEnabled() ) |
| 388 | { |
|
| 389 | 0 | log.info( "Successfully deregistered from FAILOVER remote server = " |
| 390 | + serverOld ); |
|
| 391 | } |
|
| 392 | } |
|
| 393 | 0 | } |
| 394 | 0 | else if ( fidx == 0 ) |
| 395 | { |
|
| 396 | // this should never happen. If there are no |
|
| 397 | // failovers this shouldn't get called. |
|
| 398 | 0 | if ( log.isDebugEnabled() ) |
| 399 | { |
|
| 400 | 0 | log.debug( "No need to restore primary, it is already restored." ); |
| 401 | 0 | return true; |
| 402 | } |
|
| 403 | } |
|
| 404 | 0 | else if ( fidx < 0 ) |
| 405 | { |
|
| 406 | // this should never happen |
|
| 407 | 0 | log.warn( "Failover index is less than 0, this shouldn't happen" ); |
| 408 | } |
|
| 409 | } |
|
| 410 | } |
|
| 411 | 0 | catch ( Exception e ) |
| 412 | { |
|
| 413 | // TODO, should try again, or somehow stop the listener |
|
| 414 | 0 | log.error( |
| 415 | "Trouble trying to deregister old failover listener prior to restoring the primary = " |
|
| 416 | + server, e ); |
|
| 417 | 0 | } |
| 418 | ||
| 419 | // Restore primary |
|
| 420 | // may need to do this more gracefully, letting the failover finish in the background |
|
| 421 | 0 | RemoteCacheNoWait failoverNoWait = facade.noWaits[0]; |
| 422 | ||
| 423 | // swap in a new one |
|
| 424 | 0 | facade.noWaits = new RemoteCacheNoWait[1]; |
| 425 | 0 | facade.noWaits[0] = (RemoteCacheNoWait) ic; |
| 426 | 0 | facade.remoteCacheAttributes.setFailoverIndex( 0 ); |
| 427 | ||
| 428 | 0 | if ( log.isInfoEnabled() ) |
| 429 | { |
|
| 430 | 0 | log.info( "Successfully reconnected to PRIMARY remote server. Substituted primary for failoverNoWait [" + failoverNoWait + "]" ); |
| 431 | } |
|
| 432 | 0 | return true; |
| 433 | } |
|
| 434 | ||
| 435 | // else alright |
|
| 436 | // if the failover index was at 0 here, we would be in a bad |
|
| 437 | // situation, unless there were just |
|
| 438 | // no failovers configured. |
|
| 439 | 0 | if ( log.isDebugEnabled() ) |
| 440 | { |
|
| 441 | 0 | log.debug( "Primary server status in error, not connected." ); |
| 442 | 0 | } |
| 443 | } |
|
| 444 | else |
|
| 445 | { |
|
| 446 | 0 | if ( log.isDebugEnabled() ) |
| 447 | { |
|
| 448 | 0 | log.debug( "Primary server is null, not connected." ); |
| 449 | } |
|
| 450 | } |
|
| 451 | } |
|
| 452 | 0 | catch ( Exception ex ) |
| 453 | { |
|
| 454 | 0 | log.error( ex ); |
| 455 | 0 | } |
| 456 | 0 | return false; |
| 457 | } |
|
| 458 | ||
| 459 | /** |
|
| 460 | * Sets the "alright" flag to false in a critical section. This flag |
|
| 461 | * indicates whether or not we are connected to any server at all. If we are |
|
| 462 | * connected to a secondary server, then alright will be true, but we will |
|
| 463 | * continue to try to restore the connection with the primary server. |
|
| 464 | * <p> |
|
| 465 | * The primary server is the first server defined in the FailoverServers |
|
| 466 | * list. |
|
| 467 | */ |
|
| 468 | private void bad() |
|
| 469 | { |
|
| 470 | 0 | if ( alright ) |
| 471 | { |
|
| 472 | 0 | synchronized ( this ) |
| 473 | { |
|
| 474 | 0 | alright = false; |
| 475 | 0 | } |
| 476 | } |
|
| 477 | 0 | } |
| 478 | } |
| This report is generated by jcoverage, Maven and Maven JCoverage Plugin. |