<?php
//--
/*
  Concerning PHP code:
  - This code is part of the "A1 Website Search Engine" program and therefore the same copyright and license.
  - You ARE allowed to use, edit, upload and distribute this ONLY WHEN used as part of an "A1 Website Search Engine" created search solution.
  - Please do not remove this comment.
*/
//--
/*
   We parse the JavaScript "data" files instead of generating and reading DB/SQL or PHP array data structures:
   - Downside (medium): Speed loss reading data.
   - Downside (medium): Returns a little too much data.
   - Upside (small): Easy to return JS code back to client-side javascript.
   - Upside (small): Not implementing same algorithms in each server-side language.
*/
//--
/*
  http://cgp.localhost/msa1-search-code.php?search_p=game&search_f=jsraw
  http://microsys.localhost/search/msa1-search-code.php?search_p=sitemap&search_f=jsraw
  http://microsys.localhost/search/msa1-search-code.php?search_p=site%20map&search_f=jsraw
  http://microsys.localhost/search/msa1-search-code.php?search_p=sitemap generator&search_f=jsraw
  http://microsys.localhost/search/msa1-search-code.php?search_ud=474,192,200,191,209,215,229,204,210,202,214,216,193,224,211&search_f=jsraw
*/
/*
  TODO:CONSIDER
      insert: ini_set(memory_limit,16M); ?

  TODO:CONSIDER:
    read file in chunks
    stackoverflow.com/questions/2749441/php-fastest-way-possible-to-read-contents-of-a-file
*/
//--
?>
<?php
  //--
  // Script-wide state. The output buffer and the URI index list are pulled into
  // functions with "global"; the two file-content names are initialised here as
  // empty strings as well.
  //--
  $MSA1S_GV__file_content_u = $MSA1S_GV__file_content_w = $MSA1S_GV__output_content = '';
  //--
  // Indices of the URI records that should be written to the output.
  //--
  $MSA1S_GV__URIsListArr = array();
?>
<?php

    //--
    /**
     * Extracts delimited items from a string and appends them to an array.
     *
     * Scanning starts at offset 0. For every item the optional start separator is
     * located first (scanning stops as soon as it cannot be found any more), then
     * the text up to the next end separator is taken. When no further end
     * separator exists, the remainder of the string becomes the last item.
     *
     * The string and the array are passed by reference so that large values are
     * not copied; the string itself is never modified.
     *
     * @param string $a__l_rest     Text to scan.
     * @param string $start_sep_str Marker that precedes each item, or '' when items have no leading marker.
     * @param string $end_sep_str   Marker that terminates each item. An empty value means
     *                              "no terminator": everything after the start position is one item.
     * @param array  $into_array    Receives the items; existing entries are kept and new ones appended.
     * @param bool   $ensure_unique When true, an item that is already in $into_array is not appended again.
     * @return void
     */
    function MSA1S_PHP_SUB__StrItemsIntoArr(&$a__l_rest, $start_sep_str, $end_sep_str, &$into_array, $ensure_unique) {
      $total_len = strlen($a__l_rest); // the string is never changed, so measure it once
      $start_sep_len = strlen($start_sep_str);
      $end_sep_len = strlen($end_sep_str);
      $ps = 0;
      while ($ps < $total_len) {
        //--
        // look for start separator (if any)
        //--
        if ($start_sep_len > 0) {
          $ps = strpos($a__l_rest, $start_sep_str, $ps);
          if ($ps === false) {
            break;
          }
          $ps += $start_sep_len;
        }
        //--
        // look for end separator
        // An empty needle makes strpos() return the current offset on PHP 8,
        // which would never advance $ps and loop forever - treat it as "not found".
        //--
        $pe = ($end_sep_len > 0) ? strpos($a__l_rest, $end_sep_str, $ps) : false;
        if ($pe !== false) {
          $s = substr($a__l_rest, $ps, $pe - $ps);
        }
        else {
          $s = substr($a__l_rest, $ps);
        }
        //--
        // Strict membership test: with loose "==" the strings "1", "01" and "1.0"
        // would all count as the same item.
        //--
        if (!$ensure_unique || !in_array($s, $into_array, true)) {
          $into_array[] = $s;
        }
        if ($pe === false) {
          break;
        }
        $ps = $pe + $end_sep_len;
      }
    }

    //--
    /**
     * Appends the URI records selected in $MSA1S_GV__URIsListArr to the global output buffer.
     *
     * The file "msa1-search-data-u.js.inc" is read and split on "\n"; every entry of
     * $MSA1S_GV__URIsListArr is used as a zero-based line index into that list. Each
     * selected line is appended to $MSA1S_GV__output_content followed by "\n".
     *
     * Nothing is appended when the data file is missing, empty or unreadable, and
     * indices that do not exist in the file are skipped.
     *
     * @param bool $a__include_ext When false, everything from the first ",P:" onwards is cut
     *                             off and the record is closed again with "};". Lines without
     *                             ",P:" are emitted unchanged.
     * @return void
     */
    function MSA1S_PHP_SUB__URIs($a__include_ext) {
      //--
      global $MSA1S_GV__output_content;
      global $MSA1S_GV__URIsListArr;
      //--
      // load the data file (same guards as used for the word-list files)
      //--
      $file_name = 'msa1-search-data-u.js.inc';
      if (!file_exists($file_name)) {
        return;
      }
      $fs = filesize($file_name);
      if ($fs === false || $fs <= 0) {
        return; // fread() rejects a length of 0
      }
      $h = fopen($file_name, 'rb');
      if ($h === false) {
        return;
      }
      $l__file_content = fread($h, $fs);
      fclose($h);
      if ($l__file_content === false) {
        return;
      }
      //--
      // one record per line; release the raw string before building the output
      //--
      $c_arr = explode("\n", $l__file_content);
      $l__file_content = null;
      unset($l__file_content);
      //--
      // *$MSA1S_GV__URIsListArr* stores indices, hence the name *$idx*
      //--
      foreach ($MSA1S_GV__URIsListArr as $idx) {
        if (!isset($c_arr[$idx])) {
          // index may come straight from the request; ignore anything the file does not contain
          continue;
        }
        $s = $c_arr[$idx];
        //--
        if ($a__include_ext == false) {
          $p_start = strpos($s, ',P:');
          if ($p_start !== false) {
            // only truncate when the marker exists - otherwise the whole
            // record would be replaced by a bare "};"
            $s = substr($s, 0, $p_start) . '};';
          }
        }
        //--
        $MSA1S_GV__output_content .= $s . "\n";
      }
    }
?>
<?php
  /**
   * Splits a search phrase into its individual words.
   *
   * Words are separated by runs of spaces, tabs or line breaks. Runs count as a
   * single separator and leading/trailing separators are ignored, so the result
   * never contains empty words. (The former str_replace("\s\s", ...) searched for
   * the literal characters backslash-s-backslash-s and therefore never collapsed
   * double spaces.)
   *
   * @param string $a__search_str Phrase to split; '' (or a non-scalar value) gives an empty array.
   * @return array List of words in their original order, indexed from 0.
   */
  function MSA1S_PHP__search_as_arr($a__search_str) {
    if (!is_scalar($a__search_str)) {
      return array();
    }
    // Explicit ASCII class instead of \s so bytes of multi-byte characters are never treated as blanks.
    $r__search_arr = preg_split('/[ \t\r\n]+/', (string)$a__search_str, -1, PREG_SPLIT_NO_EMPTY);
    if ($r__search_arr === false) {
      return array();
    }
    return $r__search_arr;
  }
?>
<?php
  /**
   * Returns a query-string parameter as a string.
   *
   * @param string $a__name Name to look up in $_GET.
   * @return string|null The value, or null when the parameter is absent or is not a
   *                     plain string (PHP delivers an array for "?name[]=x").
   */
  function MSA1S_PHP_SUB__GetStr($a__name) {
    if (isset($_GET[$a__name]) && is_string($_GET[$a__name])) {
      return $_GET[$a__name];
    }
    return null;
  }

  /**
   * Records one word-list line that belongs to a match.
   *
   * - pair:  the text inside P:"..." (if present) is split into words and queued in
   *          $a__listof_search_arr as a further phrase to search for.
   * - group: every item found between "U:" and "," is added (without duplicates) to
   *          the global $MSA1S_GV__URIsListArr.
   * - For format 'jsraw' the line is appended to $a__output, but only once per
   *          line key (the text from the first "W" up to and including the next "=").
   *
   * @param string $a__line              Raw line from a word-list file.
   * @param string $a__search_format     Requested output format.
   * @param array  $a__listof_search_arr Queue of phrases (each a list of words); appended to.
   * @param array  $a__word_lookup_arr   Line keys that were already emitted; appended to.
   * @param string $a__output            Output collected for the current word list; appended to.
   * @return void
   */
  function MSA1S_PHP_SUB__AddMatchedLine($a__line, $a__search_format, &$a__listof_search_arr, &$a__word_lookup_arr, &$a__output) {
    //--
    global $MSA1S_GV__URIsListArr;
    //--
    // pair
    //--
    $p_start = strpos($a__line, 'P:"');
    if ($p_start !== false) {
      $p_end = strpos($a__line, '"', $p_start + 3);
      $l_pair_str = substr($a__line, $p_start + 3, $p_end - $p_start - 3);
      $a__listof_search_arr[] = MSA1S_PHP__search_as_arr($l_pair_str);
    }
    //--
    // group
    //--
    MSA1S_PHP_SUB__StrItemsIntoArr($a__line, 'U:', ',', $MSA1S_GV__URIsListArr, true);
    if ($a__search_format == 'jsraw') {
      $p_start = strpos($a__line, 'W');
      $p_end = strpos($a__line, '=', $p_start);
      $s = substr($a__line, $p_start, $p_end - $p_start + 1);
      if ($s != '' && !in_array($s, $a__word_lookup_arr, true)) {
        $a__word_lookup_arr[] = $s;
        $a__output .= $a__line . "\n";
      }
    }
  }

  /**
   * Searches the word-list files for one phrase and collects the matching lines.
   *
   * For n = 1..3 (limited by the number of words in the phrase) the file
   * "msa1-search-data-w<n>.js.inc" is scanned line by line. A line matches when
   * the word inside {W:"..."} equals any run of n consecutive search words joined
   * by single spaces. A line that carries GE:"<word>" opens a group that runs up
   * to the line whose word equals <word>; when any line inside a group matches,
   * all lines of that group are recorded once the group ends.
   *
   * Matching lines are appended to the global $MSA1S_GV__output_content (format
   * 'jsraw' only) and their URI indices to the global $MSA1S_GV__URIsListArr.
   *
   * NOTE(review): the previous code also loaded w4/w5 but had no comparison for
   * four- and five-word phrases, so those files could never produce a match.
   * They are no longer read; results are unchanged. Confirm whether matching of
   * longer phrases is wanted before raising the limit.
   *
   * @param array  $a__search_arr        Words of the phrase (list indexed from 0).
   * @param string $a__search_format     Requested output format.
   * @param array  $a__listof_search_arr Queue of phrases; related phrases found in P:"..." are appended.
   * @param array  $a__word_lookup_arr   Line keys that were already emitted; appended to.
   * @return void
   */
  function sub_search($a__search_arr, $a__search_format, &$a__listof_search_arr, &$a__word_lookup_arr) {
    //--
    global $MSA1S_GV__output_content;
    //--
    $l__search_cnt = count($a__search_arr);
    if ($l__search_cnt < 1) {
      return;
    }
    //--
    // run through word lists
    //--
    $l__max_words = 3;
    for ($i = 0; $i < $l__max_words; $i++) {
      $l__words_per_phrase = $i + 1;
      if ($l__words_per_phrase > $l__search_cnt) {
        break; // longer lists need even more words, so nothing further can match
      }
      //--
      // every run of *$l__words_per_phrase* consecutive search words is a candidate
      // (built once per list instead of once per line)
      //--
      $l__phrases = array();
      for ($i_search = 0; $i_search + $l__words_per_phrase <= $l__search_cnt; $i_search++) {
        $l__phrases[] = implode(' ', array_slice($a__search_arr, $i_search, $l__words_per_phrase));
      }
      //--
      // load word list; a missing, empty or unreadable file simply yields no lines
      //--
      $l__file_content = '';
      $file_name = 'msa1-search-data-w' . $l__words_per_phrase . '.js.inc';
      if (file_exists($file_name)) {
        $fs = filesize($file_name);
        if ($fs > 0) {
          $h = fopen($file_name, 'rb');
          if ($h !== false) {
            $l__file_content = fread($h, $fs);
            fclose($h);
            if ($l__file_content === false) {
              $l__file_content = '';
            }
          }
        }
      }
      $c_lines_arr = explode("\n", $l__file_content);
      $l__file_content = null;
      unset($l__file_content);
      //--
      $tmp__output_content = '';
      $w_lookahead_use = false;     // a match was seen inside the current group
      $w_lookahead_stop = '';       // word that ends the current group ('' = not inside a group)
      $w_lookahead_addbuf = array(); // lines of the current group
      //--
      foreach ($c_lines_arr as $l_raw) {
        $p_start = strpos($l_raw, '{W:"');
        if ($p_start === false) {
          continue;
        }
        $p_end = strpos($l_raw, '"', $p_start + 4);
        $l_word = substr($l_raw, $p_start + 4, $p_end - $p_start - 4);
        if ($l_word == '') {
          continue;
        }
        //--
        // after having retrieved "word" we have "rest" of line
        //--
        $l_rest = substr($l_raw, $p_end + 2);
        //--
        // inside a group: buffer the line
        // outside a group: check whether this line opens one (so a later match has the group start)
        //--
        if ($w_lookahead_stop != '') {
          $w_lookahead_addbuf[] = $l_raw;
        }
        else {
          $p_start = strpos($l_rest, 'GE:"');
          if ($p_start !== false) {
            $p_end = strpos($l_rest, '"', $p_start + 4);
            $w_lookahead_stop = substr($l_rest, $p_start + 4, $p_end - $p_start - 4);
            $w_lookahead_addbuf[] = $l_raw;
          }
        }
        //--
        // strict comparison: with "==" numeric-looking words such as "1" and "01" would match each other
        //--
        if (in_array($l_word, $l__phrases, true)) {
          if ($w_lookahead_stop != '') {
            // match inside a group: remember it, the whole group is recorded when it ends
            $w_lookahead_use = true;
          }
          else {
            // match outside any group: record just this line
            MSA1S_PHP_SUB__AddMatchedLine($l_raw, $a__search_format, $a__listof_search_arr, $a__word_lookup_arr, $tmp__output_content);
          }
        }
        //--
        // end of group reached: record all its lines if any of them matched, then reset
        //--
        if ($w_lookahead_stop === $l_word) {
          if ($w_lookahead_use) {
            foreach ($w_lookahead_addbuf as $l_buffered) {
              MSA1S_PHP_SUB__AddMatchedLine($l_buffered, $a__search_format, $a__listof_search_arr, $a__word_lookup_arr, $tmp__output_content);
            }
          }
          $w_lookahead_use = false;
          $w_lookahead_stop = '';
          $w_lookahead_addbuf = array();
        }
      }
      //--
      $MSA1S_GV__output_content .= $tmp__output_content;
      //--
      // release the big buffers before the next list is loaded
      //--
      unset($w_lookahead_addbuf, $tmp__output_content, $c_lines_arr);
    }
  }

  /**
   * Handles one search request and fills the global $MSA1S_GV__output_content.
   *
   * Request parameters (names are built from $get_search_var_base):
   * - <base>_ud  comma separated URI indices; when given, the full records of
   *              exactly these URIs are returned and no phrase search is done.
   * - <base>_p   search phrase (only used when no URI indices were given).
   * - <base>_f   output format, defaults to 'jsraw'.
   *
   * For a phrase search the phrase is looked up in the word lists; related
   * phrases found on the way are queued and searched as well (each distinct
   * phrase only once). Afterwards the URI records referenced by the matches are
   * appended without their ",P:" extension part.
   *
   * @return void
   */
  function MSA1S_PHP__output() {
    //--
    global $MSA1S_GV__output_content;
    global $MSA1S_GV__URIsListArr;
    //--
    // Placeholder - presumably replaced when the search solution is generated
    // (the example URLs in this file use "search"). Do not edit.
    //--
    $get_search_var_base = '<!-- %A1WSE_GET_SEARCH% -->';
    //--
    // _ud = uri details
    //--
    $l__ud = MSA1S_PHP_SUB__GetStr($get_search_var_base . '_ud');
    if ($l__ud !== null) {
      $MSA1S_GV__URIsListArr = array_unique(explode(',', trim($l__ud)));
    }
    //--
    // _p = phrase
    //--
    $search_str = '';
    if (count($MSA1S_GV__URIsListArr) == 0) {
      $l__p = MSA1S_PHP_SUB__GetStr($get_search_var_base . '_p');
      if ($l__p !== null) {
        $search_str = $l__p;
      }
    }
    //--
    // _f = format
    //--
    $search_format = MSA1S_PHP_SUB__GetStr($get_search_var_base . '_f');
    if ($search_format === null || $search_format === '') {
      $search_format = 'jsraw';
    }
    //--
    // explicit URI list requested: return those records including extension data
    //--
    if (count($MSA1S_GV__URIsListArr) > 0) {
      MSA1S_PHP_SUB__URIs(true);
    }
    //--
    // phrase search; *sub_search* appends related phrases to *$listof_search_arr*
    // while we iterate, hence the count() in the loop condition
    //--
    $listof_search_arr = array(MSA1S_PHP__search_as_arr($search_str));
    $word_lookup_arr = array();
    for ($listof_iter = 0; $listof_iter < count($listof_search_arr); $listof_iter++) {
      $search_arr = $listof_search_arr[$listof_iter];
      //--
      // skip phrases that were already handled earlier in the queue
      // (whole-array comparison; the former loop only compared the last word,
      // so e.g. "xml sitemap" was skipped after "web sitemap")
      //--
      $same = false;
      for ($i = $listof_iter - 1; $i > -1; $i--) {
        if ($listof_search_arr[$i] === $search_arr) {
          $same = true;
          break;
        }
      }
      //--
      if ($same == false) {
        sub_search($search_arr, $search_format, $listof_search_arr, $word_lookup_arr);
      }
    }
    //--
    // phrase search: append the URI records referenced by the matches (without extension data)
    //--
    if ($search_str != '') {
      if (count($MSA1S_GV__URIsListArr) > 0) {
        MSA1S_PHP_SUB__URIs(false);
      }
    }
  }
?>
<?php
  //--
  // Entry point: handle the request, then send the collected output buffer to the client.
  //--
  MSA1S_PHP__output();
  print $MSA1S_GV__output_content;
?>