<?php
    set_time_limit(5000); // 5000 seconds (~83 minutes). NOTE(review): this comment used to say "5mins", which would be 300 — confirm the intended limit
    include_once dirname(__DIR__).'/config.php';
    require_once '../client/bingc.lib.php'; // client for search service calls to get results metadata
    require_once dirname(__DIR__) . '/lib/mysqlidb/MysqliDb.php';

    // $CFG is presumably defined by config.php (TODO confirm). At file scope the
    // `global` statement only matters if this script is included from inside a function.
    global $CFG;

    // Load the raw keyword export: one array entry per line, line endings stripped.
    $kw_file = dirname(__DIR__) . '/data/raw_kw_data_piped.txt';
    $lines = file($kw_file, FILE_IGNORE_NEW_LINES);
    if ($lines === false) {
        // Without input there is nothing to process; stop here instead of
        // carrying on and overwriting the output files with empty results.
        exit('Unable to read ' . $kw_file);
    }

    // Print the first line as a quick sanity check (an empty file prints nothing).
    echo isset($lines[0]) ? $lines[0] : '';

    // Counters and accumulators filled by the parsing loop further down.
    $i = 0;
    $kcount = 0;
    $kw_a = array();   // accepted keywords
    $slug_a = array(); // slugs derived from the URL paths

    // Database handle configured from $CFG; the only code that uses it in this
    // script (the direct insert) is currently commented out.
    $db = new \MysqliDb (Array(
            'host' => $CFG->dbhost,
            'username' => $CFG->dbuser,
            'password' => $CFG->dbpass,
            'db' => $CFG->dbname,
            'port' => 3306,
            'prefix' => $CFG->prefix,
            'charset' => 'utf8'));

    // Walk every raw export line (pipe-separated; column 1 holds the URL path,
    // column 2 the page title) and collect the usable keywords plus the slug
    // derived from each path.
    foreach ($lines as $ln) {
        $cols = explode('|', $ln);

        // Blank or truncated lines have no path/title columns: skip them
        // instead of reading undefined offsets.
        if (!isset($cols[1], $cols[2])) {
            continue;
        }
        $title = $cols[2];
        $path = $cols[1];

        // Only search pages (?dq=) and /worksheet/ pages are processed;
        // every other path is ignored.
        $is_search = strpos($path, '?dq=') !== false;
        if (!$is_search && strpos($path, '/worksheet/') === false) {
            continue;
        }
        $kcount++;

        // Strip the site's title suffixes to get the bare keyword.
        $keyword = str_replace('- Printable Worksheets', '', $title);
        $keyword = trim(str_replace('Worksheets', '', $keyword));

        // Reject empty / "(not set)" titles, purely numeric keywords, anything
        // with characters other than letters, digits and whitespace, keywords
        // shorter than 3 bytes, and paths that contain "cache:" or "&".
        $is_usable = $keyword !== ''
            && $keyword !== '(not set)'
            && !preg_match('/^[0-9 ]+$/', $keyword)
            && preg_match('/^[a-zA-Z0-9\s]*$/', $keyword)
            // strpos() takes (haystack, needle): look for '?' inside the keyword.
            && strpos($keyword, '?') === false
            && substr_count($path, 'cache:') < 1
            && substr_count($path, '&') < 1
            && strlen($keyword) > 2;
        if (!$is_usable) {
            continue;
        }

        array_push($kw_a, $keyword);

        // Derive the slug source from the path. The checks above guarantee
        // that the exploded pieces used here exist ('?dq=' contains '=',
        // '/worksheet/' contains two slashes).
        if ($is_search) {
            $pieces = explode('=', $path);
            $raw_slug = $pieces[1];
        } else {
            $pieces = explode('/', $path);
            // Treat existing dashes as spaces so they are normalised below.
            $raw_slug = str_replace('-', ' ', $pieces[2]);
        }

        // Collapse whitespace runs, lower-case, trim, then join words with dashes.
        $raw_slug = preg_replace('/\s\s+/', ' ', $raw_slug);
        $path = str_replace(' ', '-', trim(strtolower($raw_slug)));
        array_push($slug_a, $path);
    }

    // Put the keywords in sorted order, then report the list sizes before and
    // after duplicate removal. array_unique() keeps the first occurrence of
    // each value and preserves the surviving keys.
    sort($kw_a);

    $kw_total = count($kw_a);
    $slug_total = count($slug_a);
    echo 'Total Keywords->', $kw_total, '<br>';
    echo 'Total Slugs->', $slug_total, '<br>';

    $kw_a = array_unique($kw_a);
    $slug_a = array_unique($slug_a);

    $kw_unique_total = count($kw_a);
    $slug_unique_total = count($slug_a);
    echo 'Total Unique Keywords->', $kw_unique_total, '<br>';
    echo 'Total Unique Slugs->', $slug_unique_total, '<br>';

    //print_r($kw_a);

    // insert keywords into db
    /*
    foreach($kw_a as $kw) {
        $db->insert('ascga_keyword',  Array ("keyword" => $kw, "slug" => $kw));
    }
    */

    //echo json_encode($kw_a, JSON_PRETTY_PRINT);

    // Build the SQL import script: session preamble, table definition, a single
    // multi-row INSERT holding every keyword the keyword service accepts, and
    // the closing COMMIT. Lines are terminated with CRLF.
    $eol = "\r\n";
    $sql_script = implode($eol, array(
        'SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";',
        'SET AUTOCOMMIT = 0;',
        'START TRANSACTION;',
        'SET time_zone = "+00:00";',
        '',
        'CREATE TABLE bng_ascga_keyword (',
        'keyword varchar(200) COLLATE utf8mb4_unicode_ci NOT NULL,',
        'slug varchar(200) COLLATE utf8mb4_unicode_ci NOT NULL',
        ') ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE=utf8mb4_unicode_ci;',
        '',
    )) . $eol;

    // Render a value as a single-quoted MySQL string literal. Backslashes and
    // single quotes are escaped so a title or slug containing them cannot break
    // (or inject into) the generated statement. Backslash escaping is valid here
    // because the SQL_MODE set in the preamble does not include NO_BACKSLASH_ESCAPES.
    $sql_quote = function ($value) {
        return "'" . strtr((string) $value, array('\\' => '\\\\', "'" => "''")) . "'";
    };

    $bclient = new bingc\client();

    // One "('title', 'slug')" tuple per accepted keyword. The tuples are joined
    // after the loop so the separators and the final ';' stay correct no matter
    // which keywords (including the last one) the service rejects.
    $rows = array();

    foreach ($kw_a as $kw) {
        // using keyword service to build slug
        $response = $bclient->check_keyword($kw, 'ws', 'ws.asc-ga.rokkada.com');

        $kw_info = isset($response['data']['query']['keyword'])
            ? $response['data']['query']['keyword']
            : null;

        // Accept the keyword only when the service returned a complete,
        // valid keyword record.
        if (!empty($kw_info['is_valid']) && isset($kw_info['slug'], $kw_info['title_text'])) {
            $slug = $kw_info['slug'];
            $title = $kw_info['title_text'];
            $rows[] = '(' . $sql_quote($title) . ', ' . $sql_quote($slug) . ')';
        } else {
            echo 'INVALID KEYWORD->' . $kw . '<br>';
        }
    }

    // Emit the INSERT only when there is at least one row; an INSERT with an
    // empty VALUES list would make the whole script fail to import.
    if (count($rows) > 0) {
        $sql_script .= 'INSERT INTO bng_ascga_keyword (keyword, slug) VALUES' . $eol;
        $sql_script .= implode(',' . $eol, $rows) . ';' . $eol;
    }

    $sql_script .= 'COMMIT;' . $eol;

    // Persist the results next to the input data: the de-duplicated keyword
    // list (one keyword per line) and the generated SQL script.
    $data_dir = dirname(__DIR__) . '/data';

    $keyword_dump = implode(PHP_EOL, $kw_a);
    file_put_contents($data_dir . '/keywords.txt', $keyword_dump);

    file_put_contents($data_dir . '/sql_test.sql', $sql_script);