/*
 * Logserver
 * Copyright (C) 2017-2025 Joel Reardon
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <https://www.gnu.org/licenses/>.
 */

#ifndef __EXPLORED_RANGE__H__
#define __EXPLORED_RANGE__H__

#include <algorithm>
#include <cassert>
#include <iostream>
#include <iterator>
#include <map>
#include <optional>
#include <set>

#include "config.h"
#include "constants.h"

using namespace std;

/* This class manages search results. When logserver is handling huge files,
 * searching can take a while. Instead of doing it linearly, it uses the
 * current position in navigation to shoot search probes in both directions. As
 * the user moves, this can cause more searched and unsearched areas. This
 * class tracks where those areas touch and collapses them. ExploredRange is
 * not threadsafe; it assumes the user handles threading. */
class ExploredRange {
public:
	/* range is the maximum number of lines handed out per search task */
	ExploredRange(size_t range) : _range(range), _dir(false) {}

	virtual ~ExploredRange() {}

	/* loglines was cleared. reset the explored range to no findings and all
	 * future lines will be matched in tail mode
	 */
	virtual void clear() {
		_findings.clear();
		_explored.clear();
		_explored[0] = G::EOF_POS;
		_end = 0;
	}

	/* dump out explored segments to ostream os */
	virtual void trace(std::ostream& os) {
		os << "tot=" << _explored.size() << " ";
		for (const auto& x : _explored) {
			os << "(" << x.first << " -> " << x.second << ") ";
		}
	}

	/* This inserts findings even if the range has not been explored
	 * according to the range search. In practice this is not likely to
	 * happen, since any insertion would involve setting the whence to this
	 * position and exploring it. */
	virtual void add_finding(size_t pos) {
		_findings.insert(pos);
	}

	/* This removes a finding from a pos, e.g., because the line was edited
	 * to no longer match the search */
	virtual void remove_finding(size_t pos) {
		_findings.erase(pos);
	}

	/* occurs when loglines inserts or deletes elements in the middle,
	 * e.g., because a long line is broken. pos is the first affected line,
	 * amount the number of lines, and insert selects insertion (true) or
	 * deletion (false). Findings and explored segments at or after pos are
	 * re-indexed so they keep referring to the same lines. */
	virtual void insert_or_delete_lines(size_t pos, size_t amount,
					    bool insert) {
		/* TODO: right now deletions are only size one (or wholesale
		 * clear). The index arithmetic below is written for any
		 * amount, but only single-line deletion has been exercised */
		assert(insert || amount == 1);

		// handle the existing findings. they are visited in ascending
		// order and shifted uniformly, so the rebuilt set stays sorted
		// and the end() hint is always right
		std::set<size_t> findings;
		for (const auto& x : _findings) {
			if (x < pos) {
				// keep findings prior to event pos
				findings.insert(findings.end(), x);
			} else if (insert) {
				// insert pushes back by amount
				findings.insert(findings.end(), x + amount);
			} else if (x >= pos + amount) {
				// deletions within [pos, pos+amount) are
				// removed, otherwise pulled by amount
				findings.insert(findings.end(), x - amount);
			}
		}
		_findings = std::move(findings);

		// if explored map is no longer relevant no need to update the
		// indices of all the searched areas
		if (completed()) return;

		// maps an index at or after pos to where it lives after the
		// event. a deleted index collapses onto pos, which is where
		// the first surviving line after the deletion ends up
		const auto shift = [pos, amount, insert](size_t idx) -> size_t {
			if (insert) return idx + amount;
			if (idx >= pos + amount) return idx - amount;
			return pos;
		};

		// _end must move together with the [_end, EOF) sentinel
		// segment, otherwise extend() clamps to a stale end and the
		// lines between the two can never be marked as explored
		if (_end && *_end >= pos) _end = shift(*_end);

		std::map<size_t, size_t> explored;
		for (const auto& x : _explored) {
			if (x.second <= pos) {
				// prior to the event, unaffected
				explored[x.first] = x.second;
				continue;
			}
			// a segment that begins before pos keeps its start; one
			// that begins at or after pos moves with its lines. a
			// segment whose first line is the deleted one keeps
			// starting at pos rather than creeping onto pos - 1
			const size_t first =
				(x.first >= pos) ? shift(x.first) : x.first;
			// x.second > pos here, so it always moves unless it is
			// the EOF sentinel, which is kept as is
			const size_t second = (x.second == G::EOF_POS)
				? G::EOF_POS : shift(x.second);
			// on a key collision the later (larger) segment wins
			explored[first] = second;
		}

		// a deletion can close the gap between two segments. merge
		// segments that now touch, otherwise explore() only hands out
		// empty ranges between them and the search never completes
		auto cur = explored.begin();
		while (cur != explored.end()) {
			auto next = std::next(cur);
			if (next != explored.end() &&
			    cur->second == next->first) {
				cur->second = next->second;
				explored.erase(next);
			} else {
				++cur;
			}
		}
		_explored = std::move(explored);
	}

	/* given the current line user is looking at in whence, provides search
	 * range of lines in [start, end] to look for matches as output
	 * variables. If end < start, it means search upwards. Alternates the
	 * search direction each time */
	virtual void explore(size_t whence, size_t* start, size_t* end) {
		*start = 0;
		*end = 0;

		// step 1. determine if whence is already within a range we have
		// explored, or if we have to insert a new position in the
		// _explored map.
		// either it->first == whence, > whence, or it is at end
		auto it = _explored.lower_bound(whence);

		// insert is true if whence is not part of any segment in
		// _explored, either before the first start, or between some
		// [end, start] sequence.
		bool insert = false;
		if (it == _explored.end() || it->first != whence) {
			if (it == _explored.begin()) {
				// _explored is empty, or its first element is
				// past whence
				insert = true;
			} else {
				// otherwise go back and check if whence is part
				// of the previous range
				--it;
				if (it->second < whence) insert = true;
			}
		}

		// step 2. insert an empty placeholder segment if we have to,
		// and point the iterator at it
		if (insert) {
			it = _explored.insert_or_assign(whence, whence).first;
		}
		assert(it != _explored.end());
		assert(_explored.size());

		// step 3. it now points to a valid element in _explored.
		// alternate directions and grow out its explored range
		// accounting for other elements in _explored that it will
		// overlap
		_dir = !_dir;
		// if we start at an endpoint, configure the direction
		if (it->first == 0) _dir = true;
		if (_end && it->second >= *_end) _dir = G::DIR_UP;
		if (!_dir) {
			// search up, at most _range lines and never below 0
			*start = it->first;
			*end = (*start < _range) ? 0 : *start - _range;
			// stop where the previous segment already ends
			if (it != _explored.begin()) {
				--it;
				if (it->second > *end) *end = it->second;
			}
		} else {
			// search down
			*start = it->second;
			*end = *start + _range;
			++it;
			if (it != _explored.end()) {
				// stop where the next segment already starts
				if (it->first < *end) *end = it->first;
			} else if (_end) {
				// if we have an _end marker, all new lines
				// would be searched at the time of insertion
				// into loglines, so no need to search
				if (*end > *_end) *end = *_end;
			}
		}
	}

	// mark the last element we need to search for based on the size of the
	// log right now. all subsequent matches will be determined at the time
	// of insertion
	virtual void mark_end(size_t end) {
		_end = end;
		/* inserts a new explored range from the marked end to a pseudo
		 * max loglines size to simplify logic regarding gaps between
		 * positions and explored ranges across all keywords */
		auto it = _explored.end();
		if (_explored.size() && (--it)->second == end) {
			// last segment already reaches end, just open it up
			it->second = G::EOF_POS;
		} else {
			_explored[end] = G::EOF_POS;
		}
	}

	/* returns the percent (0 to 100) that this keyword has done searching.
	 * requires that an end has been marked */
	virtual int percent() {
		assert(_end);
		const size_t total = *_end;
		// nothing to search (e.g., after clear()): everything is
		// matched in tail mode, so report done instead of dividing
		// by zero
		if (total == 0) return 100;

		size_t done = 0;
		for (const auto& x : _explored) {
			if (x.second != G::EOF_POS) {
				done += x.second - x.first;
			} else if (x.first < total) {
				// only the part of the open-ended segment up to
				// the marked end counts
				done += total - x.first;
			}
		}
		return static_cast<int>(std::min(done, total) * 100 / total);
	}

	// log lines follower has found a new match in a new line, add it at
	// the end of _findings. Usually due to streaming data, but can also
	// occur when user hits break on a line, so insert with end() hint when
	// it is past the _end pos.
	virtual void post_end(size_t pos) {
		assert(_end != std::nullopt);
		if (pos > *_end) [[likely]] _findings.insert(_findings.end(), pos);
		else [[unlikely]] _findings.insert(pos);
	}

	// return true if our search is complete, because there is a single
	// element in explored from 0->_end
	virtual bool completed() {
		// loglines has been cleared, everything is matched in tail mode
		if (_end && !*_end) return true;

		// if there are more than one range there will be a gap between
		// them
		if (_explored.size() != 1) return false;
		auto it = _explored.begin();

		// there is one range with a gap before
		if (it->first != 0) return false;
		// range has sentinel value indicating full exploration
		if (it->second == G::EOF_POS) {
			assert(_end);
			return true;
		}
		// there is one range with a gap after
		return false;
	}

	/* we have results back after exploring the range start to end.
	 * based on how explore assigns start / end we know there is an element
	 * in _explored with either key==start or value==start. find it and
	 * extend the correct direction, then check if it bumps against another
	 * element and merge them */
	virtual void extend(size_t start, size_t end,
			    const std::set<size_t>& results) {
		assert(_explored.size());
		assert(start != end);
		_findings.insert(results.begin(), results.end());

		// it is possible we assigned a search before the _end got
		// marked, but it got marked before the search finished.
		// in this case ignore the range past _end since
		// we'll get those through insert follower
		if (_end) {
			if (start >= *_end) start = *_end;
			if (end >= *_end) end = *_end;
		}
		// the whole searched range is at or past _end (or the caller
		// passed an empty range): there is nothing to record, and
		// continuing would erase the element we are about to update
		if (start == end || _explored.empty()) return;

		// step 1. find the relevant element in _explored
		auto it = _explored.lower_bound(start);
		if (it == _explored.end() || it->first != start) {
			if (it == _explored.begin()) {
				// no element starts at or before start, so this
				// range did not come from explore()
				assert(false);
				return;
			}
			--it;
		}

		// step 2. it is now valid and equal to the key or value for
		// some element in _explored (or equal to both). depending on
		// the direction we explored, assert start is in the correct
		// position of it and extend the range.
		if (start < end) {
			// searched down: push the end of the segment out
			assert(it->second == start);
			it->second = end;
			++it;
		} else {
			// searched up: the key changes, so re-insert the
			// segment under its new start
			assert(it->first == start);
			const size_t upper = it->second;
			_explored.erase(it);
			it = _explored.insert_or_assign(end, upper).first;
		}

		// step 3. check if our extension allows two adjacent elements
		// in _explored to be merged. it now points to the later of the
		// two candidates, so if it is either the beginning or the
		// invalid end element then there was no element to bump into
		if (it != _explored.begin() && it != _explored.end()) {
			auto prev = std::prev(it);
			// we have covered the gap
			if (prev->second == it->first) {
				prev->second = it->second;
				_explored.erase(it);
			}
		}
	}

	/* returns true if the line at position pos is a match */
	virtual inline bool is_match(size_t pos) {
		return _findings.count(pos);
	}

	/* returns the position of next matching line given a starting point in
	 * whence and a direction to look. Looking down (dir true) considers
	 * whence itself, looking up only lines before whence. It returns
	 * G::NO_POS if there is no finding in that direction, or if there is a
	 * gap in the searched ranges between whence and the result */
	virtual inline size_t next_match(size_t whence, bool dir) {
		auto it = _findings.lower_bound(whence);
		if (dir) {
			if (it == _findings.end()) return G::NO_POS;
		} else {
			if (it == _findings.begin()) return G::NO_POS;
			--it;
		}
		if (gapped(*it, whence)) return G::NO_POS;
		return *it;
	}

	/* returns the position where exploration is gap-free from parameter
	 * whence in direction parameter dir. If whence is not in an explored
	 * range returns G::NO_POS */
	virtual inline size_t next_range(size_t whence, bool dir) {
		return next_range_locked(whence, dir);
	}

	/* inserts all the findings we have into the set parameter lines, i.e.,
	 * doing an OR search on matching lines */
	virtual void disjunctive_join(std::set<size_t>* lines) const {
		std::set<size_t> ret;

		std::set_union(lines->begin(), lines->end(),
			       _findings.begin(), _findings.end(),
			       std::inserter(ret, ret.begin()));
		lines->swap(ret);
	}

	/* replaces input set parameter lines with only those lines that also
	 * appear in our findings, i.e., an AND search on matching lines */
	virtual void conjunctive_join(std::set<size_t>* lines) {
		std::set<size_t> ret;

		std::set_intersection(lines->begin(), lines->end(),
				      _findings.begin(), _findings.end(),
				      std::inserter(ret, ret.begin()));
		lines->swap(ret);
	}

protected:
	/* returns true if there is a gap between pos1 and pos2, so that we
	 * don't display results beyond the gap. because follow up lines can
	 * appear as findings but aren't representing explored ranges we cannot
	 * be sure pos1 is in an explored range */
	virtual inline bool gapped(size_t pos1, size_t pos2) {
		if (pos1 == pos2) return false;
		// find the segment that starts at or before pos1
		auto it = _explored.lower_bound(pos1);
		if (it == _explored.end() || it->first != pos1) {
			if (it == _explored.begin()) return true;
			--it;
		}
		// both positions have to sit in that same segment
		if (!is_within(it->first, pos1, it->second)) return true;
		return !is_within(it->first, pos2, it->second);
	}

	/* returns true if start <= val < end, i.e., val is in the searched
	 * segment [start, end) */
	static inline bool is_within(size_t start, size_t val, size_t end) {
		return (start <= val && val < end);
	}

	/* returns G::NO_POS if whence is not inside a searched segment.
	 * Otherwise it finds the segment it is in and returns the end (dir
	 * true) or start (dir false) of it */
	virtual inline size_t next_range_locked(size_t whence, bool dir) {
		auto it = _explored.upper_bound(whence);
		// first explored segment is after whence
		if (it == _explored.begin()) return G::NO_POS;
		--it;
		// whence is after a segment
		if (it->second < whence) return G::NO_POS;

		assert(it->first <= whence);
		assert(whence <= it->second);
		if (dir) return it->second;
		else return it->first;
	}

	// map of all the searched segments in loglines. empty map means we
	// haven't searched anything, otherwise it consists of [start, end)
	// pairs indicating all positions from start to end-1 have been looked
	// at. gaps occur when searching is done from different places and
	// continues until the entire map is a single element from
	// [0, loglines size).
	std::map<size_t, size_t> _explored;

	// TODO: on copy, invalidate findings if its within a blocksize and
	// validate on those findings instead of restarting the search

	// the set of matches we've found by searching
	std::set<size_t> _findings;

	// when issuing searching tasks, the number of elements to search for.
	// this is to prevent time wasted on locking and unlocking loglines, as
	// well as holding a loglines lock for too long
	size_t _range;

	// the direction that we last searched, so that we can alternate going
	// up and down if we are at some particular line and the search takes a
	// while
	bool _dir;

	// stores the number of lines of loglines so we know when we are done
	// searching. while new lines can appear in loglines for streaming
	// input, when _end is set we require that those lines are processed as
	// they are added instead of being searched for later
	std::optional<size_t> _end;
};

#endif  // __EXPLORED_RANGE__H__
