//  Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// 		http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package simple

import (
	"unicode/utf8"

	"github.com/blevesearch/bleve/registry"
	"github.com/blevesearch/bleve/search/highlight"
)

const (
	// Name is the key under which init registers this package's
	// Constructor with the registry.
	Name = "simple"

	// defaultFragmentSize is the fragment size Constructor falls back to
	// when the config does not provide a usable "size" value.
	defaultFragmentSize = 200
)

// Fragmenter is the "simple" fragmenter. Its Fragment method cuts a text
// into fragments whose length is limited by fragmentSize.
type Fragmenter struct {
	// fragmentSize is the size limit Fragment applies to each fragment.
	fragmentSize int
}

// NewFragmenter returns a freshly allocated Fragmenter whose fragments are
// limited to fragmentSize. The value is stored as given, without validation.
func NewFragmenter(fragmentSize int) *Fragmenter {
	f := new(Fragmenter)
	f.fragmentSize = fragmentSize
	return f
}

// Fragment builds candidate highlight fragments of orig from the term
// locations in ot. Every returned fragment references orig and carries byte
// offsets (Start, End) into it.
//
// For each term location one fragment is produced:
//
//  1. The window starts at the term and grows forward until it holds
//     s.fragmentSize runes or the text ends.
//  2. If the text ended before the budget was used up, the window grows
//     backwards, but never before the End of the last term location that
//     produced a fragment.
//  3. The window is then shifted towards the beginning of the text so that
//     the terms it fully contains sit closer to its center; the shift is
//     limited so that none of those terms falls out of the window.
//
// A term location yields no fragment when its Start lies outside orig (this
// can happen when a token was replaced, e.g. by a regexp replacement) or when
// decoding hits utf8.RuneError while sizing the window. Note that this also
// matches a literal U+FFFD in the text, not only invalid UTF-8.
//
// When ot is empty, a single fragment covering the first s.fragmentSize runes
// of orig (or all of orig if it is shorter) is returned. The cut is always
// made on a rune boundary, so a multi-byte character is never split.
func (s *Fragmenter) Fragment(orig []byte, ot highlight.TermLocations) []*highlight.Fragment {
	var rv []*highlight.Fragment
	maxbegin := 0
OUTER:
	for currTermIndex, termLocation := range ot {
		// start with this
		// it should be the highest scoring fragment with this term first
		start := termLocation.Start
		if start < 0 || start > len(orig) {
			// bail if out of bounds, possibly due to token replacement
			// e.g with a regexp replacement; checking up front also keeps
			// the slicing below safe when fragmentSize is not positive
			continue OUTER
		}
		end := start
		used := 0
		for end < len(orig) && used < s.fragmentSize {
			r, size := utf8.DecodeRune(orig[end:])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			end += size
			used++
		}

		// if we still have more characters available to us
		// push back towards beginning
		// without crossing maxbegin
		for start > 0 && used < s.fragmentSize {
			r, size := utf8.DecodeLastRune(orig[:start])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			if start-size < maxbegin {
				break
			}
			start -= size
			used++
		}

		// however, we'd rather have the tokens centered more in the frag
		// lets try to do that as best we can, without affecting the score
		// find the end of the last term in this fragment
		minend := end
		for _, innerTermLocation := range ot[currTermIndex:] {
			if innerTermLocation.End > end {
				break
			}
			minend = innerTermLocation.End
		}

		// find the smaller of the two rooms to move:
		// the slack after the last contained term, and the slack between
		// maxbegin and the current start
		roomToMove := utf8.RuneCount(orig[minend:end])
		roomToMoveStart := 0
		if start >= maxbegin {
			roomToMoveStart = utf8.RuneCount(orig[maxbegin:start])
		}
		if roomToMoveStart < roomToMove {
			roomToMove = roomToMoveStart
		}

		// shift the whole window back by half of the available room,
		// one rune at a time on both edges so they stay on rune boundaries
		for offset := roomToMove / 2; offset > 0; offset-- {
			r, size := utf8.DecodeLastRune(orig[:start])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			start -= size

			r, size = utf8.DecodeLastRune(orig[:end])
			if r == utf8.RuneError {
				continue OUTER // bail
			}
			end -= size
		}

		rv = append(rv, &highlight.Fragment{Orig: orig, Start: start, End: end})
		// set maxbegin to the end of the current term location
		// so that next one won't back up to include it
		maxbegin = termLocation.End
	}

	if len(ot) == 0 {
		// if there were no terms to highlight
		// produce a single fragment from the beginning.
		// Walk rune by rune so the size limit is counted in runes, as it
		// is for term-based fragments, and the cut never lands inside a
		// multi-byte character. Invalid bytes decode with size 1 and
		// count as one rune each. A non-positive fragmentSize yields an
		// empty fragment.
		end := 0
		for used := 0; end < len(orig) && used < s.fragmentSize; used++ {
			_, size := utf8.DecodeRune(orig[end:])
			end += size
		}
		rv = append(rv, &highlight.Fragment{Orig: orig, Start: 0, End: end})
	}

	return rv
}

// Constructor builds a simple Fragmenter from a configuration map; it is the
// function init registers under Name.
//
// The optional "size" entry sets the fragment size. It is honoured when it is
// a float64 (the type encoding/json produces for numbers decoded into
// interface{}) or an int (the natural type when the map is built in Go code).
// A missing entry, or a value of any other type, selects defaultFragmentSize.
// A float64 is truncated towards zero by the int conversion.
//
// The cache argument is not used. The returned error is always nil.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Fragmenter, error) {
	size := defaultFragmentSize
	// Indexing a nil map is safe and yields a nil interface, which matches
	// no case below and therefore keeps the default.
	switch v := config["size"].(type) {
	case float64:
		size = int(v)
	case int:
		size = v
	}
	return NewFragmenter(size), nil
}

// init registers Constructor with the registry under Name when the package
// is initialized.
func init() {
	registry.RegisterFragmenter(Name, Constructor)
}