// Copyright 2020 Ross Spencer, Richard Lehane. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.

// Satisfies the Identifier interface.

package wikidata

import (
	"encoding/json"
	"fmt"
	"strings"
	"time"

	"github.com/richardlehane/siegfried/internal/identifier"
	"github.com/richardlehane/siegfried/pkg/config"
	"github.com/richardlehane/siegfried/pkg/core"
)

const (
	// unknown is the sentinel ID value: Known reports false for an
	// Identification whose ID equals it.
	unknown = "UNKNOWN"

	// identifierDateFormat is the time layout (YYYY-MM-DD) that New uses
	// to format the current date for the identifier.
	identifierDateFormat = "2006-01-02"
)

// init registers this package's Load function with the core package under
// the core.Wikidata key, as a side effect of importing the package.
//
// NOTE(review): Load is defined elsewhere in this package; presumably it
// rebuilds an Identifier from a previously compiled signature file - confirm.
func init() {
	core.RegisterIdentifier(core.Wikidata, Load)
}

// Identifier contains a set of Wikidata records and an implementation
// of the identifier interface for consuming.
type Identifier struct {
	// infos holds per-format information. New populates it by passing the
	// parsed Wikidata data's Infos() through the package's infos function.
	// NOTE(review): the map keys are presumably Wikidata QIDs - confirm.
	infos map[string]formatInfo
	// Base is the embedded generic identifier that New creates with
	// identifier.New; its methods are promoted onto Identifier.
	*identifier.Base
}

// sourcePuids is a global that keeps track of the PUIDs that are going to
// be output in the identifier and which need provenance. New assigns it
// from the PUIDs returned by newWikidata. It felt needed at the time, but
// it needs to be looked at in more detail. We may eventually delete this in
// favor of something "less-global".
var sourcePuids []string

// New is the entry point used when the Roy tool compiles a brand new
// signature file containing a Wikidata identifier.
//
// It invokes each supplied configuration option, reads and parses the
// Wikidata report via newWikidata, and records the returned PUIDs in the
// package-level sourcePuids so that provenance can be generated later. The
// parsed data is then passed through identifier.ApplyConfig (which is what
// allows extensions and configuration to take effect) before being wrapped
// in an identifier.Base stamped with today's date.
//
// A failure from newWikidata is returned wrapped; in that case the returned
// identifier is nil.
func New(opts ...config.Option) (core.Identifier, error) {
	// Invoke every option up front, before any Wikidata data is read.
	for _, opt := range opts {
		opt()
	}
	logln("congratulations: doing something with the Wikidata identifier package!")

	wd, puids, err := newWikidata()
	if err != nil {
		return nil, fmt.Errorf("error in Wikidata New(): %w", err)
	}

	// Keep the PUIDs in the provenance global so that source information
	// can be generated from Wikidata later on.
	sourcePuids = puids

	today := time.Now().Format(identifierDateFormat)
	wd = identifier.ApplyConfig(wd)
	base := identifier.New(
		wd,
		"Wikidata Name: I don't think this field is used...",
		today,
	)

	// The format infos are derived only after the base has been built,
	// preserving the original order of operations.
	return &Identifier{
		Base:  base,
		infos: infos(wd.Infos()),
	}, nil
}

// Recorder returns a new Recorder for matching that is bound to this
// Identifier. Its list of match IDs starts empty, with capacity for a
// single entry.
func (i *Identifier) Recorder() core.Recorder {
	ids := make(matchIDs, 0, 1)
	return &Recorder{Identifier: i, ids: ids}
}

// Identification contains the result of a single ID for a file. There may be
// multiple, per file. The identification to the user looks something like as
// follows:
//
//   - ns      : 'wikidata'
//     id      : 'Q1343830'
//     format  : 'Executable and Linkable Format'
//     URI     : 'http://www.wikidata.org/entity/Q1343830'
//     mime    :
//     basis   : 'byte match at 0, 4 (signature 1/5); byte match at 0, 7 (signature 4/5)'
//     source  : 'Gary Kessler''s File Signature Table (source date: 2017-08-08) PRONOM (Official (fmt/689))'
//     warning :
//
// NOTE(review): the example above shows a 'source' line and no 'permalink'
// line, whereas Fields and Values in this file emit permalink and do not
// emit Source - confirm whether the example is still current.
//
// Only the exported fields are included when an Identification is
// marshalled to JSON by String; archive and confidence are internal.
type Identification struct {
	Namespace  string         // Namespace of the identifier, e.g. this will be the 'wikidata' namespace.
	ID         string         // QID of the file format according to Wikidata.
	Name       string         // Complete name of the format identification. Often includes version.
	LongName   string         // IRI of the Wikidata record.
	MIME       string         // MIMEtypes associated with the record.
	Basis      []string       // Basis for the result returned by Siegfried.
	Source     []string       // Provenance information associated with the result.
	Permalink  string         // Permalink from the Wikibase record used to build the signature definition.
	Warning    string         // Warnings generated by Siegfried.
	archive    config.Archive // Is it an Archive format?
	confidence int            // Identification confidence for sorting.
}

// String returns a human readable representation of the identification for
// output by fmt-like functions: the value marshalled as JSON, indented by
// two spaces per level.
//
// Only exported fields appear in the output, because encoding/json skips
// unexported ones. If marshalling fails the empty string is returned, since
// this signature has no way to report an error.
func (id Identification) String() string {
	out, err := json.MarshalIndent(id, "", "  ")
	if err != nil {
		return ""
	}
	// A direct conversion is equivalent to formatting the bytes with %s,
	// without going through fmt.
	return string(out)
}

// Fields lists the names of the fields that Siegfried outputs for each
// individual Wikidata match, in output order. In YAML a match looks
// something like:
//
//	matches  :
//	  - ns      : 'wikidata'
//	    id      : 'Q475488'
//	    format  : 'EPUB'
//	    ...     : '...'
//	    ...     : '...'
//	    custom  : 'your custom field'
//	    custom  : '...'
//
// siegfried/pkg/writer/writer.go normalizes these names, so that when it
// sees certain fields it converts them to what the consumer anticipates,
//
//	e.g. namespace => becomes => ns
//
// The order here corresponds position-for-position with the slice returned
// by Identification.Values.
func (i *Identifier) Fields() []string {
	// For Wikidata the basis field reflects both the details of the
	// signature used to match (or other identifiers) and the source of
	// binary signatures, which is why there is no separate source field,
	//
	// e.g. byte match at 0, 4 (Gary Kessler''s File Signature Table (source date: 2017-08-08))
	//
	fields := []string{
		"namespace", "id", "format", "URI",
		"permalink", "mime", "basis", "warning",
	}
	return fields
}

// Archive returns the archive type recorded on this identification (the
// unexported archive field). The intent is to tell callers whether the
// match is one of the formats considered to be an archive, so that it can
// be extracted and its contents identified.
func (id Identification) Archive() config.Archive {
	return id.archive
}

// Known reports whether the format was recognized: it returns false when
// the identification's ID equals the unknown sentinel ("UNKNOWN") and true
// for any other ID.
func (id Identification) Known() bool {
	return id.ID != unknown
}

// Warn returns the warning associated with an identification, i.e. the
// value of its Warning field, unmodified.
func (id Identification) Warn() string {
	return id.Warning
}

// Values returns a string slice containing each of the identifier segments
// for output.
//
// The slice has one entry per name returned by Identifier.Fields, in the
// same order: namespace, id, format (Name), URI (LongName), permalink,
// mime, basis and warning. The Basis entries are joined into one string
// separated by "; ". Source is not part of the output.
func (id Identification) Values() []string {
	// strings.Join returns "" for a nil or empty slice, so no length check
	// is needed before joining.
	basis := strings.Join(id.Basis, "; ")
	// The order must match the slice returned by Identifier.Fields.
	return []string{
		id.Namespace,
		id.ID,
		id.Name,
		id.LongName,
		id.Permalink,
		id.MIME,
		basis,
		id.Warning,
	}
}
