/* SPDX-FileCopyrightText: 2019, 2025 - Sébastien Wilmet <swilmet@gnome.org>
 * SPDX-License-Identifier: LGPL-2.1-or-later
 */

#include "config.h"
#include "gtksourceiconv.h"
#include <errno.h>
#include <glib/gi18n-lib.h>

/**
 * SECTION:iconv
 * @Title: GtkSourceIconv
 * @Short_description: To use iconv more comfortably
 *
 * #GtkSourceIconv is a small wrapper around the g_iconv() family of functions
 * that makes iconv more comfortable to use. Instead of relying on `errno`,
 * #GtkSourceIconv reports failures as #GError and distinguishes the other
 * situations with enum values.
 *
 * Call the functions in this order:
 * - gtk_source_iconv_new()
 * - gtk_source_iconv_open()
 * - gtk_source_iconv_feed() in a loop.
 * - gtk_source_iconv_feed() with @inbuf and @inbytes_left set to %NULL (in a
 *   loop too if the output buffer is full).
 * - gtk_source_iconv_close()
 * - gtk_source_iconv_free()
 *
 * Note that GLib provides the g_convert() family of functions. As the
 * g_convert() documentation explains, it is necessary to use g_iconv() (or
 * #GtkSourceIconv) for streaming conversions. One other use-case is to have
 * more control over invalid characters: in the case of the GtkSourceView
 * library, it is desirable to apply a #GtkTextTag to them.
 *
 * # Avoid iconv if possible
 *
 * However, if you have the choice, don't use iconv! It's a crappy API and has
 * several design flaws:
 * - When `iconv()` returns `(size_t)-1` (e.g., when the output buffer is full),
 *   we don't know if it has performed lossy conversions. In a normal situation,
 *   the number of lossy conversions performed is returned by `iconv()` as its
 *   return value.
 *
 *   To fix this flaw, an `iconv2()` function could return the number of lossy
 *   conversions performed as an output parameter. And also an
 *   `iconv_open2` `()` function could take a flags parameter to configure
 *   whether lossy conversions are allowed.
 *
 * - The `EILSEQ` error returned by `iconv()` can mean two different things:
 *   there is either an invalid character in the input buffer (so invalid wrt.
 *   the *origin* codeset), or there is a valid character in the input buffer
 *   that cannot be represented in the *target* codeset.
 *
 *   To fix this flaw, an `iconv2()` function could simply return two different
 *   error codes.
 *
 * - When the `EILSEQ` error is returned by `iconv()`, “`*inbuf` is left
 *   pointing to the beginning of the invalid multibyte sequence”, however we
 *   don't know the length of the multibyte sequence! So in practice we assume
 *   only one invalid byte, and call `iconv()` again with `inbuf` pointing to
 *   the next byte, which may fail again, and so on.
 *
 *   An `iconv2()` function would need a more elaborate API to return this kind
 *   of information. For example by returning/filling a struct as an optional
 *   output parameter.
 *
 * Additionally, `iconv()` has different implementations (so working code on
 * Linux can behave differently on a BSD or Solaris).
 */

/* Sentinel stored in the conv_descriptor field while no converter is open.
 * gtk_source_iconv_open() also compares the return value of g_iconv_open()
 * against this value to detect a failure.
 */
#define CLOSED_CONV_DESCRIPTOR ((GIConv) -1)

/* Private state of a #GtkSourceIconv. */
struct _GtkSourceIconv
{
	/* Descriptor obtained from g_iconv_open(), or CLOSED_CONV_DESCRIPTOR
	 * when no converter is currently open.
	 */
	GIConv conv_descriptor;
};

/* Tells whether @conv currently holds a converter, i.e. whether its
 * descriptor differs from the CLOSED_CONV_DESCRIPTOR sentinel.
 */
static gboolean
is_opened (GtkSourceIconv *conv)
{
	if (conv->conv_descriptor == CLOSED_CONV_DESCRIPTOR)
	{
		return FALSE;
	}

	return TRUE;
}

/**
 * gtk_source_iconv_new: (skip)
 *
 * Allocates a #GtkSourceIconv in the closed state. Call
 * gtk_source_iconv_open() before feeding it, and release it with
 * gtk_source_iconv_free().
 *
 * Returns: (transfer full): a new #GtkSourceIconv.
 * Since: 299.6
 */
GtkSourceIconv *
gtk_source_iconv_new (void)
{
	GtkSourceIconv *new_conv = g_new0 (GtkSourceIconv, 1);

	/* No converter is attached yet. */
	new_conv->conv_descriptor = CLOSED_CONV_DESCRIPTOR;

	return new_conv;
}

/**
 * gtk_source_iconv_open: (skip)
 * @conv: a #GtkSourceIconv that is not currently opened.
 * @to_codeset: destination codeset.
 * @from_codeset: source codeset.
 * @error: location to a #GError, or %NULL to ignore errors.
 *
 * Similar to g_iconv_open(), except that a failure is reported through a
 * #GError instead of `errno`:
 * - %G_CONVERT_ERROR_NO_CONVERSION when `errno` is `EINVAL`;
 * - %G_CONVERT_ERROR_FAILED for any other `errno` value.
 *
 * `errno` is reset to 0 after a failure.
 *
 * Returns: %TRUE on success, %FALSE on error.
 * Since: 299.6
 */
gboolean
gtk_source_iconv_open (GtkSourceIconv  *conv,
		       const gchar     *to_codeset,
		       const gchar     *from_codeset,
		       GError         **error)
{
	gint open_errno;

	g_return_val_if_fail (conv != NULL, FALSE);
	g_return_val_if_fail (to_codeset != NULL, FALSE);
	g_return_val_if_fail (from_codeset != NULL, FALSE);
	g_return_val_if_fail (error == NULL || *error == NULL, FALSE);
	g_return_val_if_fail (!is_opened (conv), FALSE);

	conv->conv_descriptor = g_iconv_open (to_codeset, from_codeset);

	if (conv->conv_descriptor != CLOSED_CONV_DESCRIPTOR)
	{
		return TRUE;
	}

	/* Copy errno before calling anything else that might overwrite it. */
	open_errno = errno;
	errno = 0;

	switch (open_errno)
	{
		case EINVAL:
			g_set_error (error,
				     G_CONVERT_ERROR,
				     G_CONVERT_ERROR_NO_CONVERSION,
				     _("Conversion from character set “%s” to “%s” is not supported."),
				     from_codeset,
				     to_codeset);
			break;

		default:
			g_set_error (error,
				     G_CONVERT_ERROR,
				     G_CONVERT_ERROR_FAILED,
				     _("Failed to open a character set converter from “%s” to “%s”: %s"),
				     from_codeset,
				     to_codeset,
				     g_strerror (open_errno));
			break;
	}

	return FALSE;
}

/**
 * gtk_source_iconv_feed: (skip)
 * @conv: a #GtkSourceIconv.
 * @inbuf: (nullable): bytes to convert.
 * @inbytes_left: (nullable) (inout): bytes remaining to convert in @inbuf.
 * @outbuf: (not nullable): converted output bytes.
 * @outbytes_left: (not nullable) (inout): bytes available to fill in @outbuf.
 * @error: location to a #GError, or %NULL to ignore errors.
 *
 * Similar to g_iconv(). This function reads the `errno` value and converts it
 * either to a #GError, or to a #GtkSourceIconvResult enumeration value:
 * - `EILSEQ` gives %GTK_SOURCE_ICONV_RESULT_ILLEGAL_SEQUENCE;
 * - `EINVAL` gives %GTK_SOURCE_ICONV_RESULT_INCOMPLETE_INPUT;
 * - `E2BIG` gives %GTK_SOURCE_ICONV_RESULT_OUTPUT_BUFFER_FULL;
 * - any other `errno` value gives %GTK_SOURCE_ICONV_RESULT_ERROR.
 *
 * When g_iconv() succeeds, %GTK_SOURCE_ICONV_RESULT_LOSSY_CONVERSION is
 * returned if it reports a count greater than zero, and
 * %GTK_SOURCE_ICONV_RESULT_OK otherwise.
 *
 * @error is set only when %GTK_SOURCE_ICONV_RESULT_ERROR is returned. If one
 * of the preconditions on the arguments is violated (a programming error),
 * %GTK_SOURCE_ICONV_RESULT_ERROR is returned too, but @error is left
 * untouched.
 *
 * Returns: a #GtkSourceIconvResult enumeration value.
 * Since: 299.6
 */
GtkSourceIconvResult
gtk_source_iconv_feed (GtkSourceIconv  *conv,
		       gchar          **inbuf,
		       gsize           *inbytes_left,
		       gchar          **outbuf,
		       gsize           *outbytes_left,
		       GError         **error)
{
	gsize iconv_ret;

	/* The return type is an enum, not a gboolean: report precondition
	 * failures with the error value instead of FALSE.
	 */
	g_return_val_if_fail (conv != NULL, GTK_SOURCE_ICONV_RESULT_ERROR);
	g_return_val_if_fail (outbuf != NULL, GTK_SOURCE_ICONV_RESULT_ERROR);
	g_return_val_if_fail (outbytes_left != NULL, GTK_SOURCE_ICONV_RESULT_ERROR);
	g_return_val_if_fail (error == NULL || *error == NULL, GTK_SOURCE_ICONV_RESULT_ERROR);

	iconv_ret = g_iconv (conv->conv_descriptor,
			     inbuf, inbytes_left,
			     outbuf, outbytes_left);

	if (iconv_ret == (gsize)-1)
	{
		/* Save errno right away, before any other call can overwrite
		 * it.
		 */
		gint saved_errno = errno;
		errno = 0;

		if (saved_errno == EILSEQ)
		{
			return GTK_SOURCE_ICONV_RESULT_ILLEGAL_SEQUENCE;
		}
		else if (saved_errno == EINVAL)
		{
			return GTK_SOURCE_ICONV_RESULT_INCOMPLETE_INPUT;
		}
		else if (saved_errno == E2BIG)
		{
			return GTK_SOURCE_ICONV_RESULT_OUTPUT_BUFFER_FULL;
		}
		else
		{
			g_set_error (error,
				     G_CONVERT_ERROR,
				     G_CONVERT_ERROR_FAILED,
				     _("Error during character set conversion: %s"),
				     g_strerror (saved_errno));

			return GTK_SOURCE_ICONV_RESULT_ERROR;
		}
	}
	else if (iconv_ret > 0)
	{
		/* Note: in the other cases above (e.g., buffer full), how to
		 * know if iconv() has performed lossy conversions before
		 * encountering the error condition? So the outbuf might contain
		 * lossy conversions without a way to know it!
		 */
		return GTK_SOURCE_ICONV_RESULT_LOSSY_CONVERSION;
	}

	return GTK_SOURCE_ICONV_RESULT_OK;
}

/**
 * gtk_source_iconv_feed_discard_output: (skip)
 * @conv: a #GtkSourceIconv.
 * @inbuf: (nullable): bytes to convert.
 * @inbytes_left: (nullable) (inout): bytes remaining to convert in @inbuf.
 * @error: location to a #GError, or %NULL to ignore errors.
 *
 * Similar to gtk_source_iconv_feed() but without the output buffer parameters.
 * The converted bytes are written to a temporary buffer local to this function
 * and are thrown away.
 *
 * If one of the preconditions on the arguments is violated (a programming
 * error), %GTK_SOURCE_ICONV_RESULT_ERROR is returned and @error is left
 * untouched.
 *
 * Returns: a #GtkSourceIconvResult enumeration value.
 *   %GTK_SOURCE_ICONV_RESULT_OUTPUT_BUFFER_FULL is never returned by this
 *   function.
 * Since: 299.6
 */
GtkSourceIconvResult
gtk_source_iconv_feed_discard_output (GtkSourceIconv  *conv,
				      gchar          **inbuf,
				      gsize           *inbytes_left,
				      GError         **error)
{
#define OUTPUT_BUFFER_SIZE (1024)
	gchar output_buffer[OUTPUT_BUFFER_SIZE];
	GtkSourceIconvResult result;

	/* The return type is an enum, not a gboolean: report precondition
	 * failures with the error value instead of FALSE.
	 */
	g_return_val_if_fail (conv != NULL, GTK_SOURCE_ICONV_RESULT_ERROR);
	g_return_val_if_fail (error == NULL || *error == NULL, GTK_SOURCE_ICONV_RESULT_ERROR);

	do
	{
		/* Start again at the beginning of the scratch buffer on each
		 * iteration: its previous content is not needed.
		 */
		gchar *output_buffer_pos = output_buffer;
		gsize output_buffer_n_bytes_left = OUTPUT_BUFFER_SIZE;

		result = gtk_source_iconv_feed (conv,
						inbuf, inbytes_left,
						&output_buffer_pos, &output_buffer_n_bytes_left,
						error);
	}
	while (result == GTK_SOURCE_ICONV_RESULT_OUTPUT_BUFFER_FULL);

	return result;
#undef OUTPUT_BUFFER_SIZE
}

/**
 * gtk_source_iconv_close: (skip)
 * @conv: a #GtkSourceIconv.
 * @error: location to a #GError, or %NULL to ignore errors.
 *
 * Similar to g_iconv_close(). The difference is that it returns a #GError
 * instead of `errno`.
 *
 * Calling this function on a #GtkSourceIconv that is not opened does nothing
 * and returns %TRUE.
 *
 * After this function returns, @conv is considered closed, even if an error
 * is reported: the underlying descriptor is never passed to g_iconv_close() a
 * second time.
 *
 * Returns: %TRUE on success, %FALSE on error.
 * Since: 299.6
 */
gboolean
gtk_source_iconv_close (GtkSourceIconv  *conv,
			GError         **error)
{
	gint close_ret;
	gint saved_errno;

	g_return_val_if_fail (conv != NULL, FALSE);
	g_return_val_if_fail (error == NULL || *error == NULL, FALSE);

	if (!is_opened (conv))
	{
		return TRUE;
	}

	close_ret = g_iconv_close (conv->conv_descriptor);

	/* Save errno right away, before any other call can overwrite it. */
	saved_errno = errno;

	/* Forget the descriptor whatever the outcome. POSIX specifies that
	 * iconv_close() deallocates the descriptor, so trying to close it
	 * again later (for instance from gtk_source_iconv_free()) could
	 * operate on already released resources.
	 */
	conv->conv_descriptor = CLOSED_CONV_DESCRIPTOR;

	if (close_ret == -1)
	{
		errno = 0;

		g_set_error (error,
			     G_CONVERT_ERROR,
			     G_CONVERT_ERROR_FAILED,
			     _("Failed to close the character set converter: %s"),
			     g_strerror (saved_errno));

		return FALSE;
	}

	return TRUE;
}

/**
 * gtk_source_iconv_free: (skip)
 * @conv: (nullable): a #GtkSourceIconv, or %NULL.
 *
 * Frees @conv. gtk_source_iconv_close() is called first, and any error it
 * reports is ignored. Does nothing if @conv is %NULL.
 *
 * Since: 299.6
 */
void
gtk_source_iconv_free (GtkSourceIconv *conv)
{
	if (conv == NULL)
	{
		return;
	}

	/* There is no way to report a close failure from here, hence the NULL
	 * error location.
	 */
	gtk_source_iconv_close (conv, NULL);
	g_free (conv);
}
