<?php

if ( ! defined( 'ABSPATH' ) ) {
	exit;
}

class WP_AIW_Rewriter {
	private $llm;
	private $fetcher;

	/**
	 * Build the "output length constraint" paragraph that is appended to the user prompt.
	 *
	 * Two variants exist per language: one that quotes the configured token budget and one
	 * used when no positive budget is configured.
	 *
	 * @param string $language   'en' selects the English text; any other value selects Chinese.
	 * @param int    $max_tokens Configured output budget; values <= 0 mean "not specified".
	 * @return string Hint text, always starting with two newlines.
	 */
	private function get_output_length_hint( $language, $max_tokens ) {
		$is_english = ( 'en' === $language );
		$max_tokens = (int) $max_tokens;

		// No explicit budget: ask the model to self-limit instead of quoting a number.
		if ( $max_tokens <= 0 ) {
			if ( $is_english ) {
				return "\n\n[Output length constraint]\nThis request does not specify max_tokens (provider/model default). Control your output length so you can reliably finish; prefer brevity over being cut off. ALWAYS return one complete, parseable, fully-closed JSON object. Never output partial JSON.";
			}

			return "\n\n【输出长度约束】\n本次未指定 max_tokens（使用模型默认）。请自行控制输出长度，宁可精简也不要因过长导致截断。必须输出 1 个完整、可解析且闭合的 JSON；禁止输出半截 JSON。";
		}

		// Explicit budget: embed the number in the hint.
		if ( $is_english ) {
			return "\n\n[Output length constraint]\nThe output budget is approximately: {$max_tokens} tokens (hard limit; cannot be exceeded). If the content might be too long, proactively compress content_html (fewer paragraphs, shorter code samples, merge lists/tables, remove non-essential explanations), but ALWAYS return one complete, parseable, fully-closed JSON object. Never output partial JSON.";
		}

		return "\n\n【输出长度约束】\n本次输出最大长度上限约为：{$max_tokens}（硬上限，无法超过）。如可能超长，请主动压缩 content_html（减少段落、缩短示例代码、合并列表/表格、去掉非关键解释），但必须输出 1 个完整、可解析且闭合的 JSON；禁止输出半截 JSON。";
	}

	/**
	 * Create the collaborators used by this class.
	 *
	 * The LLM client is always created. The URL fetcher is created only when its class
	 * is loaded; otherwise the property stays null and generate_rewrite() skips URL fetching.
	 */
	public function __construct() {
		$this->llm = new WP_AIW_LLM_Client();

		if ( class_exists( 'WP_AIW_Url_Fetcher' ) ) {
			$this->fetcher = new WP_AIW_Url_Fetcher();
		} else {
			$this->fetcher = null;
		}
	}

	/**
	 * Generate rewritten content/tags (and optional title) without writing to DB.
	 *
	 * Builds a system prompt and a user prompt from the post, the caller's requirements and
	 * optional reference material (pasted text plus fetched URLs), sends both to the LLM
	 * client and parses the JSON reply.
	 *
	 * $settings keys read here: llm_base_url, llm_api_key, llm_model, llm_timeout_seconds,
	 * llm_max_tokens, prompt_system_zh, prompt_system_en, url_fetch_max_urls,
	 * url_fetch_timeout_seconds, url_fetch_max_chars.
	 *
	 * $runner_options keys read here: language ('en'; anything else is treated as 'zh'),
	 * rewrite_title, requirements, reference_text, urls.
	 *
	 * @param string $title          Post title; non-strings are treated as ''.
	 * @param string $content        Post content; non-strings are treated as ''.
	 * @param array  $settings       Plugin settings; non-arrays are treated as an empty array.
	 * @param array  $runner_options Per-run options; non-arrays are treated as an empty array.
	 * @return array{ok:bool, content_html?:string, tags?:array<int,string>, title?:string, debug?:array, raw?:string, error?:string}
	 */
	public function generate_rewrite( $title, $content, $settings, $runner_options = array() ) {
		$title = is_string( $title ) ? (string) $title : '';
		$content = is_string( $content ) ? (string) $content : '';
		$settings = is_array( $settings ) ? $settings : array();

		$runner_options = is_array( $runner_options ) ? $runner_options : array();
		$language = isset( $runner_options['language'] ) ? (string) $runner_options['language'] : 'zh';
		if ( $language !== 'en' ) {
			$language = 'zh';
		}
		$rewrite_title = ! empty( $runner_options['rewrite_title'] );

		$requirements = isset( $runner_options['requirements'] ) ? trim( (string) $runner_options['requirements'] ) : '';
		$reference_text = isset( $runner_options['reference_text'] ) ? trim( (string) $runner_options['reference_text'] ) : '';
		$requirements = $this->truncate_text( $requirements, 4000 );
		$reference_text = $this->truncate_text( $reference_text, 20000 );

		// Collect distinct, non-empty URLs. Non-scalar entries are skipped so they are not
		// cast to the literal string "Array".
		$urls = array();
		if ( isset( $runner_options['urls'] ) && is_array( $runner_options['urls'] ) ) {
			foreach ( $runner_options['urls'] as $u ) {
				if ( ! is_scalar( $u ) ) {
					continue;
				}
				$u = trim( (string) $u );
				if ( $u !== '' ) {
					$urls[] = $u;
				}
			}
			$urls = array_values( array_unique( $urls ) );
		}

		// The configured URL cap is clamped to the 1..10 range.
		$max_urls = isset( $settings['url_fetch_max_urls'] ) ? (int) $settings['url_fetch_max_urls'] : 3;
		$max_urls = max( 1, min( 10, $max_urls ) );
		if ( count( $urls ) > $max_urls ) {
			$urls = array_slice( $urls, 0, $max_urls );
		}

		$url_timeout = isset( $settings['url_fetch_timeout_seconds'] ) ? (int) $settings['url_fetch_timeout_seconds'] : 15;
		$url_max_chars = isset( $settings['url_fetch_max_chars'] ) ? (int) $settings['url_fetch_max_chars'] : 12000;

		// Fetch each URL. Failures are recorded per URL and never abort the rewrite.
		$references = array();
		$url_errors = array();
		if ( ! empty( $urls ) && $this->fetcher ) {
			foreach ( $urls as $u ) {
				$r = $this->fetcher->fetch( $u, $url_timeout, $url_max_chars );
				// A result flagged ok but lacking an array payload is treated as a failure,
				// so no empty reference section is added to the prompt.
				$fetched = ! empty( $r['ok'] ) && isset( $r['data'] ) && is_array( $r['data'] );
				if ( ! $fetched ) {
					$url_errors[] = $u . '：' . ( isset( $r['error'] ) ? (string) $r['error'] : '抓取失败' );
					continue;
				}
				$references[] = $r['data'];
			}
		}

		// Assemble the reference section: pasted text first, then one entry per fetched URL.
		$references_block = '';
		if ( $reference_text !== '' ) {
			$references_block .= ( $language === 'en' ? "\n\n[User pasted reference]\n" : "\n\n[用户粘贴参考]\n" ) . $reference_text;
		}
		if ( ! empty( $references ) ) {
			foreach ( $references as $idx => $ref ) {
				$ref_title = isset( $ref['title'] ) ? trim( (string) $ref['title'] ) : '';
				$ref_text = isset( $ref['text'] ) ? trim( (string) $ref['text'] ) : '';
				$ref_url = isset( $ref['url'] ) ? trim( (string) $ref['url'] ) : '';
				$references_block .= ( $language === 'en'
					? ("\n\n[URL reference " . ( $idx + 1 ) . "]\nURL: " . $ref_url)
					: ("\n\n[URL参考 " . ( $idx + 1 ) . "]\nURL: " . $ref_url)
				);
				if ( $ref_title !== '' ) {
					$references_block .= "\nTitle: " . $ref_title;
				}
				$references_block .= "\nContent: " . $ref_text;
			}
		}

		// System prompt: the configured prompt for the language, or a built-in fallback.
		$prompt_zh = isset( $settings['prompt_system_zh'] ) ? (string) $settings['prompt_system_zh'] : '';
		$prompt_en = isset( $settings['prompt_system_en'] ) ? (string) $settings['prompt_system_en'] : '';
		$system = $language === 'en' ? trim( $prompt_en ) : trim( $prompt_zh );
		if ( $system === '' ) {
			$system = $language === 'en'
				? 'You are an English technical editor. Output strict JSON with content_html/tags/title.'
				: '你是中文技术写作编辑。输出严格 JSON，包含 content_html/tags/title。';
		}
		if ( $rewrite_title ) {
			$system .= $language === 'en'
				? "\n\nExtra requirement: also output a non-empty 'title' field (string) to rewrite the post title."
				: "\n\n额外要求：请同时输出 title 字段（string），用于重写文章标题。";
		} else {
			$system .= $language === 'en'
				? "\n\nExtra requirement: do NOT rewrite the title; you may omit 'title' or leave it empty."
				: "\n\n额外要求：不要改写标题；可以忽略 title 字段或留空。";
		}

		$system .= $language === 'en'
			? "\n\nOutput rules: output ONLY one JSON object. Do NOT wrap in Markdown code fences. Do NOT add any explanation text. Use valid JSON (double quotes, no trailing commas). Do NOT put literal newlines inside JSON strings; if needed, use \\n. Must include: content_html (string), tags (array of strings). IMPORTANT: content_html is a JSON string, so any double quotes inside it MUST be escaped (\\\"). Prefer using single quotes for HTML attributes to reduce escaping issues."
			: "\n\n输出规则：只能输出 1 个 JSON 对象，不要使用 Markdown 代码块（不要输出 ```），不要输出任何解释文字。必须是严格 JSON（双引号、无尾逗号）。JSON 字符串里不要出现真实换行；如需换行请用 \\n。必须包含：content_html（string）与 tags（string 数组）。重要：content_html 是 JSON 字符串，内部出现的双引号必须用 \\\" 转义；建议 HTML 属性尽量使用单引号，减少转义错误。";

		// User prompt: task, requirements, optional references, then the post itself.
		if ( $language === 'en' ) {
			$user = "Task: rewrite and improve the post content based on the original content. Do not change factual meaning.\n";
			$user .= "Rewrite requirements:\n" . ( $requirements !== '' ? $requirements : '(none)' );
			if ( $references_block !== '' ) {
				$user .= "\n\nSupplemental references (optional, may be incomplete):\n" . $references_block;
			}
			$user .= "\n\nPost title: " . $title . "\n\nOriginal content (HTML / block editor content):\n" . $content;
		} else {
			$user = "任务：基于原始内容进行优化润色重写，不要改变事实含义。\n";
			$user .= "重写要求：\n" . ( $requirements !== '' ? $requirements : '（无）' );
			if ( $references_block !== '' ) {
				$user .= "\n\n补充参考（可选，可能不完整）：\n" . $references_block;
			}
			$user .= "\n\n文章标题：" . $title . "\n\n原始内容（HTML/块编辑器内容）：\n" . $content;
		}

		$llm_max_tokens = isset( $settings['llm_max_tokens'] ) ? (int) $settings['llm_max_tokens'] : 0;
		$user .= $this->get_output_length_hint( $language, $llm_max_tokens );

		$messages = array(
			array( 'role' => 'system', 'content' => $system ),
			array( 'role' => 'user', 'content' => $user ),
		);

		// $settings may be empty (it is normalised to array() above), so the connection
		// settings are read defensively instead of raising undefined-key warnings.
		$llm_base_url = isset( $settings['llm_base_url'] ) ? $settings['llm_base_url'] : '';
		$llm_api_key = isset( $settings['llm_api_key'] ) ? $settings['llm_api_key'] : '';
		$llm_model = isset( $settings['llm_model'] ) ? $settings['llm_model'] : '';

		$resp = $this->llm->chat_completions(
			$llm_base_url,
			$llm_api_key,
			$llm_model,
			$messages,
			isset( $settings['llm_timeout_seconds'] ) ? (int) $settings['llm_timeout_seconds'] : 30,
			$llm_max_tokens
		);

		if ( empty( $resp['ok'] ) ) {
			return array( 'ok' => false, 'error' => isset( $resp['error'] ) ? (string) $resp['error'] : 'LLM 调用失败' );
		}

		// Only a scalar message content is usable; anything else is reported as an empty reply.
		$text = '';
		$data = isset( $resp['data'] ) ? $resp['data'] : null;
		if ( isset( $data['choices'][0]['message']['content'] ) && is_scalar( $data['choices'][0]['message']['content'] ) ) {
			$text = (string) $data['choices'][0]['message']['content'];
		}
		$text = trim( $text );
		if ( $text === '' ) {
			return array( 'ok' => false, 'error' => 'LLM 返回为空' );
		}

		// First try to decode the reply as-is; if that fails, cut out the first balanced
		// {...} object and decode that.
		$parsed = $this->try_decode_json_any( $text );
		if ( ! is_array( $parsed ) ) {
			$json_text = $this->extract_first_json_object( $text );
			if ( $json_text === '' ) {
				$this->maybe_log_parse_failure( $text, 'no_json_object' );
				return array( 'ok' => false, 'error' => 'LLM 返回不是严格 JSON：未找到可解析的 JSON 对象（可能是内容中双引号未转义导致 JSON 不完整；可勾选 debug 查看原始输出）', 'raw' => $text );
			}

			$parsed = $this->try_decode_json_any( $json_text );
			if ( ! is_array( $parsed ) ) {
				$this->maybe_log_parse_failure( $text, 'json_decode_failed' );
				$err = function_exists( 'json_last_error_msg' ) ? json_last_error_msg() : 'JSON parse error';
				return array( 'ok' => false, 'error' => 'LLM 返回不是严格 JSON：' . $err . '（请检查提示词或模型输出；可勾选 debug 查看原始输出）', 'raw' => $text );
			}
		}

		// A non-scalar content_html (array/object) counts as missing; casting it would
		// produce the literal string "Array".
		$content_html = '';
		if ( isset( $parsed['content_html'] ) && ( is_string( $parsed['content_html'] ) || is_numeric( $parsed['content_html'] ) ) ) {
			$content_html = trim( (string) $parsed['content_html'] );
		}
		if ( $content_html === '' ) {
			$this->maybe_log_parse_failure( $text, 'missing_content_html' );
			return array( 'ok' => false, 'error' => 'JSON 缺少 content_html', 'raw' => $text );
		}

		// Keep distinct, non-empty tag names; nested arrays/objects/booleans are ignored.
		$tags = array();
		if ( isset( $parsed['tags'] ) && is_array( $parsed['tags'] ) ) {
			foreach ( $parsed['tags'] as $t ) {
				if ( ! is_string( $t ) && ! is_numeric( $t ) ) {
					continue;
				}
				$t = trim( (string) $t );
				if ( $t !== '' ) {
					$tags[] = $t;
				}
			}
			$tags = array_values( array_unique( $tags ) );
		}

		$out = array(
			'ok' => true,
			'content_html' => $content_html,
			'tags' => $tags,
		);

		// Debug info is attached only when URL fetching was involved in this run.
		$debug = array(
			'urls_requested' => $urls,
			'url_errors' => $url_errors,
			'urls_fetched' => array_map( function ( $r ) { return isset( $r['url'] ) ? (string) $r['url'] : ''; }, $references ),
		);
		if ( ! empty( $debug['urls_requested'] ) || ! empty( $debug['url_errors'] ) || ! empty( $debug['urls_fetched'] ) ) {
			$out['debug'] = $debug;
		}

		// The title is returned only when requested and when the model supplied a usable one.
		if ( $rewrite_title && isset( $parsed['title'] ) && ( is_string( $parsed['title'] ) || is_numeric( $parsed['title'] ) ) ) {
			$new_title = trim( (string) $parsed['title'] );
			if ( $new_title !== '' ) {
				$out['title'] = $new_title;
			}
		}

		return $out;
	}

	/**
	 * Try decoding JSON from raw LLM text.
	 * Accepts either a JSON object, or a JSON array whose first element is an object.
	 *
	 * Decoding is attempted on the whole text first and only then on the body of a
	 * Markdown code fence. Looking at fences first would break a valid reply whose
	 * content_html itself contains a ``` block, because the fence pattern would match
	 * inside the JSON string. Each candidate is decoded as-is and, on failure, once more
	 * after escaping raw control characters inside its string literals.
	 *
	 * @param mixed $text Raw LLM output; non-strings are treated as empty.
	 * @return array|null Decoded array, or null when nothing could be decoded.
	 */
	private function try_decode_json_any( $text ) {
		$text = is_string( $text ) ? trim( $text ) : '';
		if ( $text === '' ) {
			return null;
		}

		// Drop UTF-8 BOM if present.
		if ( substr( $text, 0, 3 ) === "\xEF\xBB\xBF" ) {
			$text = substr( $text, 3 );
			$text = trim( $text );
		}

		// Candidates in order of preference: the full text, then the fenced body (if any).
		$candidates = array( $text );
		if ( preg_match( '/```(?:json)?\s*([\s\S]*?)\s*```/i', $text, $m ) ) {
			$fenced = trim( (string) $m[1] );
			if ( $fenced !== '' && $fenced !== $text ) {
				$candidates[] = $fenced;
			}
		}

		$parsed = null;
		foreach ( $candidates as $candidate ) {
			if ( $candidate === '' ) {
				continue;
			}

			$parsed = json_decode( $candidate, true );
			if ( ! is_array( $parsed ) ) {
				// Retry with literal control characters inside strings escaped.
				$sanitized = $this->sanitize_json_string_literals( $candidate );
				if ( $sanitized !== '' ) {
					$parsed = json_decode( $sanitized, true );
				}
			}

			if ( is_array( $parsed ) ) {
				break;
			}
		}

		if ( ! is_array( $parsed ) ) {
			return null;
		}

		// If it's a list like [ { ... } ], accept the first element.
		if ( ! isset( $parsed['content_html'] ) && isset( $parsed[0] ) && is_array( $parsed[0] ) ) {
			$first = $parsed[0];
			if ( isset( $first['content_html'] ) ) {
				return $first;
			}
		}

		return $parsed;
	}

	/**
	 * Write a short diagnostic line to the PHP error log when a model reply could not be parsed.
	 *
	 * Does nothing unless the WP_DEBUG constant is defined and truthy. At most the first
	 * 800 characters of the raw reply are logged.
	 *
	 * @param mixed $raw    Raw LLM output; non-strings are logged as an empty snippet.
	 * @param mixed $reason Short failure code; non-strings are logged as 'unknown'.
	 * @return void
	 */
	private function maybe_log_parse_failure( $raw, $reason ) {
		$debug_enabled = defined( 'WP_DEBUG' ) && WP_DEBUG;
		if ( ! $debug_enabled ) {
			return;
		}

		$label = 'unknown';
		if ( is_string( $reason ) ) {
			$label = $reason;
		}

		$excerpt = $this->truncate_text( is_string( $raw ) ? $raw : '', 800 );
		$line    = '[wp-aiw][rewrite] parse_failed=' . $label . ' raw_snippet=' . $excerpt;

		// phpcs:ignore WordPress.PHP.DevelopmentFunctions
		error_log( $line );
	}

	/**
	 * Find the ID of the next published post that matches the given criteria.
	 *
	 * Unless force_rewrite is set, posts whose 'wp_aiw_rewritten' meta equals '1' are
	 * skipped (posts without that meta key, or with another value, still qualify).
	 *
	 * Supported $criteria keys:
	 * - category_id (int), category_include_children (bool, default true)
	 * - tag_id (int)
	 * - author_id (int)
	 * - date_after / date_before (string, passed to the date query, inclusive)
	 * - force_rewrite (bool)
	 * - order_mode (string): 'modified_desc' orders by modified date descending; any other
	 *   value orders by ID ascending
	 * - exclude_ids (string of IDs separated by any non-digit characters, or array of IDs);
	 *   de-duplicated and capped at 500 entries
	 *
	 * @param array $criteria Filter options; non-arrays are treated as an empty array.
	 * @return int Matching post ID, or 0 when none is found.
	 */
	public function find_next_post_id( $criteria = array() ) {
		$criteria = is_array( $criteria ) ? $criteria : array();
		$category_id = isset( $criteria['category_id'] ) ? (int) $criteria['category_id'] : 0;
		$tag_id      = isset( $criteria['tag_id'] ) ? (int) $criteria['tag_id'] : 0;
		$date_after  = isset( $criteria['date_after'] ) ? (string) $criteria['date_after'] : '';
		$date_before = isset( $criteria['date_before'] ) ? (string) $criteria['date_before'] : '';
		$author_id   = isset( $criteria['author_id'] ) ? (int) $criteria['author_id'] : 0;
		$force_rewrite = ! empty( $criteria['force_rewrite'] );
		$include_children = true;
		if ( isset( $criteria['category_include_children'] ) ) {
			$include_children = (bool) $criteria['category_include_children'];
		}
		$order_mode  = isset( $criteria['order_mode'] ) ? (string) $criteria['order_mode'] : 'oldest';

		// Normalise exclude_ids (string or array) into a list of distinct positive integers.
		$exclude_ids = array();
		if ( isset( $criteria['exclude_ids'] ) ) {
			$raw = $criteria['exclude_ids'];
			$parts = array();
			if ( is_string( $raw ) ) {
				$split = preg_split( '/[^0-9]+/', $raw );
				if ( is_array( $split ) ) {
					$parts = $split;
				}
			} elseif ( is_array( $raw ) ) {
				$parts = $raw;
			}

			foreach ( $parts as $p ) {
				// A nested array would cast to 1 and wrongly exclude post ID 1.
				if ( ! is_scalar( $p ) ) {
					continue;
				}
				$id = (int) $p;
				if ( $id > 0 ) {
					$exclude_ids[] = $id;
				}
			}

			$exclude_ids = array_values( array_unique( $exclude_ids ) );
			if ( count( $exclude_ids ) > 500 ) {
				$exclude_ids = array_slice( $exclude_ids, 0, 500 );
			}
		}

		$orderby = 'ID';
		$order   = 'ASC';
		if ( $order_mode === 'modified_desc' ) {
			$orderby = 'modified';
			$order   = 'DESC';
		}

		$args = array(
			'post_type' => 'post',
			'post_status' => 'publish',
			'posts_per_page' => 1,
			'orderby' => $orderby,
			'order' => $order,
			'fields' => 'ids',
			// Only the first ID is used, so skip counting the total number of matches.
			'no_found_rows' => true,
		);

		if ( ! $force_rewrite ) {
			// Match posts that either lack the marker meta or carry a value other than '1'.
			$args['meta_query'] = array(
				'relation' => 'OR',
				array(
					'key' => 'wp_aiw_rewritten',
					'compare' => 'NOT EXISTS',
				),
				array(
					'key' => 'wp_aiw_rewritten',
					'value' => '1',
					'compare' => '!=',
				),
			);
		}

		if ( ! empty( $exclude_ids ) ) {
			$args['post__not_in'] = $exclude_ids;
		}

		if ( $author_id > 0 ) {
			$args['author'] = $author_id;
		}

		// Category and tag filters are combined with AND when both are given.
		$tax_query = array();
		if ( $category_id > 0 ) {
			$tax_query[] = array(
				'taxonomy' => 'category',
				'field' => 'term_id',
				'terms' => array( $category_id ),
				'include_children' => $include_children,
			);
		}
		if ( $tag_id > 0 ) {
			$tax_query[] = array(
				'taxonomy' => 'post_tag',
				'field' => 'term_id',
				'terms' => array( $tag_id ),
			);
		}
		if ( ! empty( $tax_query ) ) {
			if ( count( $tax_query ) > 1 ) {
				$tax_query = array_merge( array( 'relation' => 'AND' ), $tax_query );
			}
			$args['tax_query'] = $tax_query;
		}

		if ( $date_after !== '' || $date_before !== '' ) {
			$date_clause = array( 'inclusive' => true );
			if ( $date_after !== '' ) {
				$date_clause['after'] = $date_after;
			}
			if ( $date_before !== '' ) {
				$date_clause['before'] = $date_before;
			}
			$args['date_query'] = array( $date_clause );
		}

		$q = new WP_Query( $args );
		if ( ! $q->have_posts() ) {
			return 0;
		}
		$ids = $q->posts;
		return ! empty( $ids[0] ) ? (int) $ids[0] : 0;
	}

	/**
	 * Rewrite one post with the LLM and persist the result.
	 *
	 * Steps: load the post, call generate_rewrite(), update the content (and the title when
	 * requested and supplied), append the returned tags, then mark the post with the
	 * 'wp_aiw_rewritten' meta so find_next_post_id() skips it on later runs.
	 *
	 * @param int   $post_id        ID of the post to rewrite; must be of post type 'post'.
	 * @param array $settings       Plugin settings, forwarded to generate_rewrite().
	 * @param array $runner_options Per-run options, forwarded to generate_rewrite(); the
	 *                              'rewrite_title' flag is also read here.
	 * @return array On success: ok=true, post_id, tags_added, title_rewritten (1|0).
	 *               On failure: ok=false plus error (the generate_rewrite() failure array is
	 *               returned unchanged when generation fails).
	 */
	public function rewrite_post( $post_id, $settings, $runner_options = array() ) {
		$post_id = (int) $post_id;
		$post = get_post( $post_id );
		if ( ! $post || $post->post_type !== 'post' ) {
			return array( 'ok' => false, 'error' => '文章不存在或类型不支持' );
		}

		$title = (string) get_the_title( $post_id );
		$content = (string) $post->post_content;

		$runner_options = is_array( $runner_options ) ? $runner_options : array();
		$rewrite_title = ! empty( $runner_options['rewrite_title'] );

		$gen = $this->generate_rewrite( $title, $content, $settings, $runner_options );
		if ( empty( $gen['ok'] ) ) {
			return $gen;
		}
		$content_html = isset( $gen['content_html'] ) ? (string) $gen['content_html'] : '';
		$tags = isset( $gen['tags'] ) && is_array( $gen['tags'] ) ? $gen['tags'] : array();
		$new_title = isset( $gen['title'] ) ? trim( (string) $gen['title'] ) : '';

		// Update post.
		// wp_update_post() expects slashed input and unslashes it before saving. Without
		// wp_slash(), backslashes in the generated HTML (code samples, regexes, paths)
		// would be stripped from the stored post.
		$update = array(
			'ID' => $post_id,
			'post_content' => wp_slash( $content_html ),
		);
		if ( $rewrite_title && $new_title !== '' ) {
			$update['post_title'] = wp_slash( $new_title );
		}
		$r = wp_update_post( $update, true );
		if ( is_wp_error( $r ) ) {
			return array( 'ok' => false, 'error' => $r->get_error_message() );
		}

		// Append the generated tags to the post's existing tags (third argument: append).
		if ( ! empty( $tags ) ) {
			wp_set_post_tags( $post_id, $tags, true );
		}

		// Only a single marker field is used: wp_aiw_rewritten=1
		update_post_meta( $post_id, 'wp_aiw_rewritten', 1 );

		return array(
			'ok' => true,
			'post_id' => $post_id,
			'tags_added' => $tags,
			'title_rewritten' => ( $rewrite_title && $new_title !== '' ) ? 1 : 0,
		);
	}

	/**
	 * Trim a string and cut it to at most $max_chars characters.
	 *
	 * Length is measured in UTF-8 characters when the mbstring functions are available and
	 * in bytes otherwise. The result is not trimmed again after cutting.
	 *
	 * @param mixed $text      Input text; non-strings yield ''.
	 * @param int   $max_chars Maximum length; values <= 0 disable truncation.
	 * @return string Trimmed and possibly shortened text.
	 */
	private function truncate_text( $text, $max_chars ) {
		if ( ! is_string( $text ) ) {
			return '';
		}

		$text  = trim( $text );
		$limit = (int) $max_chars;

		// Nothing to cut: empty input, or truncation disabled.
		if ( '' === $text || $limit <= 0 ) {
			return $text;
		}

		$multibyte = function_exists( 'mb_strlen' ) && function_exists( 'mb_substr' );
		$length    = $multibyte ? mb_strlen( $text, 'UTF-8' ) : strlen( $text );
		if ( $length <= $limit ) {
			return $text;
		}

		return $multibyte
			? mb_substr( $text, 0, $limit, 'UTF-8' )
			: substr( $text, 0, $limit );
	}

	/**
	 * Pull the first balanced {...} object out of LLM output.
	 *
	 * The body of a Markdown code fence (``` or ```json) is searched first; when there is
	 * no fence, or the fenced body holds no balanced object, the whole text is searched.
	 *
	 * @param mixed $text Raw LLM output; non-strings are treated as empty.
	 * @return string The object's text, or '' when none is found.
	 */
	private function extract_first_json_object( $text ) {
		if ( ! is_string( $text ) ) {
			return '';
		}

		$text = trim( $text );
		if ( '' === $text ) {
			return '';
		}

		// Prefer fenced content if present.
		$fence = array();
		if ( preg_match( '/```(?:json)?\s*([\s\S]*?)\s*```/i', $text, $fence ) ) {
			$from_fence = $this->extract_balanced_json_object( trim( (string) $fence[1] ) );
			if ( '' !== $from_fence ) {
				return $from_fence;
			}
		}

		return $this->extract_balanced_json_object( $text );
	}

	/**
	 * Return the first brace-balanced {...} span found in the text.
	 *
	 * The text is scanned byte by byte. Double quotes toggle an "inside string" state
	 * (also before the first brace is seen), a backslash inside a string protects the next
	 * byte, and braces inside strings are not counted.
	 *
	 * @param mixed $text Text to scan; non-strings are treated as empty.
	 * @return string The trimmed span from the first '{' to its matching '}', or '' when
	 *                no balanced object exists.
	 */
	private function extract_balanced_json_object( $text ) {
		if ( ! is_string( $text ) || '' === $text ) {
			return '';
		}

		$total         = strlen( $text );
		$open_at       = null;  // Offset of the first '{' seen outside a string.
		$nesting       = 0;     // Number of currently open braces.
		$inside_quotes = false; // True while between two unescaped double quotes.
		$skip_next     = false; // True when the previous byte was an escaping backslash.

		for ( $pos = 0; $pos < $total; ++$pos ) {
			$char = $text[ $pos ];

			if ( $skip_next ) {
				// This byte is escaped; it can neither close the string nor start an escape.
				$skip_next = false;
				continue;
			}

			if ( "\\" === $char ) {
				// A backslash only acts as an escape inside a string.
				$skip_next = $inside_quotes;
				continue;
			}

			if ( '"' === $char ) {
				$inside_quotes = ! $inside_quotes;
				continue;
			}

			if ( $inside_quotes ) {
				continue;
			}

			if ( '{' === $char ) {
				if ( null === $open_at ) {
					$open_at = $pos;
				}
				++$nesting;
			} elseif ( '}' === $char && $nesting > 0 ) {
				--$nesting;
				if ( 0 === $nesting && null !== $open_at ) {
					return trim( substr( $text, $open_at, $pos - $open_at + 1 ) );
				}
			}
		}

		return '';
	}

	/**
	 * Escape raw control characters that appear inside double-quoted JSON strings.
	 * This helps when LLM outputs "almost JSON" with literal newlines inside string values.
	 *
	 * JSON forbids every unescaped character below 0x20 inside a string, not only
	 * CR/LF/TAB, so all of them are rewritten: backspace, tab, newline, form feed and
	 * carriage return get their short escapes (\b \t \n \f \r) and the rest become \u00XX.
	 * Text outside string literals and bytes following a backslash inside a string are
	 * copied unchanged.
	 *
	 * @param mixed $text Candidate JSON text; non-strings are treated as empty.
	 * @return string The text with control characters inside strings escaped ('' for empty input).
	 */
	private function sanitize_json_string_literals( $text ) {
		$text = is_string( $text ) ? $text : '';
		$len = strlen( $text );
		if ( $len === 0 ) {
			return '';
		}

		// Control characters that have a dedicated two-character JSON escape.
		$short_escapes = array(
			"\x08" => '\\b',
			"\t"   => '\\t',
			"\n"   => '\\n',
			"\f"   => '\\f',
			"\r"   => '\\r',
		);

		$out = '';
		$in_string = false;
		$escape = false;

		for ( $i = 0; $i < $len; $i++ ) {
			$ch = $text[ $i ];

			if ( $escape ) {
				// Byte following a backslash inside a string: copy verbatim.
				$out .= $ch;
				$escape = false;
				continue;
			}

			if ( $ch === "\\" ) {
				$out .= $ch;
				if ( $in_string ) {
					$escape = true;
				}
				continue;
			}

			if ( $ch === '"' ) {
				$out .= $ch;
				$in_string = ! $in_string;
				continue;
			}

			// Replace literal control characters inside JSON strings.
			if ( $in_string && ord( $ch ) < 0x20 ) {
				$out .= isset( $short_escapes[ $ch ] )
					? $short_escapes[ $ch ]
					: sprintf( '\\u%04x', ord( $ch ) );
				continue;
			}

			$out .= $ch;
		}

		return $out;
	}
}
