//----------------------------------------------------------------------------------------
//Copyright (C) 2012 Macnica Inc. All Rights Reserved.
//
//Use in source and binary forms, with or without modification, are permitted provided
//by agreeing to the following terms and conditions:
//
//REDISTRIBUTIONS OR SUBLICENSING IN SOURCE AND BINARY FORM ARE NOT ALLOWED.
//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS"
//AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
//IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
//DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE
//FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
//DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
//SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
//OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
//OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//AND ALSO REGARDING THE REFERENCE SOFTWARE, REDISTRIBUTION OR SUBLICENSING
//IN SOURCE AND BINARY FORM ARE NOT ALLOWED.
//----------------------------------------------------------------------------------------
// DESCRIPTION
//		horizontal direction scaling circuit
//----------------------------------------------------------------------------------------
// REVISION HISTORY
//		v1.0 Mar. 13 2012	: Initial Version Release
//----------------------------------------------------------------------------------------
// PARAMETERS
//		Q_BIT 			: quantized bit width per color plane
//		PLANE			: color plane count
//		SIZE_BIT		: size setting signal bit width
//		RATE_FRAC_BIT	: fractional part bit width for coordinate calculation
//		RAM_COUNT		: ram count for line buffer
//
//		RATE_BIT		: scaling rate calculation bit width ( SIZE_BIT + RATE_FRAC_BIT )
//		REF_BIT			: reference pixel pointer ( multiplication circuit result ) bit width ( SIZE_BIT + RATE_BIT )
//		DLT_FRAC_BIT	: depending on filter table resolution ( fixed value )
//
//		PIXEL_BIT		: pixel bit width ( Q_BIT * PLANE )
//		RAM_ADR_BIT		: ram address bus width ( SIZE_BIT )
//		RAM_DATA_BIT	: ram data bus width ( PIXEL_BIT )
//
// ------ ring state machine ------
//		PIXEL_INIT		: initial state
//		PIXEL_LSTART	: standby for source input
//		PIXEL_SRC_EN	: source counter side pipeline -> enable  / target counter side pipeline -> disable
//		PIXEL_TGT_EN	: source counter side pipeline -> disable / target counter side pipeline -> enable
//		PIXEL_BOTH_EN	: source counter side pipeline -> enable  / target counter side pipeline -> enable
//		PIXEL_BOTH_DIS	: source counter side pipeline -> disable / target counter side pipeline -> disable
//		PIXEL_SRC_LEND	: termination of source counter side process and standby for target counter side process
//		PIXEL_TGT_LEND	: termination of target counter side process and standby for source counter side process
//		PIXEL_BOTH_LEND	: termination of source & terget counter side process
//
// ------ frame state machine ------
//		FRAME_INIT		: initial state
//		FRAME_CALC		: scaling rate calculation state
//		FRAME_OPE		: scaling operation state
//		FRAME_WAIT		: waiting for the end processing
//
//----------------------------------------------------------------------------------------
// I/O PORTS
//		clk				: clock for all circuit
//		rst_n			: asynchronous reset ( low active )
//		srst			: synchronous reset
//		enable			: clock enable
//
//		smpl_mode_i		: chroma sampling mode 0:422 1:444
//		scan_mode_i		: output scan mode 0:interlace 1:progressive
//
//		src_wdt_i		: horizontal size of input frame data
//		src_hgt_i		: vertical size of input frame data
//		tgt_wdt_i		: horizontal size of output frame data
//		h_scl_rate_i	: horizontal scaling rate
//		init_end_i		: initial calculation end pulse at power on sequence
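//		rd_line_pos_i	: read line position
//		tgt_end_i		: end pulse input ( clears the source / target side pipeline registers and
//						  the pixel count error flag, and releases the frame state machine )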
//
//		valid_fx_dltx_o	: data valid to horizontal scaling core block
//		fxm1_o			: horizontal x-1 pixel data to horizontal scaling core block
//		fx0_o			: horizontal x	 pixel data to horizontal scaling core block
//		fxp1_o			: horizontal x+1 pixel data to horizontal scaling core block
//		fxp2_o			: horizontal x+2 pixel data to horizontal scaling core block
//		deltax_o		: fractional value of horizontal reference coordinate
//
//		valid_fxd_i		: data valid from horizontal scaling core block
//		fxd_i			: interpolated pixel data from horizontal scaling core block
//
//		ram_wadr_o		: ram ( line buffer ) write address
//		ram_wen_o		: ram ( line buffer ) write enable
//		ram_wdata_o		: ram ( line buffer ) write data
//
//		wr_line_pos_o	: write line position
//
//		frame_start_i	: frame start of input frame data
//		frame_end_i		: frame end of input frame data
//		valid_i			: data valid of input frame data
//		pixel_i			: pixel data of input frame data
//		field_i			: field status of input frame data 1:even field / 0:odd field
//		ready_o			: data reception ready of input frame data
//
//		field_o			: gated field status 1:even field / 0:odd field
//						  when scan_mode_i = 0, field_o toggles per field
//						  when scan_mode_i = 1, field_o keeps the same polarity as field_i
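//		pix_error_o		: pixel count error
//		fstart_asrt_o	: frame start assert pulse ( high for one enabled cycle each time the
//						  frame state machine enters FRAME_CALC )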
//
//----------------------------------------------------------------------------------------
`timescale 1ps/1ps
`default_nettype none

module	scl16_din_ctl (
	clk				,
	rst_n			,
	srst			,
	enable			,

	smpl_mode_i		,
	scan_mode_i		,

	src_wdt_i		,
	src_hgt_i		,
	tgt_wdt_i		,
	h_scl_rate_i	,
	init_end_i		,

	rd_line_pos_i	,
	tgt_end_i		,

	valid_fx_dltx_o	,
	fxm1_o			,
	fx0_o			,
	fxp1_o			,
	fxp2_o			,
	deltax_o		,

	valid_fxd_i		,
	fxd_i			,

	ram_wadr_o		,
	ram_wen_o		,
	ram_wdata_o		,

	wr_line_pos_o	,
	field_o			,

	frame_start_i	,
	frame_end_i		,
	valid_i			,
	pixel_i			,
	field_i			,
	ready_o			,

	pix_error_o		,
	fstart_asrt_o
) ;

// =============================================================================
// DEFINE INCLUDE
// =============================================================================

// =============================================================================
// PARAMETER DEFINITION
// =============================================================================
	// Bit width helper : returns the number of right shifts needed to bring
	// ( value - 1 ) down to zero, i.e. ceil( log2( value ) ) for value >= 1.
	// Any value <= 1 yields 0.
	function integer	log2 ;
		input integer	value ;
		integer			remain ;
		begin
			log2	= 0 ;
			remain	= value - 1 ;
			while ( remain > 0 ) begin
				remain	= remain >> 1 ;
				log2	= log2 + 1 ;
			end
		end
	endfunction

	// ---------------------------------------------------------------------
	// Below parameters have to be defined from upper module
	// ---------------------------------------------------------------------
	// PPC : number of pixels carried per clock on the pixel buses
	//       ( PXL_BIT_PER_CLK = PIXEL_BIT * PPC ) ; the width counters advance by PPC.
	parameter PPC					= 4				; // add by sugino
	parameter Q_BIT					= 8				;
	parameter PLANE					= 3				;
	parameter SIZE_BIT				= 12			;
	parameter RATE_FRAC_BIT			= SIZE_BIT+ 2	;
	parameter RAM_COUNT				= 5				;

	// ---------------------------------------------------------------------
	// Please do not change the following parameters
	// ---------------------------------------------------------------------
	parameter RATE_BIT				= SIZE_BIT + RATE_FRAC_BIT	;
	parameter REF_BIT				= SIZE_BIT + RATE_BIT		;
	parameter DLT_FRAC_BIT			= 5							;

	// per-pixel and per-clock bus widths
	parameter PIXEL_BIT				= Q_BIT * PLANE				;
	parameter PXL_BIT_PER_CLK		= PIXEL_BIT * PPC			; // add by sugino
	parameter DLT_BIT_PER_CLK		= DLT_FRAC_BIT * PPC		; // add by sugino
	// depth ( in pixels ) of the pixel shift register and width of its select index
	parameter SFT_DATA_NUM			= ( PPC == 1 ) ? 8 : 16		;
	parameter FX_SEL_BIT			= log2(SFT_DATA_NUM)		;

	// NOTE(review): log2(PPC - 1) equals log2(PPC) for PPC = 1, 4 and 8, but gives
	// 0 instead of 1 for PPC = 2 ( and 1 instead of 2 for PPC = 3 ), which leaves the
	// address bus wider than PPC pixels per word would need - confirm whether
	// log2(PPC) was intended before using those configurations.
	parameter RAM_ADR_BIT			= SIZE_BIT - log2(PPC - 1)	; // add by sugino
//	parameter RAM_ADR_BIT			= SIZE_BIT					;
	parameter RAM_DATA_BIT			= PXL_BIT_PER_CLK			; // add by sugino
//	parameter RAM_DATA_BIT			= PIXEL_BIT					;

	// ring ( pixel flow control ) state encoding ; the four separate *_EN / *_DIS
	// states of the original design are commented out and replaced by PIXEL_EN
	parameter PIXEL_INIT			= 4'h0						;
	parameter PIXEL_LSTART			= 4'h1						;
	parameter PIXEL_EN				= 4'h2						;
//	parameter PIXEL_SRC_EN			= 4'h2						;
//	parameter PIXEL_TGT_EN			= 4'h3						;
//	parameter PIXEL_BOTH_EN			= 4'h4						;
//	parameter PIXEL_BOTH_DIS		= 4'h5						;
	parameter PIXEL_SRC_LEND		= 4'h6						;
	parameter PIXEL_TGT_LEND		= 4'h7						;
	parameter PIXEL_BOTH_LEND		= 4'h8						;

	// frame ( VSI ) state encoding ; all four codes of the 2-bit state register are used
	parameter FRAME_INIT			= 2'h0						;
	parameter FRAME_CALC			= 2'h1						;
	parameter FRAME_OPE				= 2'h2						;
	parameter FRAME_WAIT			= 2'h3						;


// =============================================================================
// PORT DECLARATION
// =============================================================================
	input	wire								clk				;
	input	wire								rst_n			;
	input	wire								srst			;
	input	wire								enable			;

	input	wire								smpl_mode_i		;
	input	wire								scan_mode_i		;

	input	wire	[ SIZE_BIT-1 : 0 ]			src_wdt_i		;
	input	wire	[ SIZE_BIT-1 : 0 ]			src_hgt_i		;
	input	wire	[ SIZE_BIT-1 : 0 ]			tgt_wdt_i		;
	input	wire	[ RATE_BIT-1 : 0 ]			h_scl_rate_i	;
	input	wire								init_end_i		;

	input	wire	[ SIZE_BIT-1 : 0 ]			rd_line_pos_i	;
	input	wire								tgt_end_i		;

	output	wire								valid_fx_dltx_o	;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	fxm1_o			;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	fx0_o			;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	fxp1_o			;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	fxp2_o			;
	output	wire	[ DLT_BIT_PER_CLK-1 : 0 ]	deltax_o		;

	input	wire								valid_fxd_i		;
	input	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	fxd_i			;

	output	wire	[ RAM_ADR_BIT-1 : 0 ]		ram_wadr_o		;
	output	wire	[ RAM_COUNT-1 : 0 ]			ram_wen_o		;
	output	wire	[ RAM_DATA_BIT-1 : 0 ]		ram_wdata_o		;

	output	wire	[ SIZE_BIT-1 : 0 ]			wr_line_pos_o	;
	output	wire								field_o			;

	input	wire								frame_start_i	;
	input	wire								frame_end_i		;
	input	wire								valid_i			;
	input	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	pixel_i			;
	input	wire								field_i			;
	output	wire								ready_o			;

	output	wire								pix_error_o		;
	output	wire								fstart_asrt_o	;

// =============================================================================
// REG / WIRE DECLARATION
// =============================================================================
	genvar								i				;
	genvar								j				;

	reg		[ RATE_BIT-1 : 0 ]			hrate_lat_ff	;

	reg									fstart_pre_ff	;
	reg									fend_pre_ff		;
	reg									valid_pre_ff	;
	reg		[ PXL_BIT_PER_CLK-1 : 0 ]	pixel_pre_ff	;
	reg									field_pre_ff	;

	wire								fstart_in		;
	wire								fend_in			;
	wire								valid_in		;
	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	pixel_in		;
	wire								field_in		;

	wire								fstart_lat_in	;
	wire								fend_trg_in		;

	reg		[ 1 : 0 ]					vsi_state_ff	;
	reg									ready_out_ff	;
	reg									fstart_asrt_ff	;
	reg									fstart_hold_ff	;
	reg									calc_term_ff	;

	reg									valid_ipkt_ff	;
	reg									proc_wait_ff	;

	wire								dtct_trigger	;
	wire								pixel_error		;
	reg									pix_error_ff	;

	reg									fstart_in1_ff	;
	reg									valid_in1_ff	;
	reg		[ PXL_BIT_PER_CLK-1 : 0 ]	pixel_in1_ff	;

	reg									field_lat1_ff	;
	reg									fstart_asrt2_ff	;

	reg									sel_field_ff	;

	wire								swdt_cnt_en		;
	wire								swdt_cnt_clr	;
	reg		[ SIZE_BIT-1 : 0 ]			swdt_cnt_ff		;

	wire								shgt_cnt_en		;
	wire								shgt_cnt_clr	;
	reg		[ SIZE_BIT-1 : 0 ]			shgt_cnt_ff		;

	reg		[ SIZE_BIT-1 : 0 ]			src_wdt3_ff		;
	reg		[ SIZE_BIT-1 : 0 ]			src_wdt4_ff		;

	reg									valid_in2_ff	;
	reg		[ PXL_BIT_PER_CLK-1 : 0 ]	pixel_in2_ff	;
	reg		[ PIXEL_BIT-1 : 0 ]			pixel_sft_ff	[ 0 : SFT_DATA_NUM - 1 ]	;

	wire								twdt_cnt_en		;
	wire								twdt_cnt_clr	;
	reg		[ SIZE_BIT-1 : 0 ]			twdt_cnt_ff		;
	wire	[ RATE_BIT-1 : 0 ] 			h_scl_rate		;

	wire	[ REF_BIT-1 : 0 ]			href_position	[ 0 : PPC - 1 ]	;
	wire	[ SIZE_BIT-1 : 0 ]			href_pos		[ 0 : PPC - 1 ]	;
	wire	[ DLT_FRAC_BIT-1 : 0 ]		href_dlt		[ 0 : PPC - 1 ]	;

	reg		[ SIZE_BIT-1 : 0 ]			tgt_wdt1_ff		;
	reg		[ SIZE_BIT-1 : 0 ]			tgt_wdt2_ff		;
	reg		[ SIZE_BIT-1 : 0 ]			tgt_wdt3_ff		;
	reg		[ SIZE_BIT-1 : 0 ]			tgt_wdt4_ff		;
	reg									tgt_wdt5_ff		;
	reg		[ SIZE_BIT-1 : 0 ]			href_pos3_ff	[ 0 : PPC - 1 ]	;
	reg		[ SIZE_BIT-1 : 0 ]			href_pos4_ff	[ 0 : PPC - 1 ]	;
	reg									href_pos5_ff	[ 0 : PPC - 1 ]	;
	reg		[ DLT_FRAC_BIT-1 : 0 ]		href_dlt3_ff	[ 0 : PPC - 1 ]	;
	reg		[ DLT_FRAC_BIT-1 : 0 ]		href_dlt4_ff	[ 0 : PPC - 1 ]	;

	wire	[ SIZE_BIT : 0 ]			delta_pix_p1	[ 0 : PPC - 1 ]	;
	wire	[ SIZE_BIT : 0 ]			delta_pix_p2	[ 0 : PPC - 1 ]	;
	wire	[ SIZE_BIT : 0 ]			delta_pix_p3	[ 0 : PPC - 1 ]	;
	wire	[ SIZE_BIT : 0 ]			delta_pix_p4	[ 0 : PPC - 1 ]	;
//	wire	[ PPC - 1 : 0 ]				pipe_tgt_en1	;
//	wire	[ PPC - 1 : 0 ]				pipe_tgt_en2	;
//	wire	[ PPC - 1 : 0 ]				pipe_tgt_en3	;
//	wire	[ PPC - 1 : 0 ]				pipe_tgt_en4	;
//	wire	[ PPC - 1 : 0 ]				pipe_src_en1	;
//	wire	[ PPC - 1 : 0 ]				pipe_src_en2	;
//	wire	[ PPC - 1 : 0 ]				pipe_src_en3	;

	reg		[ SIZE_BIT : 0 ] 			delta_pix_p		[ 0 : PPC - 1 ]	;
//	reg		[ PPC - 1 : 0 ]				pipe_tgt_en		;
//	reg		[ PPC - 1 : 0 ]				pipe_src_en		;

	reg		[ 3 : 0 ]					ring_state_ff	;
	reg									valid_umsk_ff	;
	reg									src_en_ff		;
	reg									tgt_en_ff		;

	reg		[ SIZE_BIT-1 : 0 ]			wr_line_pos_ff	;
	reg		[ FX_SEL_BIT-1 : 0 ]		fx_sel_ff		[ 0 : PPC - 1 ]	;

	wire	[ PIXEL_BIT-1 : 0 ]			fxm2_sel		[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fxm1_sel		[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fx0_sel			[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fxp1_sel		[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fxp2_sel		[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fxp3_sel		[ 0 : PPC - 1 ]	;

	reg		[ PIXEL_BIT-1 : 0 ]			fxm2_sel_ff		[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fxm1_sel_ff		[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fx0_sel_ff		[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fxp1_sel_ff		[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fxp2_sel_ff		[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fxp3_sel_ff		[ 0 : PPC - 1 ]	;

	wire	[ PIXEL_BIT-1 : 0 ]			fxm1			[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fx0				[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fxp1			[ 0 : PPC - 1 ]	;
	wire	[ PIXEL_BIT-1 : 0 ]			fxp2			[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fxm1_ff			[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fx0_ff			[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fxp1_ff			[ 0 : PPC - 1 ]	;
	reg		[ PIXEL_BIT-1 : 0 ]			fxp2_ff			[ 0 : PPC - 1 ]	;

	reg		[ DLT_FRAC_BIT-1 : 0 ]		dltx_dly1_ff	[ 0 : PPC - 1 ]	;
	reg		[ DLT_FRAC_BIT-1 : 0 ]		dltx_dly2_ff	[ 0 : PPC - 1 ]	;

	reg									vdltx_dly1_ff	;
	reg									vdltx_dly2_ff	;

	reg		[ RAM_ADR_BIT-1 : 0 ]		adr_cnt_ff		;
	wire								adr_clr			;
	reg 	[ RAM_COUNT-1 : 0 ]			sel_cnt_ff		;

	reg									vfxd_dly_ff		;
	reg		[ PXL_BIT_PER_CLK-1 : 0 ]	fxd_dly_ff		;
	reg		[ RAM_COUNT-1 : 0 ]			ram_wen_ff		;
	reg		[ RAM_ADR_BIT-1 : 0 ]		ram_wadr_ff		;
	reg		[ RAM_DATA_BIT-1 : 0 ]		ram_wdata_ff	;

	wire	[ SIZE_BIT : 0 ]			delta_line_p	;
	wire								write_grant		;
	reg									en_ff			;

// =============================================================================
// FUNCTION DESCRIPTION
// =============================================================================

	//-----------------------------
	// output assign
	//-----------------------------
	// every output is driven directly from its register

	// input handshake / frame status
	assign fstart_asrt_o	= fstart_asrt_ff ;
	assign ready_o			= ready_out_ff ;
	assign wr_line_pos_o	= wr_line_pos_ff ;

	// line buffer write port
	assign ram_wdata_o		= ram_wdata_ff ;
	assign ram_wadr_o		= ram_wadr_ff ;
	assign ram_wen_o		= ram_wen_ff ;

	// interface to the horizontal scaling core : pixel lane i occupies
	// bits [ PIXEL_BIT * i +: PIXEL_BIT ] ( DLT_FRAC_BIT for the fraction ) of each bus
	assign valid_fx_dltx_o	= vdltx_dly2_ff ;

	generate
		for ( i = 0 ; i < PPC ; i = i + 1 ) begin	: fx_out_gen
			assign deltax_o[ DLT_FRAC_BIT * i +: DLT_FRAC_BIT ]	= dltx_dly2_ff[ i ] ;
			assign fxm1_o[ PIXEL_BIT * i +: PIXEL_BIT ]			= fxm1_ff[ i ] ;
			assign fx0_o[ PIXEL_BIT * i +: PIXEL_BIT ]			= fx0_ff[ i ] ;
			assign fxp1_o[ PIXEL_BIT * i +: PIXEL_BIT ]			= fxp1_ff[ i ] ;
			assign fxp2_o[ PIXEL_BIT * i +: PIXEL_BIT ]			= fxp2_ff[ i ] ;
		end
	endgenerate

	//-----------------------------
	// operating condition setting
	//-----------------------------
	// scaling rate latch : h_scl_rate_i is captured on a synchronous reset and again
	// on every init_end_i pulse ( the capture is not gated by enable )
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			hrate_lat_ff	<= {RATE_BIT{1'b0}} ;
		end
		else if ( srst || init_end_i ) begin
			hrate_lat_ff	<= h_scl_rate_i ;
		end
	end

	//-----------------------
	// source side data flow
	//-----------------------
	// control signal buffering register
	// Keeps a copy of the input beat ( frame start / frame end / valid / pixel / field )
	// sampled while ready_out_ff is high ; the copy is what the input selector uses
	// once ready_out_ff has dropped.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			fstart_pre_ff	<= 1'b0 ;
			fend_pre_ff		<= 1'b0 ;
			valid_pre_ff	<= 1'b0 ;
			// pixel_pre_ff carries PPC pixels, so clear the full PXL_BIT_PER_CLK width
			pixel_pre_ff	<= {PXL_BIT_PER_CLK{1'b0}} ;
			field_pre_ff	<= 1'b0 ;
		end
		else if ( srst ) begin
			fstart_pre_ff	<= 1'b0 ;
			fend_pre_ff		<= 1'b0 ;
			valid_pre_ff	<= 1'b0 ;
			pixel_pre_ff	<= {PXL_BIT_PER_CLK{1'b0}} ;
			field_pre_ff	<= 1'b0 ;
		end
		else if ( enable ) begin
			// only sample while the upstream is allowed to drive a new beat
			if ( ready_out_ff ) begin
				fstart_pre_ff	<= frame_start_i ;
				fend_pre_ff		<= frame_end_i ;
				valid_pre_ff	<= valid_i ;
				pixel_pre_ff	<= pixel_i ;
				field_pre_ff	<= field_i ;
			end
		end
	end

	// input selector : the live input beat is used while ready_out_ff is high,
	// otherwise the beat held in the *_pre_ff registers is used
	assign { fstart_in , fend_in , valid_in , field_in }
						= ( ready_out_ff ) ? { frame_start_i , frame_end_i , valid_i , field_i }
										   : { fstart_pre_ff , fend_pre_ff , valid_pre_ff , field_pre_ff } ;
	assign pixel_in		= ( ready_out_ff ) ? pixel_i : pixel_pre_ff ;

	// frame start is accepted on a valid start beat whose lowest 4 data bits are zero
	assign fstart_lat_in	= ( pixel_in[3:0] == 4'h0 ) & valid_in & fstart_in ;
	// frame end trigger : valid end beat that is not an accepted frame start
	assign fend_trg_in		= ~fstart_lat_in & valid_in & fend_in ;

	// VSI statemachine
	//   FRAME_INIT : ready is high, waiting for an accepted frame start
	//   FRAME_CALC : waiting for init_end_i ( fstart_asrt_ff pulses on entry,
	//                calc_term_ff stays high while waiting )
	//   FRAME_OPE  : scaling operation, ready follows src_en_ff
	//   FRAME_WAIT : a new frame start arrived before tgt_end_i ; it is held
	//                ( fstart_hold_ff ) until tgt_end_i
	// Each enabled cycle first applies the common default values below ; a branch
	// then only overrides the outputs that differ ( the last non-blocking
	// assignment wins ).  A branch that does not assign vsi_state_ff keeps the state.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			vsi_state_ff	<= FRAME_INIT ;
			ready_out_ff	<= 1'b0 ;
			fstart_asrt_ff	<= 1'b0 ;
			fstart_hold_ff	<= 1'b0 ;
			calc_term_ff	<= 1'b0 ;
			valid_ipkt_ff	<= 1'b0 ;
		end
		else if ( srst ) begin
			vsi_state_ff	<= FRAME_INIT ;
			ready_out_ff	<= 1'b0 ;
			fstart_asrt_ff	<= 1'b0 ;
			fstart_hold_ff	<= 1'b0 ;
			calc_term_ff	<= 1'b0 ;
			valid_ipkt_ff	<= 1'b0 ;
		end
		else if ( enable ) begin
			// common defaults
			ready_out_ff	<= 1'b0 ;
			fstart_asrt_ff	<= 1'b0 ;
			fstart_hold_ff	<= 1'b0 ;
			calc_term_ff	<= 1'b0 ;
			valid_ipkt_ff	<= 1'b1 ;

			case ( vsi_state_ff )
				FRAME_INIT : begin
					if ( fstart_lat_in ) begin
						// frame start accepted : start the rate calculation
						vsi_state_ff	<= FRAME_CALC ;
						fstart_asrt_ff	<= 1'b1 ;
						calc_term_ff	<= 1'b1 ;
					end
					else begin
						// keep waiting with ready asserted
						ready_out_ff	<= 1'b1 ;
						valid_ipkt_ff	<= 1'b0 ;
					end
				end
				FRAME_CALC : begin
					if ( init_end_i ) begin
						vsi_state_ff	<= FRAME_OPE ;
						ready_out_ff	<= src_en_ff ;
					end
					else begin
						calc_term_ff	<= 1'b1 ;
					end
				end
				FRAME_OPE : begin
					if ( tgt_end_i ) begin
						if ( fstart_lat_in ) begin
							// next frame start coincides with the end pulse
							vsi_state_ff	<= FRAME_CALC ;
							fstart_asrt_ff	<= 1'b1 ;
							calc_term_ff	<= 1'b1 ;
						end
						else begin
							vsi_state_ff	<= FRAME_INIT ;
							ready_out_ff	<= 1'b1 ;
							valid_ipkt_ff	<= 1'b0 ;
						end
					end
					else if ( fstart_lat_in && ready_out_ff ) begin
						// next frame start arrived early : hold it until tgt_end_i
						vsi_state_ff	<= FRAME_WAIT ;
						fstart_hold_ff	<= 1'b1 ;
					end
					else begin
						ready_out_ff	<= src_en_ff ;
						// input packet valid drops at the frame end and is held otherwise
						if ( fend_trg_in & src_en_ff ) begin
							valid_ipkt_ff	<= 1'b0 ;
						end
						else begin
							valid_ipkt_ff	<= valid_ipkt_ff ;
						end
					end
				end
				FRAME_WAIT : begin
					if ( tgt_end_i ) begin
						// release the held frame start
						vsi_state_ff	<= FRAME_CALC ;
						fstart_asrt_ff	<= 1'b1 ;
						calc_term_ff	<= 1'b1 ;
					end
					else begin
						fstart_hold_ff	<= 1'b1 ;
					end
				end
				default : begin
					// only reachable with an unknown state value
					vsi_state_ff	<= FRAME_INIT ;
					ready_out_ff	<= 1'bx ;
					fstart_asrt_ff	<= 1'bx ;
					fstart_hold_ff	<= 1'bx ;
					calc_term_ff	<= 1'bx ;
					valid_ipkt_ff	<= 1'bx ;
				end
			endcase
		end
	end

	// scaling transaction wait register
	// set   : frame end trigger seen while the source pipeline is enabled and the
	//         input packet is still valid
	// clear : srst, tgt_end_i or a latched pixel count error ( clear is not gated
	//         by enable )
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			proc_wait_ff	<= 1'b0 ;
		end
		else if ( srst || tgt_end_i || pix_error_ff ) begin
			proc_wait_ff	<= 1'b0 ;
		end
		else if ( enable && src_en_ff && fend_trg_in && valid_ipkt_ff ) begin
			proc_wait_ff	<= 1'b1 ;
		end
	end

	// error detection trigger, raised when either
	//  - the frame end has been seen ( proc_wait_ff ) and the 1st stage holds no valid data, or
	//  - a held frame start ( fstart_hold_ff ) coincides with valid 1st stage data while
	//    the source width counter is not equal to src_wdt_i.
	// NOTE(review): the width counter clear / height counter enable compare
	// ( swdt_cnt_ff + PPC - 1 ) against src_wdt_i, while this term still compares
	// swdt_cnt_ff itself - confirm this is intended for PPC > 1.
	assign dtct_trigger		= ( proc_wait_ff & ~valid_in1_ff )
							| ( valid_in1_ff & fstart_hold_ff & ~( swdt_cnt_ff == src_wdt_i ) ) ;

	// a trigger is an error when the width counter is mid-line ( non-zero ) or the
	// height counter has not yet passed src_hgt_i
	assign pixel_error		= ( dtct_trigger & ( ( swdt_cnt_ff != {SIZE_BIT{1'b0}} ) | ( shgt_cnt_ff <= src_hgt_i ) ) ) ;

	// pixel count error register ( sticky )
	// set   : pixel_error while the source pipeline is enabled
	// clear : srst or tgt_end_i ( clear is not gated by enable )
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			pix_error_ff	<= 1'b0 ;
		end
		else if ( srst || tgt_end_i ) begin
			pix_error_ff	<= 1'b0 ;
		end
		else if ( enable && src_en_ff && pixel_error ) begin
			pix_error_ff	<= 1'b1 ;
		end
	end

	assign pix_error_o	= pix_error_ff ;

	// control/valid/data delay register ( source side pipeline 1st - 2nd stage )
	// Advances only while the source side pipeline is enabled ( src_en_ff ).
	// The valid flag is masked with valid_ipkt_ff so that beats outside an accepted
	// input packet are not marked valid.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			fstart_in1_ff	<= 1'b0 ;
			valid_in1_ff	<= 1'b0 ;
			// pixel_in1_ff carries PPC pixels, so clear the full PXL_BIT_PER_CLK width
			pixel_in1_ff	<= {PXL_BIT_PER_CLK{1'b0}} ;
		end
		else if ( srst ) begin
			fstart_in1_ff	<= 1'b0 ;
			valid_in1_ff	<= 1'b0 ;
			pixel_in1_ff	<= {PXL_BIT_PER_CLK{1'b0}} ;
		end
		else if ( enable ) begin
			if ( src_en_ff ) begin
				fstart_in1_ff	<= fstart_lat_in ;
				valid_in1_ff	<= valid_in & valid_ipkt_ff ;
				pixel_in1_ff	<= pixel_in ;
			end
		end
	end

	// field signal setting
	//   field_lat1_ff   : field_in captured when a frame start is accepted while ready
	//                     ( not gated by enable )
	//   fstart_asrt2_ff : fstart_asrt_ff delayed by one enabled cycle
	//   sel_field_ff    : updated on fstart_asrt2_ff ( not gated by enable ) ;
	//                     scan_mode_i = 1 -> inverted latched field,
	//                     scan_mode_i = 0 -> toggles
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			field_lat1_ff	<= 1'b0 ;
			fstart_asrt2_ff	<= 1'b0 ;
			sel_field_ff	<= 1'b0 ;
		end
		else if ( srst ) begin
			field_lat1_ff	<= 1'b0 ;
			fstart_asrt2_ff	<= 1'b0 ;
			sel_field_ff	<= 1'b0 ;
		end
		else begin
			if ( ready_out_ff && fstart_lat_in ) begin
				field_lat1_ff	<= field_in ;
			end

			if ( enable ) begin
				fstart_asrt2_ff	<= fstart_asrt_ff ;
			end

			if ( fstart_asrt2_ff ) begin
				if ( scan_mode_i ) begin
					sel_field_ff	<= ~field_lat1_ff ;
				end
				else begin
					sel_field_ff	<= ~sel_field_ff ;
				end
			end
		end
	end

	// the output is the inverse of the selected field register
	assign field_o	= ~sel_field_ff ;

	// source width / height counters ( source side pipeline 2nd stage )
	//
	// The width counter advances by PPC per valid beat ; a line is complete when
	// ( swdt_cnt_ff + PPC - 1 ) reaches src_wdt_i.  For PPC = 1 this is the plain
	// swdt_cnt_ff == src_wdt_i compare.  Counting stops once the height counter
	// has passed src_hgt_i.

	// width counter : count on valid data, clear at the line end or on a frame start
	assign swdt_cnt_en		= src_en_ff & valid_in1_ff & ( shgt_cnt_ff <= src_hgt_i ) ;
	assign swdt_cnt_clr		= src_en_ff
							& ( fstart_in1_ff
							  | ( valid_in1_ff & ( ( swdt_cnt_ff + PPC - 1 ) == src_wdt_i ) ) ) ;

	// height counter : count one per completed line, clear on the frame start pulse
	assign shgt_cnt_en		= src_en_ff & valid_in1_ff
							& ( shgt_cnt_ff <= src_hgt_i )
							& ( ( swdt_cnt_ff + PPC - 1 ) == src_wdt_i ) ;
	assign shgt_cnt_clr		= fstart_asrt_ff ;

	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			swdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
			shgt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		end
		else if ( srst ) begin
			swdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
			shgt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		end
		else if ( enable ) begin
			// clear has priority over count for both counters
			if ( swdt_cnt_clr ) begin
				swdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
			end
			else if ( swdt_cnt_en ) begin
				swdt_cnt_ff		<= swdt_cnt_ff + PPC ;
			end

			if ( shgt_cnt_clr ) begin
				shgt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
			end
			else if ( shgt_cnt_en ) begin
				shgt_cnt_ff		<= shgt_cnt_ff + 1'b1 ;
			end
		end
	end

	// source side signal buffering register ( stage following valid_in1_ff / pixel_in1_ff )
	// valid_in2_ff is only raised while the height counter has not passed src_hgt_i ;
	// pixel_in2_ff keeps its value when the previous stage holds no valid data.
	// Both are cleared by tgt_end_i regardless of enable.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			valid_in2_ff	<= 1'b0 						;
			// pixel_in2_ff carries PPC pixels, so clear the full PXL_BIT_PER_CLK width
			pixel_in2_ff	<= {PXL_BIT_PER_CLK{1'b0}}	;
		end
		else if ( srst ) begin
			valid_in2_ff	<= 1'b0 						;
			pixel_in2_ff	<= {PXL_BIT_PER_CLK{1'b0}}	;
		end
		else if ( tgt_end_i ) begin
			valid_in2_ff	<= 1'b0 						;
			pixel_in2_ff	<= {PXL_BIT_PER_CLK{1'b0}}	;
		end
		else if ( enable ) begin
			if ( src_en_ff ) begin
				valid_in2_ff	<= ( shgt_cnt_ff <= src_hgt_i ) & valid_in1_ff ;
				if ( valid_in1_ff ) begin
					pixel_in2_ff	<= pixel_in1_ff ;
				end
			end
		end
	end

	// source width counter delay line, kept in step with the pixel data :
	//   src_wdt3_ff follows swdt_cnt_ff when the 1st stage data is valid,
	//   src_wdt4_ff follows src_wdt3_ff when the 2nd stage data is valid.
	// Cleared by srst or tgt_end_i ( the clear is not gated by enable ).
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			src_wdt3_ff 	<= {SIZE_BIT{1'b0}} ;
			src_wdt4_ff 	<= {SIZE_BIT{1'b0}} ;
		end
		else if ( srst || tgt_end_i ) begin
			src_wdt3_ff 	<= {SIZE_BIT{1'b0}} ;
			src_wdt4_ff 	<= {SIZE_BIT{1'b0}} ;
		end
		else if ( enable && src_en_ff ) begin
			if ( valid_in1_ff ) begin
				src_wdt3_ff		<= swdt_cnt_ff ;
			end
			if ( valid_in2_ff ) begin
				src_wdt4_ff		<= src_wdt3_ff ;
			end
		end
	end

	// pixel shift register : SFT_DATA_NUM single-pixel entries that move by PPC
	// entries on every valid 2nd stage beat while the source pipeline is enabled.
	// All entries are cleared by srst or tgt_end_i ( not gated by enable ).
	generate
		// entry stage : entry i takes pixel lane ( PPC - 1 - i ) of pixel_in2_ff,
		// i.e. entry 0 receives the most significant pixel of the word
		for ( i = 0 ; i < PPC ; i = i + 1 ) begin	: shift_data_gen1

			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					pixel_sft_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
				end
				else if ( srst || tgt_end_i ) begin
					pixel_sft_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
				end
				else if ( enable && src_en_ff && valid_in2_ff ) begin
					pixel_sft_ff[ i ]	<= pixel_in2_ff[ PIXEL_BIT * ( PPC - 1 - i ) +: PIXEL_BIT ] ;
				end
			end

		end

		// remaining stages : each entry copies the entry PPC positions before it
		for ( i = PPC ; i < SFT_DATA_NUM ; i = i + PPC ) begin	: shift_data_gen2
			for ( j = 0 ; j < PPC ; j = j + 1 ) begin	: shift_data_gen3

				always @( posedge clk or negedge rst_n ) begin
					if ( !rst_n ) begin
						pixel_sft_ff[ i + j ]	<= {PIXEL_BIT{1'b0}} ;
					end
					else if ( srst || tgt_end_i ) begin
						pixel_sft_ff[ i + j ]	<= {PIXEL_BIT{1'b0}} ;
					end
					else if ( enable && src_en_ff && valid_in2_ff ) begin
						pixel_sft_ff[ i + j ]	<= pixel_sft_ff[ i + j - PPC ] ;
					end
				end

			end
		end
	endgenerate

	//-----------------------
	// target side data flow
	//-----------------------
	// target width counter ( target side pipeline 1st stage )
	// Advances by PPC whenever the target pipeline is enabled.  It is cleared on the
	// frame start pulse, or when ( twdt_cnt_ff + PPC - 1 ) reaches tgt_wdt_i while the
	// target pipeline is enabled outside the rate calculation term ( calc_term_ff ).
	// For PPC = 1 the line end compare is the plain twdt_cnt_ff == tgt_wdt_i.
	assign twdt_cnt_en		= tgt_en_ff ;
	assign twdt_cnt_clr		= fstart_asrt_ff
							| ( tgt_en_ff & ~calc_term_ff & ( ( twdt_cnt_ff + PPC - 1 ) == tgt_wdt_i ) ) ;

	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			twdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		end
		else if ( srst ) begin
			twdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		end
		else if ( enable ) begin
			// clear has priority over count
			if ( twdt_cnt_clr ) begin
				twdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
			end
			else if ( twdt_cnt_en ) begin
				twdt_cnt_ff		<= twdt_cnt_ff + PPC ;
			end
		end
	end

	// target width counter delay line ( tgt_wdt1_ff .. tgt_wdt4_ff ) ; the 5th tap
	// keeps only bit 0 of the 4th.  It shifts while the target pipeline is enabled and
	// is cleared by srst, tgt_end_i or the frame start pulse ( the clear is not gated
	// by enable ).
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			{ tgt_wdt4_ff , tgt_wdt3_ff , tgt_wdt2_ff , tgt_wdt1_ff }	<= {( 4 * SIZE_BIT ){1'b0}} ;
			tgt_wdt5_ff		<= 1'b0 ;
		end
		else if ( srst || tgt_end_i || fstart_asrt_ff ) begin
			{ tgt_wdt4_ff , tgt_wdt3_ff , tgt_wdt2_ff , tgt_wdt1_ff }	<= {( 4 * SIZE_BIT ){1'b0}} ;
			tgt_wdt5_ff		<= 1'b0 ;
		end
		else if ( enable && tgt_en_ff ) begin
			// one step of the delay line : counter -> 1 -> 2 -> 3 -> 4 -> 5 ( bit 0 only )
			{ tgt_wdt4_ff , tgt_wdt3_ff , tgt_wdt2_ff , tgt_wdt1_ff }
							<= { tgt_wdt3_ff , tgt_wdt2_ff , tgt_wdt1_ff , twdt_cnt_ff } ;
			tgt_wdt5_ff		<= tgt_wdt4_ff[0] ;
		end
	end

	// reference position calculator ( target side pipeline 2nd - 3rd stage )
	// the multiplier below uses the latched scaling rate
	assign h_scl_rate	= hrate_lat_ff ;

	generate
		for ( i = 0 ; i < PPC ; i = i + 1 ) begin	: href_position_gen

			scl16_unsigned_mult
				#(
					.APORT_BIT	( SIZE_BIT				) ,
					.BPORT_BIT	( RATE_BIT				)
				)
				u_mult_for_din_ctl (
					.clk		( clk					) ,
					.rst_n		( rst_n					) ,
					.enable		( tgt_en_ff				) ,

					.dataa_i	( twdt_cnt_ff + i		) ,
					.datab_i	( h_scl_rate			) ,

					.result_o	( href_position[ i ]	)
				);

			assign href_pos[ i ]	= href_position[ i ][ RATE_FRAC_BIT+SIZE_BIT-1 : RATE_FRAC_BIT ] ;
			assign href_dlt[ i ]	= href_position[ i ][ RATE_FRAC_BIT-1 : RATE_FRAC_BIT-DLT_FRAC_BIT ] ;

			// target side signal buffering register ( target side pipeline 2nd - 5th stage )
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					href_pos3_ff[ i ]	<= {SIZE_BIT{1'b0}} ;
					href_pos4_ff[ i ]	<= {SIZE_BIT{1'b0}} ;
					href_pos5_ff[ i ]	<= 1'b0 ;
					href_dlt3_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
					href_dlt4_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
				end
				else if ( srst ) begin
					href_pos3_ff[ i ]	<= {SIZE_BIT{1'b0}} ;
					href_pos4_ff[ i ]	<= {SIZE_BIT{1'b0}} ;
					href_pos5_ff[ i ]	<= 1'b0 ;
					href_dlt3_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
					href_dlt4_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
				end
				else if ( tgt_end_i || fstart_asrt_ff ) begin
					href_pos3_ff[ i ]	<= {SIZE_BIT{1'b0}} ;
					href_pos4_ff[ i ]	<= {SIZE_BIT{1'b0}} ;
					href_pos5_ff[ i ]	<= 1'b0 ;
					href_dlt3_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
					href_dlt4_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
				end
				else if ( enable ) begin
					if ( tgt_en_ff ) begin
						href_pos3_ff[ i ]	<= href_pos[ i ] ;
						href_pos4_ff[ i ]	<= href_pos3_ff[ i ] ;
						href_pos5_ff[ i ]	<= href_pos4_ff[ i ][ 0 ] ;
						href_dlt3_ff[ i ]	<= href_dlt[ i ] ;
						href_dlt4_ff[ i ]	<= href_dlt3_ff[ i ] ;
					end
				end
			end

			//--------------------
			// pixel flow control
			//--------------------
			// pointer difference in case that source and target pipeline are enabled.
			assign delta_pix_p1[ i ]	= { 1'b0 , src_wdt3_ff + PPC - 1 } - { 1'b0 , href_pos3_ff[ i ] } ;
			// pointer difference in case that source pipeline is enabled.
			assign delta_pix_p2[ i ]	= { 1'b0 , src_wdt3_ff + PPC - 1 } - { 1'b0 , href_pos4_ff[ i ] } ;
			// pointer difference in case that target pipeline is enabled.
			assign delta_pix_p3[ i ]	= { 1'b0 , src_wdt4_ff + PPC - 1 } - { 1'b0 , href_pos3_ff[ i ] } ;
			// pointer difference in case that source and target pipeline are disabled.
			assign delta_pix_p4[ i ]	= { 1'b0 , src_wdt4_ff + PPC - 1 } - { 1'b0 , href_pos4_ff[ i ] } ;
//			// target side pipeline control in case that source and target pipeline are enabled.
//			assign pipe_tgt_en1[ i ]	= ~delta_pix_p1[ i ][SIZE_BIT]
//										& ( delta_pix_p1[ i ][SIZE_BIT-1:0] >= 3 )
//										& en_ff ;
//			// target side pipeline control in case that source pipeline is enabled.
//			assign pipe_tgt_en2[ i ]	= ~delta_pix_p2[ i ][SIZE_BIT]
//										& ( delta_pix_p2[ i ][SIZE_BIT-1:0] >= 3 )
//										& en_ff ;
//			// target side pipeline control in case that target pipeline is enabled.
//			assign pipe_tgt_en3[ i ]	= ~delta_pix_p3[ i ][SIZE_BIT]
//										& ( delta_pix_p3[ i ][SIZE_BIT-1:0] >= 3 )
//										& en_ff ;
//			// target side pipeline control in case that source and target pipeline are disabled.
//			assign pipe_tgt_en4[ i ]	= ~delta_pix_p4[ i ][SIZE_BIT]
//										& ( delta_pix_p4[ i ][SIZE_BIT-1:0] >= 3 )
//										& en_ff ;

//			// source side pipeline control in case that source and target pipeline are enabled.
//			assign pipe_src_en1[ i ]	= delta_pix_p1[ i ][SIZE_BIT]
//										| ( delta_pix_p1[ i ][SIZE_BIT-1:0] < SFT_DATA_NUM - 3 ) ;
//			// source side pipeline control in case that source pipeline is enabled.
//			assign pipe_src_en2[ i ]	= delta_pix_p2[ i ][SIZE_BIT]
//										| ( delta_pix_p2[ i ][SIZE_BIT-1:0] < SFT_DATA_NUM - 3 ) ;
//			// source side pipeline control in case that target pipeline is enabled.
//			assign pipe_src_en3[ i ]	= delta_pix_p3[ i ][SIZE_BIT]
//										| ( delta_pix_p3[ i ][SIZE_BIT-1:0] < SFT_DATA_NUM - 3 ) ;

			// Pointer difference selector for lane i.
			// Picks the candidate difference that corresponds to the current value of
			// the pipeline enable registers { src_en_ff , tgt_en_ff } :
			//   2'b11 -> delta_pix_p1 , 2'b10 -> delta_pix_p2 ,
			//   2'b01 -> delta_pix_p3 , 2'b00 -> delta_pix_p4 .
			// ( The per-case pipe_tgt_en / pipe_src_en outputs of the earlier revision
			//   are no longer generated ; src_en / tgt_en are derived from delta_pix_p. )
			always @( * ) begin
				case ( { src_en_ff , tgt_en_ff } )
					2'b11	: delta_pix_p[ i ]	= delta_pix_p1[ i ] ;
					2'b10	: delta_pix_p[ i ]	= delta_pix_p2[ i ] ;
					2'b01	: delta_pix_p[ i ]	= delta_pix_p3[ i ] ;
					2'b00	: delta_pix_p[ i ]	= delta_pix_p4[ i ] ;
					// Reached only when the select contains x / z. Drive x instead of
					// leaving the output unassigned, so this combinational block never
					// holds a stale value ( no latch-like behaviour in simulation ).
					// NOTE(review): SIZE_BIT + 1 bits assumed for delta_pix_p[ i ] ( it is
					// indexed at [ SIZE_BIT ] in this file ) - confirm against its declaration.
					default	: delta_pix_p[ i ]	= {(SIZE_BIT+1){1'bx}} ;
				endcase
			end
		end
	endgenerate

	// Next-cycle pipeline enable requests, taken from the pointer difference of
	// the last lane ( index PPC - 1 ).
	//   src_en : bit [ SIZE_BIT ] of the difference is set, or the difference is
	//            below SFT_DATA_NUM - 2 - PPC.
	//   tgt_en : bit [ SIZE_BIT ] is clear and the difference is at least 3.
	wire	src_en	= ( delta_pix_p[ PPC - 1 ] < SFT_DATA_NUM - 2 - PPC )
					| delta_pix_p[ PPC - 1 ][ SIZE_BIT ] ;
	wire	tgt_en	= ( delta_pix_p[ PPC - 1 ] >= 3 )
					& ~delta_pix_p[ PPC - 1 ][ SIZE_BIT ] ;
//	alternative threshold kept for reference : ( delta_pix_p[ PPC - 1 ] >= 2 + PPC )

	// Per-lane end-of-line flags : bit k is set when the corresponding width
	// counter plus k equals the configured width. The state machine uses the
	// OR-reduction of each vector.
	wire	[ PPC - 1 : 0 ]	src_end ;
	wire	[ PPC - 1 : 0 ]	tgt_end ;

	generate
		for ( i = 0 ; i < PPC ; i = i + 1 ) begin	: end_flag_gen
			// source side : compared against src_wdt_i
			assign src_end[ i ]	= ( src_wdt_i == src_wdt3_ff + i ) ;
			// target side : compared against tgt_wdt_i
			assign tgt_end[ i ]	= ( tgt_wdt_i == tgt_wdt4_ff + i ) ;
		end
	endgenerate

	// Pixel flow-control state machine.
	// Registers driven here : ring_state_ff ( state ), valid_umsk_ff, the source /
	// target pipeline enables src_en_ff / tgt_en_ff, and wr_line_pos_ff.
	// The asynchronous reset, srst, tgt_end_i and fstart_asrt_ff all return the
	// machine to PIXEL_INIT with every flag cleared. Otherwise it advances only
	// while enable is high and calc_term_ff is low.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			ring_state_ff	<= PIXEL_INIT ;
			valid_umsk_ff	<= 1'b0 ;
			src_en_ff		<= 1'b0 ;
			tgt_en_ff		<= 1'b0 ;
			wr_line_pos_ff	<= {SIZE_BIT{1'b0}} ;
		end
		else if ( srst ) begin
			ring_state_ff	<= PIXEL_INIT ;
			valid_umsk_ff	<= 1'b0 ;
			src_en_ff		<= 1'b0 ;
			tgt_en_ff		<= 1'b0 ;
			wr_line_pos_ff	<= {SIZE_BIT{1'b0}} ;
		end
		else if ( tgt_end_i || fstart_asrt_ff ) begin
			ring_state_ff	<= PIXEL_INIT ;
			valid_umsk_ff	<= 1'b0 ;
			src_en_ff		<= 1'b0 ;
			tgt_en_ff		<= 1'b0 ;
			wr_line_pos_ff	<= {SIZE_BIT{1'b0}};
		end
		else if ( enable && ~calc_term_ff ) begin
			case ( ring_state_ff )
				// PIXEL_INIT : run only the target pipeline until tgt_wdt2_ff reaches
				// PPC, then start the line with only the source pipeline enabled.
				PIXEL_INIT			: begin
					if ( tgt_wdt2_ff == PPC ) begin // target counter pipeline delay
//					if ( tgt_wdt2_ff == { {(SIZE_BIT-1){1'b0}} , 1'b1 } ) begin // target counter pipeline delay
						ring_state_ff	<= PIXEL_LSTART ;
						valid_umsk_ff	<= 1'b1 ;
						src_en_ff		<= 1'b1 ;
						tgt_en_ff		<= 1'b0 ;
					end
					else begin
						// valid_umsk_ff is not assigned here and keeps its value
						ring_state_ff	<= PIXEL_INIT ;
						src_en_ff		<= 1'b0 ;
						tgt_en_ff		<= 1'b1 ;
					end
				end
				// PIXEL_LSTART : hold the current enables until src_wdt3_ff equals
				// SFT_DATA_NUM - PPC, then hand control to the computed src_en / tgt_en.
				PIXEL_LSTART		: begin
					if ( ( src_wdt3_ff == SFT_DATA_NUM - PPC ) ) begin // source counter MAX
//					if ( ( src_wdt3_ff == { {(SIZE_BIT-FX_SEL_BIT){1'b0}} , { FX_SEL_BIT{1'b1} } } ) ) begin // source counter MAX
//					if ( ( src_wdt3_ff == { {(SIZE_BIT-3){1'b0}} , 3'b111 } ) ) begin
						ring_state_ff	<= PIXEL_EN ;
						src_en_ff		<= src_en ;
						tgt_en_ff		<= tgt_en ;
//						src_en_ff		<= pipe_src_en2 ;
//						tgt_en_ff		<= pipe_tgt_en2 ;
					end
				end
				// PIXEL_EN : steady state. The end-of-line checks are prioritised :
				// both sides ending together, then source end, then target end ;
				// otherwise the enables follow src_en / tgt_en every cycle.
				PIXEL_EN			: begin
					if ( src_en_ff && tgt_en_ff && |src_end && |tgt_end ) begin
//					if ( src_en_ff && tgt_en_ff && ( src_wdt3_ff == src_wdt_i ) && ( tgt_wdt4_ff == tgt_wdt_i ) ) begin
						ring_state_ff	<= PIXEL_BOTH_LEND ;
						valid_umsk_ff	<= 1'b0 ;
						src_en_ff		<= 1'b0 ;
						tgt_en_ff		<= 1'b0 ;
					end
//					else if ( src_en_ff && !tgt_en_ff && ( src_wdt3_ff == src_wdt_i ) ) begin
					else if ( src_en_ff && |src_end ) begin
						// source line finished first : keep only the target side running
						ring_state_ff	<= PIXEL_SRC_LEND ;
						src_en_ff		<= 1'b0 ;
						tgt_en_ff		<= 1'b1 ;
					end
//					else if ( !src_en_ff && tgt_en_ff && ( tgt_wdt4_ff == tgt_wdt_i ) ) begin
					else if ( tgt_en_ff && |tgt_end ) begin
						// target line finished first : keep only the source side running
						ring_state_ff	<= PIXEL_TGT_LEND ;
						src_en_ff		<= 1'b1 ;
						tgt_en_ff		<= 1'b0 ;
					end
					else begin
						ring_state_ff	<= PIXEL_EN ;
						src_en_ff		<= src_en ;
						tgt_en_ff		<= tgt_en ;
//						src_en_ff		<= pipe_src_en ;
//						tgt_en_ff		<= pipe_tgt_en ;
					end
				end

				// PIXEL_SRC_LEND : wait for any target end flag, then stop both sides.
				PIXEL_SRC_LEND		: begin
					if ( |tgt_end ) begin
						ring_state_ff	<= PIXEL_BOTH_LEND ;
						valid_umsk_ff	<= 1'b0 ;
						src_en_ff		<= 1'b0 ;
						tgt_en_ff		<= 1'b0 ;
					end
				end
				// PIXEL_TGT_LEND : wait for any source end flag, then stop both sides.
				PIXEL_TGT_LEND		: begin
					if ( |src_end ) begin
						ring_state_ff	<= PIXEL_BOTH_LEND ;
						valid_umsk_ff	<= 1'b0 ;
						src_en_ff		<= 1'b0 ;
						tgt_en_ff		<= 1'b0 ;
					end
				end
				// PIXEL_BOTH_LEND : capture shgt_cnt_ff as the new write line position
				// and restart, either directly at PIXEL_LSTART or through PIXEL_INIT.
				PIXEL_BOTH_LEND		: begin
					wr_line_pos_ff	<= shgt_cnt_ff ;
					// NOTE(review): this compares the single bit tgt_wdt4_ff[ 0 ] with a
					// SIZE_BIT-wide zero, i.e. it tests only the LSB. Confirm whether the
					// LSB test is intended or the whole counter should be compared.
					if ( tgt_wdt4_ff[ 0 ] == {SIZE_BIT{1'b0}} ) begin
						ring_state_ff	<= PIXEL_LSTART ;
						valid_umsk_ff	<= 1'b1 ;
						src_en_ff		<= 1'b1 ;
						tgt_en_ff		<= 1'b0 ;
					end
					else begin
						ring_state_ff	<= PIXEL_INIT ;
						valid_umsk_ff	<= 1'b0 ;
						src_en_ff		<= 1'b0 ;
						tgt_en_ff		<= 1'b1 ;
					end
				end
				// Undefined state encoding : go back to PIXEL_INIT ; the remaining
				// registers are driven x.
				default				: begin
					ring_state_ff	<= PIXEL_INIT ;
					valid_umsk_ff	<= 1'bx ;
					src_en_ff		<= 1'bx ;
					tgt_en_ff		<= 1'bx ;
					wr_line_pos_ff	<= {SIZE_BIT{1'bx}} ;
				end
			endcase
		end
	end

	generate
		for ( i = 0 ; i < PPC ; i = i + 1 ) begin	: fxd_gen

			// Tap selector register for lane i : holds the low FX_SEL_BIT bits of this
			// lane's pointer difference. Zeroed by the asynchronous reset, srst and
			// tgt_end_i ; reloaded every cycle in which enable is high and
			// calc_term_ff is low.
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					fx_sel_ff[ i ]	<= { FX_SEL_BIT{1'b0} } ;
				end
				else if ( srst || tgt_end_i ) begin
					fx_sel_ff[ i ]	<= { FX_SEL_BIT{1'b0} } ;
				end
				else if ( enable && !calc_term_ff ) begin
					fx_sel_ff[ i ]	<= delta_pix_p[ i ][ FX_SEL_BIT-1 : 0 ] ;
				end
			end

			// Data selection from the pixel buffer.
			// Six taps are read from pixel_sft_ff around the index held in
			// fx_sel_ff[ i ] : offsets +2 , +1 , 0 , -1 , -2 , -3. Where an offset would
			// run past either end of the selector range, a substitute index is used
			// instead. The tables below list the resulting index for the 3-bit
			// selector of the earlier single-lane revision ( fx_sel = 0 .. 7 ).

			// fxm2_sel : index fx_sel + 2 ; the top two selector values read index
			// fx_sel itself.
			//   fx_sel  0 1 2 3 4 5 6 7  ->  index  2 3 4 5 6 7 6 7
			//   ( the legacy table marks fx_sel = 7 "in case of left border pixel" )
			assign fxm2_sel[ i ]	= ( fx_sel_ff[ i ] >= { FX_SEL_BIT{1'b1} } -1 )	? pixel_sft_ff[ fx_sel_ff[ i ] ]
																					: pixel_sft_ff[ fx_sel_ff[ i ] + 2 ] ;

			// fxm1_sel : index fx_sel + 1 ; the all-ones selector reads fx_sel - 1.
			//   fx_sel  0 1 2 3 4 5 6 7  ->  index  1 2 3 4 5 6 7 6
			assign fxm1_sel[ i ]	= ( fx_sel_ff[ i ] == { FX_SEL_BIT{1'b1} } )	? pixel_sft_ff[ fx_sel_ff[ i ] - 1 ]
																					: pixel_sft_ff[ fx_sel_ff[ i ] + 1 ] ;

			// fx0_sel : index fx_sel, no substitution.
			//   fx_sel  0 1 2 3 4 5 6 7  ->  index  0 1 2 3 4 5 6 7
			assign fx0_sel[ i ]		= pixel_sft_ff[ fx_sel_ff[ i ] ] ;

			// fxp1_sel : index fx_sel - 1 ; selector 0 reads index 1.
			//   fx_sel  0 1 2 3 4 5 6 7  ->  index  1 0 1 2 3 4 5 6
			assign fxp1_sel[ i ]	= ( fx_sel_ff[ i ] == { FX_SEL_BIT{1'b0} } )	? pixel_sft_ff[ 1 ]
																					: pixel_sft_ff[fx_sel_ff[ i ]-1] ;

			// fxp2_sel : index fx_sel - 2 ; selectors 0 and 1 read indices 2 and 1.
			//   fx_sel  0 1 2 3 4 5 6 7  ->  index  2 1 0 1 2 3 4 5
			assign fxp2_sel[ i ]	= ( fx_sel_ff[ i ] == { FX_SEL_BIT{1'b0} } 	)	? pixel_sft_ff[ 2 ]
									: ( fx_sel_ff[ i ] == { FX_SEL_BIT{1'b0} } +1 )	? pixel_sft_ff[ 1 ]
																					: pixel_sft_ff[ fx_sel_ff[ i ] - 2 ] ;

			// fxp3_sel : index fx_sel - 3 ; selectors 0 , 1 , 2 read indices 3 , 0 , 1.
			//   fx_sel  0 1 2 3 4 5 6 7  ->  index  3 0 1 0 1 2 3 4
			//   ( the legacy table marks fx_sel = 1 "in case of right border pixel" )
			assign fxp3_sel[ i ]	= ( fx_sel_ff[ i ] == { FX_SEL_BIT{1'b0} } 	)	? pixel_sft_ff[ 3 ]
									: ( fx_sel_ff[ i ] == { FX_SEL_BIT{1'b0} } +1 )	? pixel_sft_ff[ 0 ]
									: ( fx_sel_ff[ i ] == { FX_SEL_BIT{1'b0} } +2 )	? pixel_sft_ff[ 1 ]
																					: pixel_sft_ff[ fx_sel_ff[ i ] - 3 ] ;

			// Register stage for the six selected taps of lane i
			// ( scaling transaction pipeline 1st stage ).
			// Zeroed by the asynchronous reset, srst and tgt_end_i ; captures the tap
			// multiplexer outputs while enable and tgt_en_ff are both high.
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					fxp3_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxp2_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxp1_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fx0_sel_ff[ i ]		<= {PIXEL_BIT{1'b0}} ;
					fxm1_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxm2_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
				end
				else if ( srst || tgt_end_i ) begin
					fxp3_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxp2_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxp1_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fx0_sel_ff[ i ]		<= {PIXEL_BIT{1'b0}} ;
					fxm1_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxm2_sel_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
				end
				else if ( enable && tgt_en_ff ) begin
					fxp3_sel_ff[ i ]	<= fxp3_sel[ i ] ;
					fxp2_sel_ff[ i ]	<= fxp2_sel[ i ] ;
					fxp1_sel_ff[ i ]	<= fxp1_sel[ i ] ;
					fx0_sel_ff[ i ]		<= fx0_sel[ i ] ;
					fxm1_sel_ff[ i ]	<= fxm1_sel[ i ] ;
					fxm2_sel_ff[ i ]	<= fxm2_sel[ i ] ;
				end
			end

			// Data alignment for 422 format.
			// The upper Q_BIT bits of each of the four output taps are copied straight
			// from the registered tap of the same name.
			assign fxm1[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT]	= fxm1_sel_ff[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT] ;
			assign fx0[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT]	= fx0_sel_ff[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT] ;
			assign fxp1[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT]	= fxp1_sel_ff[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT] ;
			assign fxp2[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT]	= fxp2_sel_ff[ i ][PIXEL_BIT-1:PIXEL_BIT-Q_BIT] ;


			// The remaining lower PIXEL_BIT - Q_BIT bits exist only when PLANE >= 2.
			// They are produced by an AND-OR multiplexer controlled by five select
			// wires. The six digits in each wire name mark which of the registered
			// taps ( fxm2 , fxm1 , fx0 , fxp1 , fxp2 , fxp3 , left to right ) feed the
			// outputs when that wire is high.
			if ( PLANE >= 2 ) begin : chroma_sampling

				wire	smpl_011110 ;
				wire	smpl_101010 ;
				wire	smpl_010100 ;
				wire	smpl_010101 ;
				wire	smpl_001010 ;

				// smpl_mode_i high selects the straight-through ( 444 ) case ; when it is
				// low, one of the other four is chosen from the target-side term
				// ( tgt_wdt5_ff combined with bit 0 of the lane index ) and href_pos5_ff[ i ].
				// NOTE(review): "tgt_wdt5_ff + i[0]" acts as a 1-bit parity toggle only if
				// tgt_wdt5_ff is a single bit - confirm against its declaration.
				assign smpl_011110	=  smpl_mode_i ;	// 444 format
				assign smpl_101010	= ~smpl_mode_i & ( ~tgt_wdt5_ff + i[0] ) & ~href_pos5_ff[ i ] ;	// source even(Cb) / target even(Cb) pixel
				assign smpl_010100	= ~smpl_mode_i & ( ~tgt_wdt5_ff + i[0] ) &  href_pos5_ff[ i ] ;	// source odd(Cr)  / target even(Cb) pixel
				assign smpl_010101	= ~smpl_mode_i & (  tgt_wdt5_ff + i[0] ) & ~href_pos5_ff[ i ] ;	// source even(Cb) / target odd(Cr)  pixel
				assign smpl_001010	= ~smpl_mode_i & (  tgt_wdt5_ff + i[0] ) &  href_pos5_ff[ i ] ;	// source odd(Cr)  / target odd(Cr)  pixel
//				earlier form without the lane index term :
//				assign smpl_101010	= ~smpl_mode_i & ~tgt_wdt5_ff & ~href_pos5_ff ;	// source even(Cb) / target even(Cb) pixel
//				assign smpl_010100	= ~smpl_mode_i & ~tgt_wdt5_ff &  href_pos5_ff ;	// source odd(Cr)  / target even(Cb) pixel
//				assign smpl_010101	= ~smpl_mode_i &  tgt_wdt5_ff & ~href_pos5_ff ;	// source even(Cb) / target odd(Cr)  pixel
//				assign smpl_001010	= ~smpl_mode_i &  tgt_wdt5_ff &  href_pos5_ff ;	// source odd(Cr)  / target odd(Cr)  pixel

				// fxm1 lower bits : fxm1 / fxm2 / fxm1 / fxm1 / fx0 for the five selects.
				assign fxm1[ i ][PIXEL_BIT-Q_BIT-1:0]	= ( {(PIXEL_BIT-Q_BIT){smpl_011110}} & fxm1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_101010}} & fxm2_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010100}} & fxm1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010101}} & fxm1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_001010}} & fx0_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] ) ;

				// fx0 lower bits : fx0 / fx0 / fxm1 / fxp1 / fx0.
				assign fx0[ i ][PIXEL_BIT-Q_BIT-1:0]	= ( {(PIXEL_BIT-Q_BIT){smpl_011110}} & fx0_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_101010}} & fx0_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010100}} & fxm1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010101}} & fxp1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_001010}} & fx0_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] ) ;

				// fxp1 lower bits : fxp1 / fx0 / fxp1 / fxp1 / fxp2.
				assign fxp1[ i ][PIXEL_BIT-Q_BIT-1:0]	= ( {(PIXEL_BIT-Q_BIT){smpl_011110}} & fxp1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_101010}} & fx0_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010100}} & fxp1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010101}} & fxp1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_001010}} & fxp2_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] ) ;

				// fxp2 lower bits : fxp2 / fxp2 / fxp1 / fxp3 / fxp2.
				assign fxp2[ i ][PIXEL_BIT-Q_BIT-1:0]	= ( {(PIXEL_BIT-Q_BIT){smpl_011110}} & fxp2_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_101010}} & fxp2_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010100}} & fxp1_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_010101}} & fxp3_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] )
														| ( {(PIXEL_BIT-Q_BIT){smpl_001010}} & fxp2_sel_ff[ i ][PIXEL_BIT-Q_BIT-1:0] ) ;
			end

			// Register stage for the four aligned taps of lane i
			// ( scaling transaction pipeline 2nd stage ).
			// Zeroed by the asynchronous reset, srst and tgt_end_i ; otherwise loaded
			// on every cycle in which enable is high ( tgt_en_ff is not consulted here ).
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					fxp2_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxp1_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fx0_ff[ i ]		<= {PIXEL_BIT{1'b0}} ;
					fxm1_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
				end
				else if ( srst || tgt_end_i ) begin
					fxp2_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fxp1_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
					fx0_ff[ i ]		<= {PIXEL_BIT{1'b0}} ;
					fxm1_ff[ i ]	<= {PIXEL_BIT{1'b0}} ;
				end
				else if ( enable ) begin
					fxp2_ff[ i ]	<= fxp2[ i ] ;
					fxp1_ff[ i ]	<= fxp1[ i ] ;
					fx0_ff[ i ]		<= fx0[ i ] ;
					fxm1_ff[ i ]	<= fxm1[ i ] ;
				end
			end

		// Two-stage delay of the position fraction for lane i
		// ( scaling transaction pipeline 1st - 2nd stage ).
		//   href_dlt4_ff -> dltx_dly1_ff : loaded while enable and tgt_en_ff are high
		//   dltx_dly1_ff -> dltx_dly2_ff : loaded whenever enable is high
		// Both stages are zeroed by the asynchronous reset, srst and tgt_end_i.
		always @( posedge clk or negedge rst_n ) begin
			if ( !rst_n ) begin
				dltx_dly1_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
				dltx_dly2_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
			end
			else if ( srst || tgt_end_i ) begin
				dltx_dly1_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
				dltx_dly2_ff[ i ]	<= {DLT_FRAC_BIT{1'b0}} ;
			end
			else if ( enable ) begin
				// first stage is additionally gated by the target pipeline enable
				if ( tgt_en_ff ) begin
					dltx_dly1_ff[ i ]	<= href_dlt4_ff[ i ] ;
				end
				// second stage follows the first on every enabled cycle
				dltx_dly2_ff[ i ]	<= dltx_dly1_ff[ i ] ;
			end
		end


		end
	endgenerate

	// Valid flag that accompanies the tap / fraction data
	// ( scaling transaction pipeline 1st - 2nd stage ).
	// Stage 1 is tgt_en_ff masked by valid_umsk_ff ; stage 2 is stage 1 delayed
	// by one enabled cycle. Both are cleared by the asynchronous reset, srst and
	// tgt_end_i.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			vdltx_dly2_ff	<= 1'b0 ;
			vdltx_dly1_ff	<= 1'b0 ;
		end
		else if ( srst || tgt_end_i ) begin
			vdltx_dly2_ff	<= 1'b0 ;
			vdltx_dly1_ff	<= 1'b0 ;
		end
		else if ( enable ) begin
			vdltx_dly2_ff	<= vdltx_dly1_ff ;
			vdltx_dly1_ff	<= valid_umsk_ff & tgt_en_ff ;
		end
	end


	// Line buffer write address counter ( scaling transaction pipeline 7th stage ).
	// Counts one step for every enabled cycle in which vfxd_dly_ff is high and
	// wraps to zero after the cycle in which adr_clr is asserted. Also zeroed by
	// the asynchronous reset, srst and tgt_end_i.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			adr_cnt_ff	<= {RAM_ADR_BIT{1'b0}} ;
		end
		else if ( srst || tgt_end_i ) begin
			adr_cnt_ff	<= {RAM_ADR_BIT{1'b0}} ;
		end
		else if ( enable && vfxd_dly_ff ) begin
			if ( adr_clr ) begin
				adr_cnt_ff	<= {RAM_ADR_BIT{1'b0}} ;
			end
			else begin
				adr_cnt_ff	<= adr_cnt_ff + 1'b1 ;
			end
		end
	end

	// wrap condition : the counter has reached tgt_wdt_i divided by PPC
	assign adr_clr	= ( ( tgt_wdt_i / PPC ) == adr_cnt_ff ) ;

	// Line buffer RAM selector ( scaling transaction pipeline 7th stage ).
	// sel_cnt_ff is a RAM_COUNT-bit vector that starts with only bit 0 set and
	// is rotated left by one position each time a line wraps ( adr_clr together
	// with vfxd_dly_ff, while enable is high ). The asynchronous reset, srst and
	// tgt_end_i restore the initial pattern.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			sel_cnt_ff	<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
		end
		else if ( srst || tgt_end_i ) begin
			sel_cnt_ff	<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
		end
		else if ( enable && adr_clr && vfxd_dly_ff ) begin
			sel_cnt_ff	<= { sel_cnt_ff[RAM_COUNT-2:0] , sel_cnt_ff[RAM_COUNT-1] } ;
		end
	end

	// Input delay and line buffer write port registers.
	//   vfxd_dly_ff / fxd_dly_ff : valid_fxd_i / fxd_i delayed by one enabled cycle
	//                              ( scaling transaction pipeline 7th stage )
	//   ram_wen_ff / ram_wadr_ff / ram_wdata_ff : sram control signals
	//                              ( scaling transaction pipeline 8th stage )
	// Everything is zeroed by the asynchronous reset, srst and tgt_end_i.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			ram_wdata_ff	<= {RAM_DATA_BIT{1'b0}} ;
			ram_wadr_ff 	<= {RAM_ADR_BIT{1'b0}} ;
			ram_wen_ff		<= {RAM_COUNT{1'b0}} ;
			fxd_dly_ff		<= {PIXEL_BIT{1'b0}} ;
			vfxd_dly_ff		<= 1'b0 ;
		end
		else if ( srst || tgt_end_i ) begin
			ram_wdata_ff	<= {RAM_DATA_BIT{1'b0}} ;
			ram_wadr_ff 	<= {RAM_ADR_BIT{1'b0}} ;
			ram_wen_ff		<= {RAM_COUNT{1'b0}} ;
			fxd_dly_ff		<= {PIXEL_BIT{1'b0}} ;
			vfxd_dly_ff		<= 1'b0 ;
		end
		else if ( enable ) begin
			// 7th stage : plain one-cycle delay of the incoming valid / data
			fxd_dly_ff		<= fxd_i ;
			vfxd_dly_ff		<= valid_fxd_i ;
			// 8th stage : write enable goes to the currently selected ram only
			ram_wen_ff		<= sel_cnt_ff & {RAM_COUNT{vfxd_dly_ff}} ;
			// address / data are refreshed only when there is something to write
			if ( vfxd_dly_ff ) begin
				ram_wdata_ff	<= fxd_dly_ff ;
				ram_wadr_ff		<= adr_cnt_ff ;
			end
		end
	end

	//-------------------
	// line flow control
	//-------------------
	// Line pointer difference : write line position minus read line position,
	// each extended by a leading 1'b0 so that bit [ SIZE_BIT ] flags a borrow.
	assign delta_line_p		= { 1'b0 , wr_line_pos_ff } - { 1'b0 , rd_line_pos_i } ;

	// Write grant for the scaling transaction pipeline.
	// Granted when the difference borrowed ( read process is faster than write
	// process ), or when it did not borrow ( write process is faster than read
	// process ) and its magnitude differs from the limit : 4 when RAM_COUNT == 5 ,
	// 3 for every other RAM_COUNT.
	generate
		if ( RAM_COUNT == 5 ) begin : ram_cnt_5_grnt
			assign write_grant	= delta_line_p[SIZE_BIT]
								| ( ~delta_line_p[SIZE_BIT]
								  & ( delta_line_p[SIZE_BIT-1:0] != { {(SIZE_BIT-3){1'b0}} , 3'b100 } ) ) ;
		end
		else begin : ram_cnt_3_grnt
			assign write_grant	= delta_line_p[SIZE_BIT]
								| ( ~delta_line_p[SIZE_BIT]
								  & ( delta_line_p[SIZE_BIT-1:0] != { {(SIZE_BIT-2){1'b0}} , 2'b11 } ) ) ;
		end
	endgenerate

	// Registered copy of write_grant ( scaling transaction pipeline enable ).
	// It comes up as 1 after the asynchronous reset, srst or tgt_end_i, and then
	// samples write_grant on every cycle in which enable is high.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			en_ff	<= 1'b1 ;
		end
		else if ( srst || tgt_end_i ) begin
			en_ff	<= 1'b1 ;
		end
		else if ( enable ) begin
			en_ff	<= write_grant ;
		end
	end

endmodule

`default_nettype wire
