//----------------------------------------------------------------------------------------
//Copyright (C) 2012 Macnica Inc. All Rights Reserved.
//
//Use in source and binary forms, with or without modification, are permitted provided
//by agreeing to the following terms and conditions:
//
//REDISTRIBUTIONS OR SUBLICENSING IN SOURCE AND BINARY FORM ARE NOT ALLOWED.
//THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS"
//AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
//IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
//DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER BE LIABLE
//FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
//DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
//SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
//CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
//OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
//OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//AND ALSO REGARDING THE REFERENCE SOFTWARE, REDISTRIBUTION OR SUBLICENSING
//IN SOURCE AND BINARY FORM ARE NOT ALLOWED.
//----------------------------------------------------------------------------------------
// DESCRIPTION
//		vertical direction scaling circuit
//----------------------------------------------------------------------------------------
// REVISION HISTORY
//		v1.0 Mar. 13 2012	: Initial Version Release
//----------------------------------------------------------------------------------------
// PARAMETERS
//		Q_BIT			: quantized bit width per color plane
//		PLANE			: color plane count
//		SIZE_BIT		: size setting signal bit width
//		RATE_FRAC_BIT	: fractional part bit width for coordinate calculation
//		RAM_COUNT		: recommended value ( SIZE_BIT ~ SIZE_BIT + 2 )
//
//		RATE_BIT		: scaling rate calculation bit width ( SIZE_BIT + RATE_FRAC_BIT )
//		REF_BIT			: reference pixel pointer (multiplication circuit result) bit width ( SIZE_BIT + RATE_BIT )
//		DLT_FRAC_BIT	: depending on filter table resolution ( fixed value )
//
//		PIXEL_BIT		: pixel bit width ( Q_BIT * PLANE )
//		RAM_ADR_BIT		: ram address bus width ( SIZE_BIT )
//		RAM_DATA_BIT	: ram data bus width ( PIXEL_BIT )
//
//		DVEND			: dividend bit width ( SIZE_BIT )
//		DVSOR			: divisor bit width ( 2 + ( RAM_COUNT == 5 ) * 1 )
//
//----------------------------------------------------------------------------------------
// I/O PORTS
//		clk				: clock for all circuit
//		rst_n			: asynchronous reset ( low active )
//		srst			: synchronous reset
//		enable			: clock enable
//
//		scan_mode_i		: output scan mode 0:interlace 1:progressive
//
//		src_hgt_i		: vertical size of input frame data
//		tgt_wdt_i		: horizontal size of output frame data
//		tgt_hgt_i		: vertical size of output frame data
//		v_scl_rate_i	: vertical scaling rate
//		wr_line_pos_i	: write line position
//
//		en_scl_o	 	: enable to vertical scaling core block
//		valid_gy_dlty_o	: data valid to vertical scaling core block
//		gym1_o			: vertical y-1 pixel data to vertical scaling core block
//		gy0_o			: vertical y   pixel data to vertical scaling core block
//		gyp1_o			: vertical y+1 pixel data to vertical scaling core block
//		gyp2_o			: vertical y+2 pixel data to vertical scaling core block
//		deltay_o	 	: fractional value of vertical reference coordinate
//
//		valid_gyd_i		: data valid from vertical scaling core block
//		gyd_i			: interpolated pixel data from vertical scaling core block
//
//		ram_radr_o		: ram ( line buffer ) read address
//		ram_ren_o		: ram ( line buffer ) read enable
//		ram0_rdata_i	: ram ( line buffer ) #0 read data
//		ram1_rdata_i	: ram ( line buffer ) #1 read data
//		ram2_rdata_i	: ram ( line buffer ) #2 read data
//		ram3_rdata_i	: ram ( line buffer ) #3 read data
//		ram4_rdata_i	: ram ( line buffer ) #4 read data
//
//		rd_line_pos_o	: read line position
//
//		tgt_end_o		: termination of output process
//
//		frame_start_o	: frame start of output frame data
//		frame_end_o		: frame end of output frame data
//		valid_o			: data valid of output frame data
//		pixel_o			: pixel data of output frame data
//		field_o			: field status of output frame data 1:even field / 0:odd field
//		ready_i			: data reception ready of output frame data
//
//		field_i			: gated field status 1:even field / 0:odd field
//
//		pix_err_i		: pixel error occurred
//
//		width_o			: output target size
//		height_o		: output target size
//
//----------------------------------------------------------------------------------------
`timescale 1ps/1ps
`default_nettype none

module	scl16_dout_ctl (
	clk				,
	rst_n			,
	srst			,
	enable			,

	scan_mode_i		,

	src_hgt_i		,
	tgt_wdt_i		,
	tgt_hgt_i		,
	v_scl_rate_i	,
	wr_line_pos_i	,
	init_end_i		,

	en_scl_o,
	valid_gy_dlty_o	,
	gym1_o			,
	gy0_o			,
	gyp1_o			,
	gyp2_o			,
	deltay_o		,

	valid_gyd_i		,
	gyd_i			,

	ram_radr_o		,
	ram_ren_o		,
	ram0_rdata_i	,
	ram1_rdata_i	,
	ram2_rdata_i	,
	ram3_rdata_i	,
	ram4_rdata_i	,

	rd_line_pos_o	,
	tgt_end_o		,

	width_o			,
	height_o		,

	frame_start_o	,
	frame_end_o		,
	valid_o			,
	pixel_o			,
	field_o			,
	ready_i			,

	field_i			,

	pix_err_i
) ;

// =============================================================================
// DEFINE INCLUDE
// =============================================================================

// =============================================================================
// PARAMETER DEFINITION
// =============================================================================
	// Constant function: ceil( log2( value ) ), i.e. the number of bits needed
	// to index `value` distinct items. Returns 0 for value <= 1.
	function integer	log2 ;
		input integer	value ;
		integer			remaining ;
		begin
			log2	= 0 ;
			// count how many right shifts it takes for ( value - 1 ) to reach zero
			for ( remaining = value - 1 ; remaining > 0 ; remaining = remaining >> 1 ) begin
				log2	= log2 + 1 ;
			end
		end
	endfunction

	// ---------------------------------------------------------------------
	// Below parameters have to be defined from upper module
	// ---------------------------------------------------------------------
	// PPC multiplies the per-clock pixel bus width ( PXL_BIT_PER_CLK ) and
	// divides the horizontal count limit; presumably "pixels per clock"
	// -- TODO confirm against the upper module.
	parameter PPC					= 4				; // add by sugino
	parameter Q_BIT					= 8				; // quantized bit width per color plane
	parameter PLANE					= 3				; // color plane count
	parameter SIZE_BIT				= 12			; // bit width of size / line-position signals
	parameter RATE_FRAC_BIT			= SIZE_BIT + 2	; // fractional bits of the scaling rate
	parameter RAM_COUNT				= 5				; // number of line buffers; this module special-cases 5, any other value takes the 3-RAM paths

	// ---------------------------------------------------------------------
	// Please do not change the following parameters
	// ---------------------------------------------------------------------
	parameter RATE_BIT				= SIZE_BIT + RATE_FRAC_BIT		; // width of v_scl_rate_i
	parameter REF_BIT				= SIZE_BIT + RATE_BIT			; // width of the multiplier result ( vref_position )
	parameter DLT_FRAC_BIT			= 5								; // width of the fractional offset deltay_o

	parameter PIXEL_BIT				= Q_BIT * PLANE					; // bits of one pixel

	parameter PXL_BIT_PER_CLK		= PIXEL_BIT * PPC				; // add by sugino : width of every pixel data bus of this module
//	parameter DLT_BIT_PER_CLK		= DLT_FRAC_BIT * PPC			; // add by sugino

	// log2() above returns ceil( log2( x ) ).
	// NOTE(review): log2( PPC - 1 ) equals log2( PPC ) for PPC = 4 ( both 2 ), but for
	// PPC = 2 it yields 0, leaving RAM_ADR_BIT one bit wider than SIZE_BIT - 1 -- confirm intended.
	parameter RAM_ADR_BIT			= SIZE_BIT - log2(PPC - 1)		; // add by sugino
//	parameter RAM_ADR_BIT			= SIZE_BIT						;
	parameter RAM_DATA_BIT			= PXL_BIT_PER_CLK				; // add by sugino
//	parameter RAM_DATA_BIT			= PIXEL_BIT						;

	parameter DVEND					= SIZE_BIT						; // dividend width passed to scl16_divider
	parameter DVSOR					= 2 + ( RAM_COUNT == 5 ) * 1	; // divisor / remainder width : 3 bits when RAM_COUNT == 5, else 2 bits

// =============================================================================
// PORT DECLARATION
// =============================================================================
	input	wire								clk				;
	input	wire								rst_n			;
	input	wire								srst			;
	input	wire								enable			;

	input	wire								scan_mode_i		;

	input	wire	[ SIZE_BIT-1 : 0 ]			src_hgt_i		;
	input	wire	[ SIZE_BIT-1 : 0 ]			tgt_wdt_i		;
	input	wire	[ SIZE_BIT-1 : 0 ]			tgt_hgt_i		;
	input	wire	[ RATE_BIT-1 : 0 ]			v_scl_rate_i	;
	input	wire	[ SIZE_BIT-1 : 0 ]			wr_line_pos_i	;
	input	wire								init_end_i		;

	output	wire								en_scl_o		;
	output	wire								valid_gy_dlty_o	;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	gym1_o			;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	gy0_o			;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	gyp1_o			;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	gyp2_o			;
	output	wire	[ DLT_FRAC_BIT-1 : 0 ]		deltay_o		;

	input	wire								valid_gyd_i		;
	input	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	gyd_i			;

	output	wire	[ RAM_ADR_BIT-1 : 0 ]		ram_radr_o		;
	output	wire								ram_ren_o		;
	input	wire	[ RAM_DATA_BIT-1 : 0 ]		ram0_rdata_i	;
	input	wire	[ RAM_DATA_BIT-1 : 0 ]		ram1_rdata_i	;
	input	wire	[ RAM_DATA_BIT-1 : 0 ]		ram2_rdata_i	;
	input	wire	[ RAM_DATA_BIT-1 : 0 ]		ram3_rdata_i	;
	input	wire	[ RAM_DATA_BIT-1 : 0 ]		ram4_rdata_i	;

	output	wire	[ SIZE_BIT-1 : 0 ]			rd_line_pos_o	;
	output	wire								tgt_end_o		;

	output	wire	[ SIZE_BIT-1 : 0 ]			width_o			;
	output	wire	[ SIZE_BIT-1 : 0 ]			height_o		;

	output	wire								frame_start_o	;
	output	wire								frame_end_o		;
	output	wire								valid_o			;
	output	wire	[ PXL_BIT_PER_CLK-1 : 0 ]	pixel_o			;
	output	wire								field_o			;
	input	wire								ready_i			;

	input	wire								field_i			;

	input	wire								pix_err_i		;

// =============================================================================
// REG / WIRE DECLARATION
// =============================================================================


	reg								pix_err_lat1_ff		;
	wire							pix_err_pls			;

	wire	[ SIZE_BIT-1 : 0 ]		next_line_num		;

	wire	[ REF_BIT-1 : 0 ]		vref_position		;
	wire	[ SIZE_BIT-1 : 0 ]		vref_pos			;
	wire	[ DLT_FRAC_BIT-1 : 0 ]	vref_dlt			;

	reg								valid_mult1_ff		;
	reg								valid_mult2_ff		;

	wire	[ DVSOR-1 : 0 ]			div_divisor			;
	wire							div_valid			;
	wire	[ DVSOR-1 : 0 ]			div_remainder		;

	wire	[ RAM_COUNT-1 : 0 ]		ram_sel				;
	reg		[ DVSOR-1 : 0 ]			rem_lat_ff			;
	reg		[ SIZE_BIT-1 : 0 ]		rd_line_pos_ff		;
	reg		[ DLT_FRAC_BIT-1 : 0 ]	vref_dlt_ff			;
	reg		[ RAM_COUNT-1 : 0 ]		ram_sel_ff			;

	reg		[ SIZE_BIT-1 : 0 ]		twdt_cnt_ff			;
	reg		[ SIZE_BIT-1 : 0 ]		thgt_cnt_ff			;

	wire							h_start				;
	wire							h_end				;
	wire							v_end				;

	reg								valid_dly1_ff		;
	reg								valid_dly2_ff		;
	reg								valid_dly3_ff		;
	reg								even_line_ff		;

	wire	[ RAM_COUNT-1 : 0 ]		gy0_sel				;
	wire	[ RAM_COUNT-1 : 0 ]		gyp1_sel			;

	reg		[ DLT_FRAC_BIT-1 : 0 ]	dlty_dly1_ff		;
	reg		[ DLT_FRAC_BIT-1 : 0 ]	dlty_dly2_ff		;
	reg		[ DLT_FRAC_BIT-1 : 0 ]	dlty_dly3_ff		;
	reg		[ RAM_COUNT-1 : 0 ]		gy0_dly1_ff			;
	reg		[ RAM_COUNT-1 : 0 ]		gy0_dly2_ff			;
	reg		[ RAM_COUNT-1 : 0 ]		gyp1_dly1_ff		;
	reg		[ RAM_COUNT-1 : 0 ]		gyp1_dly2_ff		;

	reg		[ RAM_DATA_BIT-1 : 0 ]	ram0_lat_ff			;
	reg		[ RAM_DATA_BIT-1 : 0 ]	ram1_lat_ff			;
	reg		[ RAM_DATA_BIT-1 : 0 ]	ram2_lat_ff			;

	wire	[ RAM_DATA_BIT-1 : 0 ]	gym1				;
	wire	[ RAM_DATA_BIT-1 : 0 ]	gy0					;
	wire	[ RAM_DATA_BIT-1 : 0 ]	gyp1				;
	wire	[ RAM_DATA_BIT-1 : 0 ]	gyp2				;
	reg		[ RAM_DATA_BIT-1 : 0 ]	gym1_ff				;
	reg		[ RAM_DATA_BIT-1 : 0 ]	gy0_ff				;
	reg		[ RAM_DATA_BIT-1 : 0 ]	gyp1_ff				;
	reg		[ RAM_DATA_BIT-1 : 0 ]	gyp2_ff				;

	wire							frame_start			;
	wire							frame_end			;

	reg								fstart_dly3_ff		;
	reg								fstart_dly4_ff		;
	reg								fstart_dly5_ff		;
	reg								fstart_dly6_ff		;
	reg								fstart_dly7_ff		;

	reg								fend_dly1_ff		;
	reg								fend_dly2_ff		;
	reg								fend_dly3_ff		;
	reg								fend_dly4_ff		;
	reg								fend_dly5_ff		;
	reg								fend_dly6_ff		;
	reg								fend_dly7_ff		;

	reg								tend_dly1_ff		;
	reg								tend_dly2_ff		;
	reg								tend_dly3_ff		;
	reg								tend_dly4_ff		;
	reg								tend_dly5_ff		;
	reg								tend_dly6_ff		;
	reg								tend_dly7_ff		;

	reg								fstart_out_ff		;
	reg								fend_out_ff			;
	reg								valid_out_ff		;
	reg		[ PXL_BIT_PER_CLK-1 : 0 ]		pixel_out_ff		;
	reg								field_out_ff		;

	reg								fstart_out_done_ff	;

	reg								tgt_end_ff			;

	wire	[ SIZE_BIT : 0 ]		delta_line_p		;
	wire							read_grant			;
	reg								proc_wait_ff		;
	reg								en_ff				;
	wire							cnt_en				;

	reg		[ SIZE_BIT-1 : 0 ]		width_o_ff			;
	reg		[ SIZE_BIT-1 : 0 ]		height_o_ff			;
	wire	[ SIZE_BIT-1 : 0 ]		tgt_hgt_adj			;


// =============================================================================
// FUNCTION DESCRIPTION
// =============================================================================

	// Pixel error edge detection.
	// pix_err_lat1_ff keeps the previous sample of pix_err_i ( sampled only while
	// enable and ready_i are both high ) and is cleared by srst or tgt_end_ff.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			pix_err_lat1_ff		<= 1'b0 ;
		else if ( srst || tgt_end_ff )
			pix_err_lat1_ff		<= 1'b0 ;
		else if ( enable && ready_i )
			pix_err_lat1_ff		<= pix_err_i ;
	end

	// rising edge of pix_err_i, masked while the sink is not ready or tgt_end_ff is set
	assign	pix_err_pls	= ( pix_err_i & ~pix_err_lat1_ff ) & ~tgt_end_ff & ready_i ;

	//------------------------------
	// reference position calculation
	//------------------------------

	// Line number following the current one; wraps to 0 once the line counter
	// has reached tgt_hgt_i.
	assign next_line_num	= ( thgt_cnt_ff != tgt_hgt_i ) ? ( thgt_cnt_ff + 1'b1 ) : {SIZE_BIT{1'b0}} ;

	// Fixed-point reference position : next_line_num * v_scl_rate_i
	scl16_unsigned_mult #(
		.APORT_BIT	( SIZE_BIT		) ,
		.BPORT_BIT	( RATE_BIT		)
	)
	u_mult_for_dout_ctl (
		.clk		( clk			) ,
		.rst_n		( rst_n			) ,
		.enable		( enable		) ,
		.dataa_i	( next_line_num	) ,
		.datab_i	( v_scl_rate_i	) ,
		.result_o	( vref_position	)
	) ;

	// Split the product : SIZE_BIT bits directly above the RATE_FRAC_BIT fraction
	// form the reference line, the top DLT_FRAC_BIT bits of the fraction form
	// the interpolation offset.
	assign vref_pos		= vref_position[ RATE_FRAC_BIT +: SIZE_BIT ] ;
	assign vref_dlt		= vref_position[ ( RATE_FRAC_BIT - DLT_FRAC_BIT ) +: DLT_FRAC_BIT ] ;

	// Two-stage delay of h_start, advanced on every enabled clock.
	// The second stage drives valid_i of the divider.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			{ valid_mult2_ff , valid_mult1_ff }	<= 2'b00 ;
		else if ( srst )
			{ valid_mult2_ff , valid_mult1_ff }	<= 2'b00 ;
		else if ( enable )
			{ valid_mult2_ff , valid_mult1_ff }	<= { valid_mult1_ff , h_start } ;
	end

	// Constant divisor for the line-buffer index calculation :
	// 5 when five line buffers are configured, 3 otherwise.
	generate
		if ( RAM_COUNT == 5 ) begin : lbuf5_sel_num
			assign div_divisor		= 3'd5 ;
		end
		else begin : lbuf3_sel_num
			assign div_divisor		= 2'd3 ;
		end
	endgenerate

	// Divides the reference line number ( vref_pos ) by the constant div_divisor.
	// Only the remainder and its valid strobe are used; ready_o and quotient_o
	// are intentionally left unconnected.
	// NOTE(review): the meaning of PIPELINE = 0 is defined inside scl16_divider,
	// which is not visible here -- confirm its latency before changing the
	// surrounding valid timing.
	scl16_divider
		#(
			.DVEND			( DVEND				) ,
			.DVSOR			( DVSOR				) ,
			.PIPELINE		( 0					)
		)
		u_divider (
			.clk			( clk				) ,
			.rst_n			( rst_n				) ,
			.srst			( srst				) ,
			.enable 		( enable			) ,

			.valid_i		( valid_mult2_ff	) , // h_start delayed by two enabled clocks
			.dividend_i		( vref_pos			) ,
			.divisor_i		( div_divisor		) ,

			.ready_o		( ),                    // unused
			.valid_o		( div_valid			) ,
			.quotient_o		( ),                    // unused
			.remainder_o	( div_remainder		)
		);

	// Hold the most recent valid remainder from the divider.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			rem_lat_ff		<= {DVSOR{1'b0}} ;
		else if ( srst )
			rem_lat_ff		<= {DVSOR{1'b0}} ;
		else if ( enable && div_valid )
			rem_lat_ff		<= div_remainder ;
	end

	// Decode the latched remainder into a one-hot line-buffer select :
	// remainder k sets bit k of ram_sel. Remainder values that match none of
	// the listed codes leave ram_sel all-zero.
	generate
		if ( RAM_COUNT == 5 ) begin : lbuf5_sel_sig
			// five line buffers : remainder 0..4 -> bit 0..4
			assign ram_sel	= ( {RAM_COUNT{( rem_lat_ff == 3'b000 )}} & 5'b0_0001 )
							| ( {RAM_COUNT{( rem_lat_ff == 3'b001 )}} & 5'b0_0010 )
							| ( {RAM_COUNT{( rem_lat_ff == 3'b010 )}} & 5'b0_0100 )
							| ( {RAM_COUNT{( rem_lat_ff == 3'b011 )}} & 5'b0_1000 )
							| ( {RAM_COUNT{( rem_lat_ff == 3'b100 )}} & 5'b1_0000 ) ;
		end
		else begin : lbuf3_sel_sig
			// three line buffers : remainder 0..2 -> bit 0..2
			assign ram_sel	= ( {RAM_COUNT{( rem_lat_ff == 2'b00 )}} & 3'b001 )
							| ( {RAM_COUNT{( rem_lat_ff == 2'b01 )}} & 3'b010 )
							| ( {RAM_COUNT{( rem_lat_ff == 2'b10 )}} & 3'b100 ) ;
		end
	endgenerate

	// Per-line reference information.
	// On the last count of a line ( h_end while cnt_en ) capture the reference
	// line, its fractional offset and the one-hot buffer select derived from
	// next_line_num. Reset selects line 0 / buffer #0.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			ram_sel_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
			vref_dlt_ff		<= {DLT_FRAC_BIT{1'b0}} ;
			rd_line_pos_ff	<= {SIZE_BIT{1'b0}} ;
		end
		else if ( srst ) begin
			ram_sel_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
			vref_dlt_ff		<= {DLT_FRAC_BIT{1'b0}} ;
			rd_line_pos_ff	<= {SIZE_BIT{1'b0}} ;
		end
		else if ( cnt_en && h_end ) begin
			ram_sel_ff		<= ram_sel ;
			vref_dlt_ff		<= vref_dlt ;
			rd_line_pos_ff	<= vref_pos ;
		end
	end

	// read pointer reported to the outside of this module
	assign rd_line_pos_o	= rd_line_pos_ff ;

	//------------------
	// output data flow
	//------------------
	// Horizontal position counter ( pipeline 1st stage ).
	// Returns to 0 on srst or at the end of a line, otherwise counts up by one
	// on every cnt_en clock.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			twdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		else if ( srst || ( cnt_en && h_end ) )
			twdt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		else if ( cnt_en )
			twdt_cnt_ff		<= twdt_cnt_ff + 1'b1 ;
	end

	// Vertical ( line ) counter ( pipeline 1st stage ).
	// Returns to 0 on srst or at the end of the frame ( v_end ), and counts up
	// by one at the end of every other line ( h_end ); both only while cnt_en.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			thgt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		else if ( srst || ( cnt_en && v_end ) )
			thgt_cnt_ff		<= {SIZE_BIT{1'b0}} ;
		else if ( cnt_en && h_end )
			thgt_cnt_ff		<= thgt_cnt_ff + 1'b1 ;
	end

	// Final value of the horizontal counter within one line.
	// tgt_wdt_i is divided by PPC, the same factor that widens the per-clock
	// pixel bus ( PXL_BIT_PER_CLK ). The pre-PPC design compared against
	// tgt_wdt_i directly.
	wire	[ SIZE_BIT-1 : 0 ]		twdt_last ;
	assign twdt_last	= tgt_wdt_i / PPC ;

	// line start / line end / frame end of the read-side counters.
	// pix_err_pls forces both h_end and v_end so that the counters restart.
	assign h_start	= ( twdt_cnt_ff == {SIZE_BIT{1'b0}} ) ;
	assign h_end	= ( twdt_cnt_ff == twdt_last ) | pix_err_pls ;
	assign v_end	= ( ( twdt_cnt_ff == twdt_last ) & ( thgt_cnt_ff == tgt_hgt_i ) ) | pix_err_pls ;

	// Line buffer read port. The address bus is RAM_ADR_BIT wide, which is never
	// wider than the SIZE_BIT counter, so take the low bits explicitly instead
	// of relying on implicit truncation.
	assign ram_radr_o	= twdt_cnt_ff[ RAM_ADR_BIT-1 : 0 ] ;
	assign ram_ren_o	= cnt_en ;

	// Data valid pipeline ( 2nd-4th stage ), advanced while enable & ready_i.
	// In interlace mode ( scan_mode_i == 0 ) the second stage drops lines whose
	// parity ( even_line_ff ) differs from field_i.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			{ valid_dly3_ff , valid_dly2_ff , valid_dly1_ff }	<= 3'b000 ;
			even_line_ff	<= 1'b0 ;
		end
		else if ( srst ) begin
			{ valid_dly3_ff , valid_dly2_ff , valid_dly1_ff }	<= 3'b000 ;
			even_line_ff	<= 1'b0 ;
		end
		else if ( enable && ready_i ) begin
			even_line_ff	<= thgt_cnt_ff[0] ;
			valid_dly1_ff	<= cnt_en ;
			valid_dly2_ff	<= ( scan_mode_i | ( even_line_ff ~^ field_i ) ) & valid_dly1_ff ;
			valid_dly3_ff	<= valid_dly2_ff ;
		end
	end

	// handshake towards the vertical scaling core
	assign en_scl_o			= enable & ready_i ;
	assign valid_gy_dlty_o	= valid_dly3_ff ;

	// Line-buffer tap selection and read-data multiplexing.
	// The 5-RAM variant builds four vertical taps ( y-1 , y , y+1 , y+2 ).
	// The other variant builds only y and y+1 and ties gym1 / gyp2 to zero.
	// Every *_sel vector is one-hot over the line buffers and is derived from
	// ram_sel_ff ( the buffer holding line y ) by rotation : rotating towards
	// the MSB selects the next buffer index, towards the LSB the previous one.
	generate
		if ( RAM_COUNT == 5 ) begin : lbuf5_conn

			wire	[ RAM_COUNT-1 : 0 ]		gym1_sel ;
			wire	[ RAM_COUNT-1 : 0 ]		gyp2_sel ;

			reg		[ RAM_COUNT-1 : 0 ]		gym1_dly1_ff ;
			reg		[ RAM_COUNT-1 : 0 ]		gym1_dly2_ff ;
			reg		[ RAM_COUNT-1 : 0 ]		gyp2_dly1_ff ;
			reg		[ RAM_COUNT-1 : 0 ]		gyp2_dly2_ff ;

			reg		[ RAM_DATA_BIT-1 : 0 ]	ram3_lat_ff ;
			reg		[ RAM_DATA_BIT-1 : 0 ]	ram4_lat_ff ;

			// line selection signal generate
			// y-1 tap : previous buffer, except on reference line 0 where the
			// next buffer is used instead ( presumably edge mirroring -- confirm )
			assign gym1_sel		= ( rd_line_pos_ff == {SIZE_BIT{1'b0}} )
								? { ram_sel_ff[RAM_COUNT-2:0] , ram_sel_ff[RAM_COUNT-1] }
								: { ram_sel_ff[0] , ram_sel_ff[RAM_COUNT-1:1] } ;

			// y tap : the buffer selected for the current reference line
			assign gy0_sel		= ram_sel_ff ;

			// y+1 tap : next buffer, except on the last source line ( src_hgt_i )
			// where the previous buffer is used instead
			assign gyp1_sel		= ( rd_line_pos_ff == src_hgt_i )
								? { ram_sel_ff[0] , ram_sel_ff[RAM_COUNT-1:1] }
								: { ram_sel_ff[RAM_COUNT-2:0] , ram_sel_ff[RAM_COUNT-1] } ;

			// y+2 tap : buffer index + 2, except
			//   on the last source line        -> buffer index - 2
			//   on the line before the last    -> same buffer as y
			assign gyp2_sel		= ( rd_line_pos_ff == src_hgt_i )
								? { ram_sel_ff[1:0] , ram_sel_ff[RAM_COUNT-1:2] }
								: ( rd_line_pos_ff == src_hgt_i - 1'b1 )
								? ram_sel_ff
								: { ram_sel_ff[RAM_COUNT-3:0] , ram_sel_ff[RAM_COUNT-1:RAM_COUNT-2] } ;

			// current line information delay register ( pipeline 2nd-4th stage )
			// Reset values are the selects that result from ram_sel_ff = buffer #0
			// with rd_line_pos_ff = 0 ( y-1 -> #1 , y -> #0 , y+1 -> #1 , y+2 -> #2 ).
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					dlty_dly1_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly2_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly3_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					gym1_dly1_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gym1_dly2_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gy0_dly1_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gy0_dly2_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gyp1_dly1_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gyp1_dly2_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gyp2_dly1_ff	<= { {(RAM_COUNT-3){1'b0}} , 3'b100 } ;
					gyp2_dly2_ff	<= { {(RAM_COUNT-3){1'b0}} , 3'b100 } ;
				end
				else if ( srst ) begin
					dlty_dly1_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly2_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly3_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					gym1_dly1_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gym1_dly2_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gy0_dly1_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gy0_dly2_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gyp1_dly1_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gyp1_dly2_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gyp2_dly1_ff	<= { {(RAM_COUNT-3){1'b0}} , 3'b100 } ;
					gyp2_dly2_ff	<= { {(RAM_COUNT-3){1'b0}} , 3'b100 } ;
				end
				else if ( enable && ready_i ) begin
					// fractional offset : three stages, tap selects : two stages
					dlty_dly1_ff	<= vref_dlt_ff ;
					dlty_dly2_ff	<= dlty_dly1_ff ;
					dlty_dly3_ff	<= dlty_dly2_ff ;
					gym1_dly1_ff	<= gym1_sel ;
					gym1_dly2_ff	<= gym1_dly1_ff ;
					gy0_dly1_ff		<= gy0_sel ;
					gy0_dly2_ff		<= gy0_dly1_ff ;
					gyp1_dly1_ff	<= gyp1_sel ;
					gyp1_dly2_ff	<= gyp1_dly1_ff ;
					gyp2_dly1_ff	<= gyp2_sel ;
					gyp2_dly2_ff	<= gyp2_dly1_ff ;
				end
			end

			// line buffer data latch register ( pipeline 3rd stage )
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					ram0_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram1_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram2_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram3_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram4_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
				end
				else if ( srst ) begin
					ram0_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram1_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram2_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram3_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram4_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
				end
				else if ( enable && ready_i ) begin
					ram0_lat_ff		<= ram0_rdata_i ;
					ram1_lat_ff		<= ram1_rdata_i ;
					ram2_lat_ff		<= ram2_rdata_i ;
					ram3_lat_ff		<= ram3_rdata_i ;
					ram4_lat_ff		<= ram4_rdata_i ;
				end
			end

			// AND-OR multiplexers : each tap picks the latched read data of the
			// buffer whose bit is set in its delayed one-hot select
			assign gym1		= ( {RAM_DATA_BIT{gym1_dly2_ff[0]}} & ram0_lat_ff )
							| ( {RAM_DATA_BIT{gym1_dly2_ff[1]}} & ram1_lat_ff )
							| ( {RAM_DATA_BIT{gym1_dly2_ff[2]}} & ram2_lat_ff )
							| ( {RAM_DATA_BIT{gym1_dly2_ff[3]}} & ram3_lat_ff )
							| ( {RAM_DATA_BIT{gym1_dly2_ff[4]}} & ram4_lat_ff ) ;

			assign gy0		= ( {RAM_DATA_BIT{gy0_dly2_ff[0]}} & ram0_lat_ff )
							| ( {RAM_DATA_BIT{gy0_dly2_ff[1]}} & ram1_lat_ff )
							| ( {RAM_DATA_BIT{gy0_dly2_ff[2]}} & ram2_lat_ff )
							| ( {RAM_DATA_BIT{gy0_dly2_ff[3]}} & ram3_lat_ff )
							| ( {RAM_DATA_BIT{gy0_dly2_ff[4]}} & ram4_lat_ff ) ;

			assign gyp1		= ( {RAM_DATA_BIT{gyp1_dly2_ff[0]}} & ram0_lat_ff )
							| ( {RAM_DATA_BIT{gyp1_dly2_ff[1]}} & ram1_lat_ff )
							| ( {RAM_DATA_BIT{gyp1_dly2_ff[2]}} & ram2_lat_ff )
							| ( {RAM_DATA_BIT{gyp1_dly2_ff[3]}} & ram3_lat_ff )
							| ( {RAM_DATA_BIT{gyp1_dly2_ff[4]}} & ram4_lat_ff ) ;

			assign gyp2		= ( {RAM_DATA_BIT{gyp2_dly2_ff[0]}} & ram0_lat_ff )
							| ( {RAM_DATA_BIT{gyp2_dly2_ff[1]}} & ram1_lat_ff )
							| ( {RAM_DATA_BIT{gyp2_dly2_ff[2]}} & ram2_lat_ff )
							| ( {RAM_DATA_BIT{gyp2_dly2_ff[3]}} & ram3_lat_ff )
							| ( {RAM_DATA_BIT{gyp2_dly2_ff[4]}} & ram4_lat_ff ) ;
		end
		else begin : lbuf3_conn

			// line selection signal generate
			// y tap : the buffer selected for the current reference line
			assign gy0_sel		= ram_sel_ff ;

			// y+1 tap : next buffer, except on the last source line ( src_hgt_i )
			// where the previous buffer is used instead
			assign gyp1_sel		= ( rd_line_pos_ff == src_hgt_i )
								? { ram_sel_ff[0] , ram_sel_ff[RAM_COUNT-1:1] }
								: { ram_sel_ff[RAM_COUNT-2:0] , ram_sel_ff[RAM_COUNT-1] } ;

			// current line information delay register ( pipeline 2nd-4th stage )
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					dlty_dly1_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly2_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly3_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					gy0_dly1_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gy0_dly2_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gyp1_dly1_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gyp1_dly2_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
				end
				else if ( srst ) begin
					dlty_dly1_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly2_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					dlty_dly3_ff	<= {DLT_FRAC_BIT{1'b0}} ;
					gy0_dly1_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gy0_dly2_ff		<= { {(RAM_COUNT-1){1'b0}} , 1'b1 } ;
					gyp1_dly1_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
					gyp1_dly2_ff	<= { {(RAM_COUNT-2){1'b0}} , 2'b10 } ;
				end
				else if ( enable && ready_i ) begin
					// fractional offset : three stages, tap selects : two stages
					dlty_dly1_ff	<= vref_dlt_ff ;
					dlty_dly2_ff	<= dlty_dly1_ff ;
					dlty_dly3_ff	<= dlty_dly2_ff ;
					gy0_dly1_ff		<= gy0_sel ;
					gy0_dly2_ff		<= gy0_dly1_ff ;
					gyp1_dly1_ff	<= gyp1_sel ;
					gyp1_dly2_ff	<= gyp1_dly1_ff ;
				end
			end

			// line buffer data latch register ( pipeline 3rd stage )
			// only buffers #0-#2 are used; ram3_rdata_i / ram4_rdata_i are ignored
			always @( posedge clk or negedge rst_n ) begin
				if ( !rst_n ) begin
					ram0_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram1_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram2_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
				end
				else if ( srst ) begin
					ram0_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram1_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
					ram2_lat_ff		<= {RAM_DATA_BIT{1'b0}} ;
				end
				else if ( enable && ready_i ) begin
					ram0_lat_ff		<= ram0_rdata_i ;
					ram1_lat_ff		<= ram1_rdata_i ;
					ram2_lat_ff		<= ram2_rdata_i ;
				end
			end

			// y-1 tap is not available in this configuration
			assign gym1		= {RAM_DATA_BIT{1'b0}} ;

			assign gy0		= ( {RAM_DATA_BIT{gy0_dly2_ff[0]}} & ram0_lat_ff )
							| ( {RAM_DATA_BIT{gy0_dly2_ff[1]}} & ram1_lat_ff )
							| ( {RAM_DATA_BIT{gy0_dly2_ff[2]}} & ram2_lat_ff ) ;

			assign gyp1		= ( {RAM_DATA_BIT{gyp1_dly2_ff[0]}} & ram0_lat_ff )
							| ( {RAM_DATA_BIT{gyp1_dly2_ff[1]}} & ram1_lat_ff )
							| ( {RAM_DATA_BIT{gyp1_dly2_ff[2]}} & ram2_lat_ff ) ;

			// y+2 tap is not available in this configuration
			assign gyp2		= {RAM_DATA_BIT{1'b0}} ;
		end
	endgenerate

	// fractional offset handed to the scaling core ( three-stage delayed )
	assign deltay_o		= dlty_dly3_ff ;

	// Tap data output registers ( pipeline 4th stage ) : register the four
	// multiplexed line-buffer taps while enable & ready_i.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			{ gym1_ff , gy0_ff , gyp1_ff , gyp2_ff }	<= {(4*RAM_DATA_BIT){1'b0}} ;
		else if ( srst )
			{ gym1_ff , gy0_ff , gyp1_ff , gyp2_ff }	<= {(4*RAM_DATA_BIT){1'b0}} ;
		else if ( enable && ready_i )
			{ gym1_ff , gy0_ff , gyp1_ff , gyp2_ff }	<= { gym1 , gy0 , gyp1 , gyp2 } ;
	end

	// tap data towards the vertical scaling core
	assign gyp2_o	= gyp2_ff ;
	assign gyp1_o	= gyp1_ff ;
	assign gy0_o	= gy0_ff ;
	assign gym1_o	= gym1_ff ;

	// Frame start : horizontal count 1 of the first line that is output.
	// That line is 0 in progressive mode and equals field_i in interlace mode.
	assign frame_start	= cnt_en
						& ( twdt_cnt_ff == { {(SIZE_BIT-1){1'b0}} , 1'b1 } )
						& ( thgt_cnt_ff == { {(SIZE_BIT-1){1'b0}} , ( ~scan_mode_i & field_i ) } ) ;

	// Frame end : last horizontal count of the last line that is output.
	// In interlace mode the last line is tgt_hgt_i when its parity matches
	// field_i, otherwise tgt_hgt_i - 1.
	assign frame_end	= cnt_en
						& ( twdt_cnt_ff == ( tgt_wdt_i / PPC ) )
						& ( thgt_cnt_ff == ( tgt_hgt_i - ( ~scan_mode_i & ( field_i ^ tgt_hgt_i[0] ) ) ) ) ;

	// Frame boundary delay lines ( pipeline 2nd-8th stage ), shifted while
	// enable & ready_i :
	//   fstart : 5 stages fed by frame_start
	//   fend   : 7 stages fed by frame_end, or by a pixel error pulse once a
	//            frame start has already been sent out
	//   tend   : 7 stages fed by v_end qualified with cnt_en
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			{ fstart_dly7_ff , fstart_dly6_ff , fstart_dly5_ff , fstart_dly4_ff , fstart_dly3_ff }	<= 5'b0_0000 ;

			{ fend_dly7_ff , fend_dly6_ff , fend_dly5_ff , fend_dly4_ff ,
			  fend_dly3_ff , fend_dly2_ff , fend_dly1_ff }	<= 7'b000_0000 ;

			{ tend_dly7_ff , tend_dly6_ff , tend_dly5_ff , tend_dly4_ff ,
			  tend_dly3_ff , tend_dly2_ff , tend_dly1_ff }	<= 7'b000_0000 ;
		end
		else if ( srst ) begin
			{ fstart_dly7_ff , fstart_dly6_ff , fstart_dly5_ff , fstart_dly4_ff , fstart_dly3_ff }	<= 5'b0_0000 ;

			{ fend_dly7_ff , fend_dly6_ff , fend_dly5_ff , fend_dly4_ff ,
			  fend_dly3_ff , fend_dly2_ff , fend_dly1_ff }	<= 7'b000_0000 ;

			{ tend_dly7_ff , tend_dly6_ff , tend_dly5_ff , tend_dly4_ff ,
			  tend_dly3_ff , tend_dly2_ff , tend_dly1_ff }	<= 7'b000_0000 ;
		end
		else if ( enable && ready_i ) begin
			{ fstart_dly7_ff , fstart_dly6_ff , fstart_dly5_ff , fstart_dly4_ff , fstart_dly3_ff }
				<= { fstart_dly6_ff , fstart_dly5_ff , fstart_dly4_ff , fstart_dly3_ff , frame_start } ;

			{ fend_dly7_ff , fend_dly6_ff , fend_dly5_ff , fend_dly4_ff ,
			  fend_dly3_ff , fend_dly2_ff , fend_dly1_ff }
				<= { fend_dly6_ff , fend_dly5_ff , fend_dly4_ff , fend_dly3_ff ,
					 fend_dly2_ff , fend_dly1_ff , ( frame_end | ( pix_err_pls & fstart_out_done_ff ) ) } ;

			{ tend_dly7_ff , tend_dly6_ff , tend_dly5_ff , tend_dly4_ff ,
			  tend_dly3_ff , tend_dly2_ff , tend_dly1_ff }
				<= { tend_dly6_ff , tend_dly5_ff , tend_dly4_ff , tend_dly3_ff ,
					 tend_dly2_ff , tend_dly1_ff , ( v_end & cnt_en ) } ;
		end
	end

	// Output stage ( pipeline 9th stage ), updated while enable & ready_i.
	// valid is also raised for the frame start / frame end markers; pixel data
	// is forced to zero whenever the scaling core does not flag valid data.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n ) begin
			{ tgt_end_ff , valid_out_ff , fend_out_ff , fstart_out_ff }	<= 4'b0000 ;
			pixel_out_ff	<= {PXL_BIT_PER_CLK{1'b0}} ;
		end
		else if ( srst ) begin
			{ tgt_end_ff , valid_out_ff , fend_out_ff , fstart_out_ff }	<= 4'b0000 ;
			pixel_out_ff	<= {PXL_BIT_PER_CLK{1'b0}} ;
		end
		else if ( enable && ready_i ) begin
			tgt_end_ff		<= tend_dly7_ff ;
			fstart_out_ff	<= fstart_dly7_ff ;
			fend_out_ff		<= fend_dly7_ff ;
			valid_out_ff	<= ( fstart_dly7_ff | fend_dly7_ff | valid_gyd_i ) ;
			pixel_out_ff	<= gyd_i & {PXL_BIT_PER_CLK{valid_gyd_i}} ;
		end
	end

	// Output field flag : sampled from field_i together with the delayed frame
	// start and held until the next frame start.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			field_out_ff	<= 1'b0 ;
		else if ( srst )
			field_out_ff	<= 1'b0 ;
		else if ( enable && ready_i && fstart_dly7_ff )
			field_out_ff	<= field_i ;
	end

	// Sticky "frame start has been issued" flag. It qualifies the pixel error
	// pulse that feeds the frame end delay line, so that an error cannot produce
	// frame_end_o before any frame_start_o was sent.
	// The flag is only cleared by rst_n / srst.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			fstart_out_done_ff	<= 1'b0 ;
		else if ( srst )
			fstart_out_done_ff	<= 1'b0 ;
		else if ( enable && ready_i && fstart_dly7_ff )
			fstart_out_done_ff	<= 1'b1 ;
	end

	// output frame stream : registered values are passed straight to the ports
	assign pixel_o			= pixel_out_ff ;
	assign valid_o			= valid_out_ff ;
	assign frame_start_o	= fstart_out_ff ;
	assign frame_end_o		= fend_out_ff ;
	assign field_o			= field_out_ff ;

	// end-of-output-process flag, reported only while the sink is ready
	assign tgt_end_o		= ready_i & tgt_end_ff ;


	// =====================================================
	// 					line flow control
	// =====================================================

	// pointer difference : write line position minus read line position,
	// computed one bit wider so that the MSB flags a negative result
	assign delta_line_p 	= { 1'b0 , wr_line_pos_i } - { 1'b0 , rd_line_pos_ff } ;

	// Minimum number of lines the write pointer must be ahead of the read
	// pointer before a line may be read : 3 with five line buffers, 2 otherwise.
	localparam	[ SIZE_BIT-1 : 0 ]	RD_GRANT_MARGIN	= ( RAM_COUNT == 5 ) ? 3 : 2 ;

	// scaling transaction pipeline control
	// Reading is granted when the write pointer leads by at least
	// RD_GRANT_MARGIN lines, or once the write pointer has passed the last
	// source line ( frame end condition ).
	assign read_grant	= ( ~delta_line_p[SIZE_BIT]
						& ( delta_line_p[SIZE_BIT-1:0] >= RD_GRANT_MARGIN ) )
						| ( wr_line_pos_i > src_hgt_i ) ;

	// Scaling transaction wait flag : set when the last count of a frame
	// ( v_end ) is processed and kept until rst_n / srst. While set, en_ff
	// cannot be raised again.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			proc_wait_ff	<= 1'b0 ;
		else if ( srst )
			proc_wait_ff	<= 1'b0 ;
		else if ( enable && v_end && cnt_en )
			proc_wait_ff	<= 1'b1 ;
	end

	// Scaling transaction pipeline enable register.
	// Raised while reading is granted and no frame-end wait is pending; dropped
	// for one cycle after the last count of every line.
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			en_ff	<= 1'b0 ;
		else if ( srst )
			en_ff	<= 1'b0 ;
		else if ( enable )
			en_ff	<= read_grant & ~proc_wait_ff & ( ~h_end | ~cnt_en ) ;
	end

	// Counter / read enable of the first pipeline stage.
	// <NOTE> pix_err_pls : forces cnt_en when a pixel error occurred
	assign cnt_en	= ( en_ff | pix_err_pls ) & ready_i & enable ;


	// =====================================================
	//	 output target size
	// =====================================================

	// Last line index of the output picture : tgt_hgt_i in progressive mode,
	// ( tgt_hgt_i - field_i ) / 2 for one field in interlace mode.
	assign tgt_hgt_adj	= ( ~scan_mode_i ) ? ( ( tgt_hgt_i - {{(SIZE_BIT-1){1'b0}} , field_i} ) >> 1 ) : tgt_hgt_i ;

	// Reported output size : the size inputs plus one, captured whenever
	// init_end_i is high ( not gated by enable ).
	always @( posedge clk or negedge rst_n ) begin
		if ( !rst_n )
			{ width_o_ff , height_o_ff }	<= {(2*SIZE_BIT){1'b0}} ;
		else if ( srst )
			{ width_o_ff , height_o_ff }	<= {(2*SIZE_BIT){1'b0}} ;
		else if ( init_end_i ) begin
			height_o_ff		<= tgt_hgt_adj + 1'b1 ;
			width_o_ff		<= tgt_wdt_i + 1'b1 ;
		end
	end

	assign height_o		= height_o_ff ;
	assign width_o		= width_o_ff ;


endmodule

`default_nettype wire
