# # vocalips2musicxml_04.pl # # Rev 0.0 Jan. 2, 2010 by zhuo : initial. # Rev 0.1 Jan. 7, 2010 by zhuo : "remove-rest shorter than n" option added. # Rev 0.2 Jan.26, 2010- Feb.12, 2010 by zhuo : # * assumes a tick-based vocalips text file # (generated by vsqtxt2lipsync_052.pl) run with # -i -m e3 -t (track_index) option. # * using lolxml.pm (original lib) for xml. # (0.2 is not complete, continuing to Rev. 0.3) # Rev.0.3 Feb. 13,2010- by zhuo measure splitting ok. no xml output yet. # Rev.0.4 Feb. 25,2010- by zhuo xml output # # # input: resulting tab-separated file of vocalips 0.42 (in standard mode) # output: music similar to the example on sinsy.jp # # usage: # perl vocalips2musicxml_04.pl \ # [-b bpm] [-u tpq] [-n numer] [-d denom] [-m length] [-o offset] \ # [-p pitch_offset] [-q q_unit] [-r gap] < vocalips_txt > musicxml.xml # # -b bpm : the number of beats in a single minute. 120.0 by default. # -u tpq : the length of a quarter note (a beat), in tick. 480 by default. # -n numer : The upper number of the meter. 4 by default. # -d denom : The lower number of the meter. 4 by default. # -m length : The length of a measure, in tick. tpq * numer by default. # -o offset : The measure/quantize offset, in tick. 0 by default. # offset must be within [0, length). # -p pitch_o : The pitch in output is shifted by this semitone-value. # (-f from : The starting point, in tick. (defaults to the first note on)) # (-t to : The end point, in tick. (defaults to the last note-off.)) # -q q_unit : on/off time is quantized by this amount. in tick. # -r gap : remove the rests shorter than this gap (in ticks) by # making the preceding note longer. # use lolxml; ######################################## # set up params ######################################## $tool_name = "vocalips2musicxml"; $tool_version = "0.4"; ($sec, $min, $hour, $mday, $mon, $year, $wday, $yday, $isdst) = localtime(); $g_encoding_date = sprintf("%04d-%02d-%02d", $year + 1900, $mon + 1, $mday ); $g_default_tpq = 480; $g_default_numer = 4; %g_params = ( 'bpm' => 120.0, 'tpq' => $g_default_tpq, 'numer' => $g_default_numer, 'denom' => 4, 'mlen' => $g_default_tpq * $g_default_numer, 'offset' => 0, 'pitch' => 0, 'from' => 0, 'to' => -1, 'q_unit' => -1, 'rest_gap' => 0 ); %g_opts = ( '-b' => 'bpm', '-u' => 'tpq', '-n' => 'numer', '-d' => 'denom', '-m' => 'mlen', '-o' => 'offset', '-p' => 'pitch', '-f' => 'from', '-t' => 'to', '-q' => 'q_unit', '-r' => 'rest_gap' ); ######################################## # functions ######################################## # read_options() # Scan the command line and store the option parameters in $g_params{option} . # # args: none # return value: ref to a list of options specified # external: reads @ARGV, g_opts. OVERWRITES g_params with the values # # Dies if wrong command line options are specified. sub read_options { my $p_opts = []; while ( 1 ) { my $arg = $ARGV[0]; unless ( $arg =~ /^\-/ ) { last; } unless ( exists $g_opts{$arg} ) { die "$arg : undefined option.\n"; } shift @ARGV; unless ($ARGV[0] =~ /^[0-9\.]+$/ ) { die "$arg $ARGV[0] : must be a number.\n" }; $g_params{$g_opts{$arg}} = $ARGV[0]; push( @$p_opts, $g_opts{$arg} ); shift @ARGV; } return $p_opts; } # fix_mlen() # Corrects the g_params{'mlen'} according to options specified. # # args: ref to a list of options specified # return value: none # external: OVERWRITES g_params{'mlen'} to the correct value if necessary # # mlen is determined as follows: # 1) If none of tpq, numer, or mlen option (u,n,m) is given, # the default value is used. # 2) If tpq and/or numer (u,n) are specified but mlen is not, # mlen is set to given tpq multiplied by given numer. # 3) If tpq, numer, and mlen are all specified, # the mlen value specified is used. # sub fix_mlen { my ( $p_opts ) = @_; if ( grep(/tpq/, @$p_opts) || grep(/numer/, @$p_opts) ) { if ( ! grep(/mlen/, @$p_opts) ) { $g_params{'mlen'} = $g_params{'tpq'} * $g_params{'numer'}; } } } # read_vocalips_tsv() # Reads thru stdin the tab-separated values in vocalips format (-i -m e3), # removes the short gap between two notes by extending the first note, # and returns the results in a list of hashes. # # args: none # return value: note events, slurred, as a ref to a list of hash refs. # external: reads stdin; reads $g_params{'rest_gap'}. overwrites g_part_name. # # an event is represented in the form shown below: # {'beg'=>$beg, 'end'=>$end, 'utt'=>$utt, 'keynum'=>$keynum, 'lyric'=>$lyric } sub read_vocalips_tsv { my $p_events = []; # skip the header part of the input while (<>) { chop; if ( $_ =~ s/^\#\#\tbegin\ttrack\t// ) { $g_part_name = $_; } last if ( $_ =~ /^\#\#\tbegin\teventlist/ ); } # read the body while (<>) { last if ( $_ =~ /^\#\#\tend\teventlist/ ); my ( $beg, $end, $utt, $vel, $keynum, $lyric, @rest ) = split( /\t/, $_ ); if ( $keynum != 0 ) { $keynum += $g_params{'pitch'}; if ( $keynum < 0 ) { $keynum = 0; } if ( $keynum > 127 ) { $keynum = 127; } } if ( $utt eq 'SIL' && $end - $beg < $g_params{'rest_gap'} && $#$p_events >= 0 ) { $p_events->[ $#$p_events ]->{'end'} = $end; next; # note: Make the previous utterance longer, by # replacing the 'end' value of the previous note # with that of this rest; This takes place # when a very short silence (rest) follows a note. } # pre-fetched lyric at the silence is cleared, # since it is unnecessary in this application. if ( $utt eq 'SIL' ) { $keynum = -1; $lyric = ''; } push( @$p_events, { 'beg'=>$beg, 'end'=>$end, 'utt'=>$utt, 'keynum'=>$keynum, 'lyric'=>$lyric } ); # note: the 'end' value may be altered at the next iteration. } # fill the first silence with a long SIL if ( $#$p_events >= 0 ) { my $first_beg = $p_events->[ 0 ]->{'beg'}; if ( $first_beg > 0 ) { splice( @$p_events, 0, 0, { 'beg'=>0, 'end'=>$first_beg, 'utt'=>'SIL', 'keynum'=>-1, 'lyric'=>'' } ); } } return $p_events; } # quantize_events() # Quantizes all the begin/end ticks in the events # # args: ref to events # return value: none # external: reading $g_params('q_unit'), MODIFIES the event attributes sub quantize_events { my ( $p_events ) = @_; if ( $g_params{'q_unit'} == -1 ) { # print "quantize not specified. \n"; return; } foreach my $pe (@$p_events) { $pe->{'beg'} = quantize_tick( $pe->{'beg'} ); $pe->{'end'} = quantize_tick( $pe->{'end'} ); } } # quantize_tick() # Quantizes the input tick to a quantized grid value. # # args: tick. # return values: quantized tick. # external: reads g_params{'q_unit'}, g_params{'offset'} # # offset q_unit+offset # |-------********|********-------|-------*-------| # <-offset> # -----********|********-------|---------------|--- # 0 q_unit # < / q_unit> # -----********|********-------|---------------|--- # 0 1 # < +0.5> # -------------****************|---------------|--- # 0 1 # < int> / / / ...../ # -------------*---------------*---------------|--- # 0 1 # <* q_unit> # -------------*---------------*---------------|--- # 0 q_unit # < +offst> offset q_unit+offset # |---------------*---------------*---------------| sub quantize_tick { my ( $tick ) = @_; return int( int(($tick - $g_params{'offset'}) / $g_params{'q_unit'} + 0.5) * $g_params{'q_unit'} + $g_params{'offset'} ); } # split_measure_give_tie() # Modify the list of refs to events so every note # that goes across a measure-border is split into two notes # at the border. # Also, all the notes are given "tie-start" / "tie-stop" # attributes; tie-start is set to 1 if a tie is starting. # # args: a ref to list of events (the list is modified) # return value: none # external: reads g_params{'mlen'}, g_params{'offset'}; # # * Assumes that the input events start at tick 0. # (assured in read_vocalips_tsv() ) sub split_measure_give_tie { my ( $p_events ) = @_; # set up the initial measure location my $m_pos = $g_params{'offset'}; # replace a note with two if it continues to the next measure; # Note: index-based traversal is used, because the number of # the events may increase during the traversal. my $e_idx = 0; while ( $e_idx <= $#$p_events ) { my $pe = $p_events->[$e_idx]; my $e_beg = $pe->{'beg'}; my $e_end = $pe->{'end'}; # m_pos -> m_pos -> m_pos # ----|--------------|---------------|--------- # |e_beg e_end # |e_beg e_end # Note that the first p_events always start at 0; # therefore, the loop below would not take place. while ( $m_pos <= $e_beg ) { $m_pos += $g_params{'mlen'}; } # now e_beg < m_pos (see above) if ( ! (exists $pe->{'tie_start'}) ) { $pe->{'tie_start'} = 0; } if ( ! (exists $pe->{'tie_stop'}) ) { $pe->{'tie_stop'} = 0; } if ( $m_pos < $e_end ) { # the note continues to the next measure; split it into two. my $isUtt = ($pe->{'utt'} eq 'UTT') ? 1 : 0; $pe->{'end'} = $m_pos; $pe->{'tie_start'} = $isUtt; splice( @$p_events, $e_idx + 1, 0, { 'beg'=>$m_pos, 'end'=>$e_end, 'utt'=>$pe->{'utt'}, 'keynum'=>$pe->{'keynum'}, 'lyric'=>($isUtt ? '[' : ''), 'tie_start'=>0, 'tie_stop'=>$isUtt } ); } ++$e_idx; # next time, look at the second note (just inserted by the code above) } # process the last silence to fill the full measure. if ( $p_events->[ $#$p_events ]->{'end'} < $m_pos ) { # print "last event end = $p_events->[ $#$p_events ]->{'end'}, smaller than $m_pos.\n"; if ( $p_events->[ $#$p_events ]->{'utt'} eq 'SIL' ) { $p_events->[ $#$p_events ]->{'end'} = $m_pos; } else { my $last_beg = $p_events->[ $#$p_events ]->{'end'}; push( @$p_events, { 'beg'=>$last_beg, 'end'=>$m_pos, 'utt'=>'SIL', 'keynum'=>-1, 'lyric'=> '', 'tie_start'=>0, 'tie_stop'=>0 } ); } } } # generate a list for a note. # # args: see below # return value: a list for the note, in "lolxml" format # # relying on: g_pitchclass, g_accidental @g_pitchclass = ("C","C","D","D","E","F","F","G","G","A","A","B"); @g_accidental = ( 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0 ); sub build_note { my ( $beg, $end, $utt, $keynum, $lyric, $tie_start, $tie_stop ) = @_; my $note = [ 'note' ]; if ( $utt eq "UTT" ) { my $pitch = ['pitch']; push( @$pitch, [ 'step', $g_pitchclass[$keynum % 12] ] ); $g_accidental[$keynum%12] && push( @$pitch, [ 'alter', 1 ] ); push( @$pitch, [ 'octave', int(($keynum-24)/12 ) ] ); push( @$note, $pitch ); } else { push( @$note, [ 'rest', ['', '', ''] ] ); } push( @$note, [ 'duration', int( $end - $beg ) ] ); $tie_start && push( @$note, ['tie', ['type', 'start', '']] ); $tie_stop && push( @$note, ['tie', ['type', 'stop', '']] ); push( @$note, [ 'voice', 1 ] ); if ($tie_start || $tie_stop) { my $notations = ['notations']; if ( $tie_stop ) { push( @$notations, ['tied', ['type', 'stop', '']] ); } if ( $tie_start ) { push( @$notations, ['tied', ['type', 'start', '']] ); } push( @$note, $notations ); } if ($utt eq 'UTT') { push( @$note, [ 'lyric', ['text', $lyric ] ] ); } return $note; } # Build a list of lists (or, a tree structure) from the input # event-list. # # in: $p_events: the event list # out: $lol: a list of lists, representing the xml tree structure # refer to: $g_encoding_date, $g_part_name, $g_params{*} # # Note: No events can go over a measure border; Such a long note event # must be split at the measure borders and then tied. sub build_score_partwise { my ( $p_events ) = @_; my $xml_body = ['score-partwise', [ 'version', '1.1', ''], ['identification', ['encoding', ['software', "$tool_name $tool_version" ], ['encoding-date', $g_encoding_date ]]], ['part-list', ['score-part', ['id', 'P1', '' ], ['part-name', $g_part_name ]]]]; my $xml_part = ['part', ['id', 'P1', '' ], ['==============================='], #single elm list is a comment ['measure', ['number', 1, ''], ['attributes', ['divisions', $g_params{'tpq'} ], ['time', ['beats', $g_params{'numer'}], ['beat-type', $g_params{'denom'} ]]], ['direction', ['placement', "above", '' ], ['sound', ['tempo', $g_params{'bpm'}, '' ]]], ['note', ['rest', ['', '', ''] ], ['duration', $g_params{'mlen'} ], ['voice', 1], ['type', 'whole'], ['staff', 1]]]]; push( @$xml_body, $xml_part ); my $m_cnt = 1; my $m_tick = 0; my $meas; foreach $pe ( @$p_events ) { if ( $pe->{'beg'} >= $m_tick ) { ++$m_cnt; $m_tick += $g_params{'mlen'}; push( @$xml_part, ['==============================='] ); $meas = ['measure', ['number', $m_cnt, '']]; push( @$xml_part, $meas ); } my $note = &build_note( $pe->{'beg'}, $pe->{'end'}, $pe->{'utt'}, $pe->{'keynum'}, $pe->{'lyric'}, $pe->{'tie_start'}, $pe->{'tie_stop'} ); push( @$meas, $note ); # printf( "note inserted:%8d %8d %s\n", $pe->{'beg'}, $pe->{'end'}, $pe->{'lyric'} ); # print &lolxml02::toxml( $note ); # print "\n"; } return $xml_body; } my $p_opts = &read_options(); &fix_mlen( $p_opts ); ##check p_opts ; seems ok #foreach my $k (keys(%g_params)) { print "$k = $g_params{$k}\n"; } exit; my $p_events = &read_vocalips_tsv(); ##1 . just after reading (rest-gap is already fixed, though); seems ok #print "check 1:\n"; foreach my $e (@$p_events) { foreach my $k (sort(keys(%$e))) { printf( "%s=%s ", $k, $e->{$k} ); } print "\n"; } exit 0; &quantize_events( $p_events ); ##2. after quantization #print "check 2:\n"; foreach my $e (@$p_events) { foreach my $k (sort(keys(%$e))) { printf( "%s=%s ", $k, $e->{$k} ); } print "\n"; } exit 0; &split_measure_give_tie( $p_events ); ##3. after measure-split #print "check 3:\n"; foreach my $e (@$p_events) { foreach my $k (sort(keys(%$e))) { printf( "%s=%s ", $k, $e->{$k} ); } print "\n"; } exit 0; # need check : Feb 25 my $xml_head = ' '; print $xml_head; print "\n"; my $score_partwise = &build_score_partwise( $p_events ); print &lolxml02::toxml( $score_partwise, "", "\t" ); exit 0;