#!/usr/bin/env python
# Source: ubuntuusers.de paste "pdf2mp3.py"
# Author: busfahrer
# Date: 24 July 2010, 20:38
# ###################################################
# pdf2mp3.py - little script/program to convert a
# pdf-file or ascii-file (.dat, .txt) into a mp3 audio or wav file
#
# Copyright (C) 2010 Hannes Rennau
# hannes@bolding-burchard.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the
# Free Software Foundation, Inc.,
# 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
# ###################################################

# LIST OF PACKAGES NEEDED:
# you need to install the following packages:
# sudo apt-get install python poppler-utils festival festvox-rablpc16k
# lame espeak wavbreaker 

# HOW TO USE:
# 1. Create a file named pdf2mp3 and copy the whole text of this script
#    into it.
# 2. Make the file executable:
#    >>> chmod +x pdf2mp3
# 3. Copy the file to /usr/bin so the program can be run from anywhere
#    on your computer:  >>> sudo cp pdf2mp3 /usr/bin/
# 4. After that, get help by calling:
#    pdf2mp3 -h
#  
# 5.EXAMPLE:
#   you want to convert yourfilename.pdf into a mp3 file, then just type:
#   pdf2mp3 -v en -f yourfilename.pdf -o yourfilename.mp3   
#   (for the english voice 'en', for german voice 'de',
#   type: espeak --voices to get list of voices available on your system)
#
#
# edited by busfahrer 24.07.2010

import os,sys
import string
import time
import glob
from subprocess import call, Popen, PIPE
from optparse import OptionParser as op

def main():
    """Parse the command line and run the text-to-audio conversion.

    Reads the voice (-v), input file (-f) and output file (-o) options and
    validates them.  A .pdf input is first converted to a .txt file; the
    text is then turned into a wav file and, when the output name ends in
    .mp3, the wav file is encoded to mp3.

    Returns:
        0 on success, 1 if the input file does not exist, 2 if an option is
        missing or a file name has an unsupported extension.  The script
        entry point passes this value to sys.exit().
    """
    # Adjacent string literals are used instead of backslash continuations
    # so that the source indentation does not end up inside the messages.
    parser = op(
        usage='%prog -v [de,en,...] -f filename[.pdf|.txt|.dat] '
              '-o filename[.wav|.mp3]',
        description='This script converts ASCII files (basically those '
                    'files with extension .txt or .dat) or pdf files into '
                    'an mp3 (or wav) audio file.',
        version=r'$v0.1$')

    parser.add_option('-v', '--voice', type='string', metavar='VOICENAME',
                      help='name of the voice to be used. '
                           'type: ***espeak --voices*** to get list of '
                           'available voices on your system.')
    parser.add_option('-f', '--file', type='string',
                      metavar='SOURCEFILENAME',
                      help='input path of file to read '
                           '(and later on convert to audio file). This can '
                           'be a pdf or ascii (.txt or .dat) file. '
                           'extension must be given!')
    parser.add_option('-o', '--output', type='string',
                      metavar='OUTPUTFILENAME',
                      help='Output filename (with extension .wav or .mp3 so '
                           'that the program knows which audio format you '
                           'want.)')

    options = parser.parse_args()[0]

    if options.voice is None:
        print('no voice name given, use -v voicename '
              '[type ***espeak --voices*** for list of available voices]')
        return 2

    if options.file is None:
        print('no input file name given [please use: -f filename]')
        return 2

    if options.output is None:
        print('no output file name given '
              '[please use: -o outputfilename.[wav|mp3]]')
        return 2

    filename_inp = str(options.file)
    filename_out = str(options.output)

    # Each validation failure returns immediately so that only the message
    # that actually applies is printed and the exit status is non-zero.
    if not filename_out.endswith(('.wav', '.mp3')):
        print('please decide whether you want wav or mp3 format by '
              'typing -o filename.wav or -o filename.mp3')
        return 2

    if not os.path.isfile(filename_inp):
        print('*** input file %s does not exist ***' % filename_inp)
        return 1

    if not filename_inp.endswith(('.dat', '.txt', '.pdf')):
        print('*** input file does not have extension (.txt, .dat, .pdf) ***')
        return 2

    # A pdf is converted to a sibling .txt file, which then becomes the
    # text source for the speech synthesis step.
    text_file = filename_inp
    if filename_inp.endswith('.pdf'):
        print('converting pdf file: ' + filename_inp + ' to ASCII')
        pdf_convert_to_ascii(filename_inp)
        text_file = filename_inp[:-4] + '.txt'

    # The wav file is always produced first; mp3 is an extra encoding step.
    convert_to_wav(text_file, filename_out, options.voice)
    join_wav_files(filename_out)
    if filename_out.endswith('.mp3'):
        convert_wav_2_mp3(filename_out)
    return 0
 
def pdf_convert_to_ascii(input_pdf_file):
    """Run pdftotext on input_pdf_file.

    The text is written to a file named like the input with its last four
    characters replaced by '.txt'.
    """
    text_target = input_pdf_file[:-4] + '.txt'
    command = ['pdftotext', input_pdf_file, text_target]
    call(command)

def convert_to_wav(input_ascii_file,output_wav_file,language):
    """Speak the contents of a text file into a wav file using espeak.

    The text is piped through sed, which deletes every character other
    than ASCII letters, space and . , ! ?, and then into espeak.

    Args:
        input_ascii_file: path of the text file to read.
        output_wav_file: output file name; its last four characters are
            replaced by '.wav' to form the name espeak writes to.
        language: voice name passed to espeak's -v option.

    Returns:
        The exit status of espeak.
    """
    wav_path = output_wav_file[:-4] + '.wav'
    with open(input_ascii_file, 'rb') as text:
        # sed reads the file directly; no separate cat process is needed.
        sed_out = Popen(['sed', 's/[^a-zA-Z .,!?]//g'], stdin=text,
                        stdout=PIPE)
        espeak = Popen(['espeak', '-v', language, '-w', wav_path],
                       stdin=sed_out.stdout)
        # Close our copy of the pipe so sed is not kept alive by this
        # process if espeak exits early.
        sed_out.stdout.close()
        # Block until espeak has finished writing the wav file instead of
        # sleeping for a fixed time, which was too short for longer texts.
        status = espeak.wait()
        sed_out.wait()
    return status

def convert_wav_2_mp3(input_wav_file):
    """Encode a wav file to mp3 with lame, then delete the wav file.

    Args:
        input_wav_file: file name whose last four characters are replaced
            by '.wav' for the source and by '.mp3' for the target.

    Returns:
        The exit status of lame.
    """
    stem = input_wav_file[:-4]
    wav_path = stem + '.wav'
    status = call(['lame', '-f', wav_path, stem + '.mp3'])
    if status == 0:
        os.remove(wav_path)
    else:
        # Keep the wav file so the synthesised audio is not lost when the
        # encoding step fails.
        print('*** lame failed (exit status %d), keeping %s ***'
              % (status, wav_path))
    return status

# espeak creates multiple wav, how many is obtained here
def nr_wav_files(valid_path,filename_out):
    """Count the files below valid_path that look like wav part files.

    A file counts when its name contains filename_out with the last four
    characters replaced by '.wav_'.  All directories under valid_path are
    searched.
    """
    marker = filename_out[:-4] + '.wav_'
    return sum(
        1
        for _root, _dirs, names in os.walk(valid_path)
        for name in names
        if marker in name
    )

def join_wav_files(filename_out):
    """Merge the wav file and its numbered part files into one wav file.

    The inputs are '<base>.wav' followed by '<base>.wav_01',
    '<base>.wav_02', ... where <base> is filename_out without its last
    four characters.  They are merged with wavmerge into 'merged.wav',
    which then replaces '<base>.wav'.

    Args:
        filename_out: output file name ending in a four-character
            extension such as '.wav' or '.mp3'.

    Returns:
        0 when the merge succeeded, otherwise a non-zero status; in that
        case no file is deleted or renamed.
    """
    base = filename_out[:-4]
    target = base + '.wav'
    part_prefix = base + '.wav_'

    # NOTE(review): one part fewer than nr_wav_files() reports is joined,
    # exactly as before -- confirm against espeak's part numbering.
    part_count = nr_wav_files('.', filename_out) - 1
    sources = [target]
    sources += ['%s%02d' % (part_prefix, index + 1)
                for index in range(part_count)]
    print(' '.join(sources))

    # Pass the file names as an argument list, not a shell string, so that
    # spaces or shell metacharacters in a name cannot break the command.
    try:
        status = call(['wavmerge', '-o', 'merged.wav'] + sources)
    except OSError:
        # wavmerge could not be started (for example, it is not installed).
        status = -1
    if status != 0:
        print('*** wavmerge failed, keeping the original wav files ***')
        return status

    # Only remove part files that belong to this output, not every
    # '*.wav_*' file that happens to be in the working directory.
    for leftover in glob.glob('*.wav_*'):
        if leftover.startswith(part_prefix):
            os.remove(leftover)
    os.rename('merged.wav', target)
    return status

if __name__ == '__main__':
    # Hand main()'s return value to the shell as the exit status.
    sys.exit(main())