Jun 16, 2012

zipfile in python

zip 文档是一个文件
没办法简单的从文档中删除或修改一个文件
解决办法是遍历所有文件
如果不是目标文件,就直接读出其内容(无需解压到磁盘),写入新文档
如果是目标文件,就删除,或解压缩出来进行修改,修改后压缩入新文档。
参见这儿

python 中处理 zip 文档的包为 zipfile
使用例子


# -*- coding: utf-8 -*-

import sys
import os
import subprocess
import shutil
import zipfile as zf
import cssutils


# FORCE_REPLACE: 1 = skip CSS processing; replace every .css entry with ./style.css
#                0 = rewrite each .css entry with css_proc (falls back to ./style.css)
FORCE_REPLACE = 1

def epub_proc(src, des, tmpdir):
    '''Copy the epub at src to des, rewriting its stylesheets on the way.

    src: path of the source epub file
    des: path of the epub file to create
    tmpdir: existing folder used to extract css entries for processing

    An epub file is a zip file, and a zip entry cannot be modified in
    place, so every entry of src is written into a fresh archive:

    * entries ending in '.css' are replaced by 'style.css' (looked up
      relative to the current working directory) when FORCE_REPLACE is
      set; otherwise they are extracted into tmpdir, rewritten by
      css_proc, and written back. If css_proc reports that it could not
      apply its changes, 'style.css' is used instead.
    * the 'mimetype' entry is copied uncompressed, as the EPUB container
      format requires.
    * every other entry is copied unchanged.

    If src is not a zip file a message is printed and nothing is written.
    Returns None.
    '''
    fname = os.path.basename(src)
    bookname = os.path.splitext(fname)[0]

    if not zf.is_zipfile(src):
        print(fname + ' is not an epub book')
        # Skip this file instead of failing later inside ZipFile().
        return

    print('processing ' + bookname + '.')

    srczip = zf.ZipFile(src, 'r')
    try:
        deszip = zf.ZipFile(des, 'w', zf.ZIP_DEFLATED)
        try:
            for f in srczip.namelist():
                if f.endswith('.css'):
                    replacement = 'style.css'
                    if not FORCE_REPLACE:
                        # extract() returns the path it really created,
                        # which may differ from a naive join if the
                        # member name had to be sanitized.
                        extracted = srczip.extract(f, tmpdir)
                        if css_proc(extracted):
                            replacement = extracted
                    deszip.write(replacement, f)
                elif f == 'mimetype':
                    # Must stay uncompressed for the epub to be valid.
                    deszip.writestr(f, srczip.read(f), zf.ZIP_STORED)
                else:
                    deszip.writestr(f, srczip.read(f))
        finally:
            # Always finalize the output archive, even on error.
            deszip.close()
    finally:
        srczip.close()



def css_proc(css):
    '''Rewrite the stylesheet file at path css in place.

    Two changes are applied to the parsed sheet:

    * in every @font-face rule, each 'src' property is replaced by a
      fixed list of font URLs;
    * in every style rule whose selector text is exactly 'p',
      line-height is set to 150% and the top/bottom margins to 0.

    The serialized sheet is always written back to css, whether or not
    anything was changed.

    Returns True only if at least one @font-face 'src' was replaced and
    at least one 'p' rule was adjusted; False otherwise.
    '''
    font_src = ('url(res:///system/fonts/zw.ttf), '
                'url(res:///media/fonts/zw.ttf), '
                'url(res:///sdcard/fonts/zw.ttf), '
                'url(res:///system/fonts/DroidSansFallback.ttf), '
                'url(c:/WINDOWS/Fonts/msyh.ttf)')

    sheet = cssutils.parseFile(css)
    font_set = False
    lineheight_set = False

    for rule in sheet:
        if rule.type == rule.FONT_FACE_RULE:
            for prop in rule.style:
                if prop.name == 'src':
                    prop.value = font_src
                    font_set = True
        if rule.type == rule.STYLE_RULE and rule.selectorText == 'p':
            rule.style.setProperty('line-height', '150%')
            rule.style.setProperty('margin-top', '0')
            rule.style.setProperty('margin-bottom', '0')
            lineheight_set = True

    # cssText is the sheet serialized as an encoded byte string, so write
    # it in binary mode; the with-block closes the file even on error.
    with open(css, 'wb') as of:
        of.write(sheet.cssText)

    return font_set and lineheight_set
    
    
    


if __name__ == '__main__':
    # Batch driver: process every *.epub in the source folder (first
    # command-line argument, default ./epub) into ./epuboutput, using
    # ./epubtmp as scratch space that is removed afterwards.
    cwddir = os.getcwd()

    if len(sys.argv) < 2:
        srcdir = os.path.join(cwddir, 'epub')
    else:
        srcdir = sys.argv[1]

    desdir = os.path.join(cwddir, 'epuboutput')
    tmpdir = os.path.join(cwddir, 'epubtmp')

    if not os.path.exists(desdir):
        os.mkdir(desdir)

    # The path may already exist as something that is not a folder.
    if not os.path.isdir(desdir):
        print('Can not make destination folder')
        raise SystemExit(1)

    if not os.path.exists(tmpdir):
        os.mkdir(tmpdir)

    if not os.path.isdir(tmpdir):
        print('Can not make tmp folder')
        raise SystemExit(1)

    try:
        if os.path.isdir(srcdir):
            for fname in os.listdir(srcdir):
                if fname.endswith('.epub'):
                    epub_proc(os.path.join(srcdir, fname),
                              os.path.join(desdir, fname),
                              tmpdir)
        else:
            print('can not open ' + srcdir)
    finally:
        # Remove the scratch folder even if a book failed to process.
        shutil.rmtree(tmpdir)
    

0 comments: