python table_python处理html的table标签
import sysimport csvimport urllib2import BeautifulSoup#page= urllib2.urlopen(sys.argv[1]).read()soup= BeautifulSoup.BeautifulSoup(open(sys.argv[1]).read())csvout= csv.writer(sys.stdout)for t...
import sys
import csv
import urllib2
import BeautifulSoup
#page = urllib2.urlopen(sys.argv[1]).read()

# Parse the HTML file named by the first command-line argument and re-emit
# the cells of its first <table> as bare table markup on stdout, one tag or
# cell text per line.
#
# NOTE(review): in the published listing the tag literals inside the string
# constants were stripped and the indentation was lost. The six output
# statements are reconstructed here as the open/close pairs for table, row
# and cell, with the trailing `break` at table level (only the first table
# is emitted). Confirm against the original script.
with open(sys.argv[1]) as html_file:  # close the input file deterministically
    soup = BeautifulSoup.BeautifulSoup(html_file.read())

# Only referenced by the commented-out CSV variant below.
csvout = csv.writer(sys.stdout)

for table in soup.findAll('table'):
    # sys.stdout.write(s + "\n") emits exactly what `print s` would.
    sys.stdout.write("<table>\n")
    #print '#'
    #print '# Table'
    #print '# Fields: ' + ','.join([tr.text for tr in table.findAll('th')])
    for row in table.findAll('tr'):
        sys.stdout.write("<tr>\n")
        #csvout.writerow([tr.text for tr in row.findAll('td')])
        for cell in row.findAll('td'):
            sys.stdout.write("<td>\n")
            # Cell text is encoded explicitly so non-ASCII text can be written.
            sys.stdout.write(cell.text.encode("utf-8") + "\n")
            sys.stdout.write("</td>\n")
        sys.stdout.write("</tr>\n")
    sys.stdout.write("</table>\n")
    # Stop after the first table found in the document.
    break
#!/bin/bash
#process.h
# NOTE(review): the name above is presumably "process.sh" -- confirm.
#
# Usage: <this script> <page.html>
#
# Builds "$basedir/baobei.html" next to the input page ($1):
#   1. extracts a product name from $1 (grep/cut, then truncated at the
#      first "_") and a numeric price from $1 (grep for digits, then cut);
#   2. appends lines containing "Name", $prodname, "Price" and $price to
#      baobei.html;
#   3. appends the output of ./printtab.py run on $1;
#   4. deletes baobei.html and exits if the first line of
#      "$basedir/imglist" is empty;
#   5. finally rewrites baobei.html with every newline and every double
#      quote removed.
#
# NOTE(review): the HTML tag literals that were part of the grep patterns
# and echo arguments were stripped when this listing was published, and
# several statements were fused onto one line ("...baobei.htmlecho \",
# "...baobei.htmlprodname=$("). The code below is kept exactly as published
# and is NOT runnable as shown; restore the markup from the original script
# before use.
basedir=$(dirname $1)
echo $basedir
#echo \
\\\> >> $basedir/baobei.htmlprodname=$(grep -o '
.*' $1 | cut -d \> -f 2 | cut -d \< -f 1)prodname=$(echo $prodname | cut -d _ -f 1)
price=$(grep -o '[0-9]*' $1 | cut -d \> -f 2 | cut -d \< -f 1)
echo \
echo \
echo \
Name\\> >> $basedir/baobei.htmlecho \
$prodname\\> >> $basedir/baobei.htmlecho \
\> >> $basedir/baobei.htmlecho \
echo \
Price\\> >> $basedir/baobei.htmlecho \
$price\\> >> $basedir/baobei.htmlecho \
\> >> $basedir/baobei.htmlpython ./printtab.py $1 >> $basedir/baobei.html
echo \
\> >> $basedir/baobei.htmlimgsrc=$(head -n 1 $basedir/imglist)
# Without a first entry in imglist the generated page is discarded and the
# script stops here.
if test y$imgsrc = y; then
rm -rf $basedir/baobei.html && exit;
fi
# NOTE(review): as published this appends a single space; the original
# argument was presumably markup that got stripped -- confirm.
echo \ >> $basedir/baobei.html
# Flatten the page to one line and drop all double quotes, going through a
# temporary file because the input and output are the same path.
cat $basedir/baobei.html | tr -d '\n' | tr -d '"' > $basedir/baobei.html.tmp
mv $basedir/baobei.html.tmp $basedir/baobei.html
#!/bin/bash
#process2.sh
#
# Usage: process2.sh <baobei.html>
#
# Reads the product page given as $1 and extracts:
#   name  - value of the "Name" row   (script exits quietly if empty)
#   price - value of the "Price" row  (script exits quietly if empty)
#   class - first non-empty value among the rows 产品类型, 设备类型 and
#           打印针数; defaults to "条形码打印机" when none is found.
# A non-empty $class inherited from the environment is kept as-is.

#######################################
# Print the value that follows a labelled cell.
# Arguments: $1 - label text the cell starts with
#            $2 - file to search
# Outputs:   the text between the 4th ">" and the next "<" of each match
#
# NOTE(review): the opening tag of the grep pattern was stripped from the
# published listing. "<td>" is reconstructed from the `cut -d '>' -f 4`
# that follows (label cell + value cell); confirm against the real markup.
#######################################
extract_field() {
  grep -o -e "<td>$1.*" -- "$2" | cut -d '>' -f 4 | cut -d '<' -f 1
}

basedir=$(dirname -- "$1")

name=$(extract_field 'Name' "$1")
if [[ -z "$name" ]]; then
  exit
fi

price=$(extract_field 'Price' "$1")
if [[ -z "$price" ]]; then
  exit
fi

# Try the candidate row labels in priority order; stop at the first hit.
for label in '产品类型' '设备类型' '打印针数'; do
  if [[ -n "$class" ]]; then
    break
  fi
  class=$(extract_field "$label" "$1")
done

# Fallback label when the page carries none of the rows above.
if [[ -z "$class" ]]; then
  class="条形码打印机"
fi
#######################################
# Translate a free-text product-type label into a category id.
# The first matching arm wins, so the order of the arms is significant
# (e.g. a label containing both 激光 and 一体机 maps to 536187484).
# Arguments: $1 - label text (may be empty)
# Outputs:   the category id on stdout; 536187616 when nothing matches
#
# A `case` with glob patterns replaces the `if $(echo $class | grep ...)`
# chain: the label is no longer word-split or glob-expanded, and no
# command-substitution output is executed as a command.
#######################################
map_class_id() {
  case "$1" in
    # NOTE(review): 针 maps to 536187477 here, while the classtable in this
    # file lists 536187478 as 针式打印机 -- kept as published; confirm intent.
    *票据* | *发票* | *票证* | *存折* | *针*)
      printf '%s\n' "536187477"
      ;;
    *灯泡* | *UHE* | *UHP* | *HSCR*)
      printf '%s\n' "536187479"
      ;;
    *条形码打印机*)
      printf '%s\n' "536187480"
      ;;
    *证卡打印*)
      printf '%s\n' "536187483"
      ;;
    *条码* | *扫描* | *阅读* | *采集* | *手持* | *数据终端*)
      printf '%s\n' "536187481"
      ;;
    *激光*)
      printf '%s\n' "536187484"
      ;;
    *喷墨*)
      printf '%s\n' "536187486"
      ;;
    *复印*)
      printf '%s\n' "536187615"
      ;;
    *一体机*)
      printf '%s\n' "536187485"
      ;;
    # 硒鼓 / 墨盒 and every unrecognised label share the same id.
    *)
      printf '%s\n' "536187616"
      ;;
  esac
}

# Replace the textual label in $class with its category id.
class=$(map_class_id "$class")
################################################################
# Locate the product picture under $basedir, name it after its MD5 digest
# and copy it into the template directory as "<digest>.tbi".
#
# Only one picture can be referenced by the record emitted later, so when
# several *.jpg files exist the first one in byte-wise sorted order is used.
# Previously every match was passed to md5sum and cp at once, which failed
# as soon as there were two or more.
imagepath=$(find "$basedir" -type f -iname '*.jpg' | LC_ALL=C sort | head -n 1)
if [[ -z "$imagepath" ]]; then
  # No picture: nothing to publish for this product.
  exit
fi

# Hash via stdin so the digest line never contains the file name.
image=$(md5sum < "$imagepath" | cut -d ' ' -f 1)
cp -f -- "$imagepath" "${basedir}/../../template/${image}.tbi"
################################################################
# Use the complete contents of the input file ($1) as the description field.
desc=$(cat $1)
################################################################
# Emit one tab-separated record on stdout. `echo -e` turns every "\t" into
# a TAB. The variable fields are: $name (wrapped in literal double quotes),
# $class (emitted as ",<id>,"), $price, $desc (wrapped in double quotes) and
# "$image:0:0:|;" as the picture entry; every other field is a fixed
# literal taken from the original script.
# NOTE(review): $name, $class, $price, $desc and $image are expanded
# unquoted, so their values are word-split and glob-expanded, and `echo -e`
# also interprets any backslash escapes they contain -- confirm that is
# acceptable for the page content passed in.
# NOTE(review): the meaning of the constant columns (ids, dates, property
# list, trailing numbers) is not visible here -- confirm against the
# consumer of this record.
echo -e \"$name\""\t"110514"\t"\",$class,\""\t"1"\t"\"上海\""\t"\"上海\""\t"\"b\""\t"$price"\t"0.000000"\t"1"\t"7"\t"2"\t"0.000000"\t"0.000000"\t"0.000000"\t""\t""\t"1"\t"1"\t"0"\t"1"\t"1"\t"0"\t"\"2012-10-16 13:09:48\""\t""\t"\"$desc\""\t""\t"\"20000:31140\;20196:3228846\;29969:107401\;30681:32998\;31468:102250\;31479:92188\;3415558:27513\;3415563:21959\;3415571:21959\;3415581:10122\;3415609:22041\;7884463:75957615\;14319244:80897641\;14319250:123483713\;14791484:10285019\;\""\t""\t""\t"0"\t"0"\t"\"2012-10-16 13:37:51\""\t"100"\t""\t"0"\t"\"$image:0:0:\|\;\""\t"\"\""\t"\"\""\t"\",\""\t"\",\""\t"\"\""\t"\"\""\t"0"\t"\"15758222730\""\t"15758222730
# Category id -> category name. The ids are the values assigned to `class`
# by the shell classification chain earlier in this file.
# The stray listing line numbers that had leaked into the literal
# ("17 ", "18 ", ...) are removed so the dict is valid syntax again.
classtable = {
    "536187477": "票据打印机",
    "536187478": "针式打印机",
    "536187479": "投影灯泡",
    "536187480": "条形码打印机",
    "536187481": "条码设备",
    "536187483": "证卡打印机",
    "536187484": "激光打印机",
    "536187485": "多功能一体机",
    "536187486": "喷墨打印机",
    "536187615": "复印复合机",
    "536187616": "硒鼓",
}
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐



所有评论(0)