2016-08-03 10 views
-2

この小さなスクリプトは、ファイルにキーワードを書きますが、各キーワードの間に余分な改行を追加します。どうすればそれを止めることができますか?私。代わりに私がファイルに書き込むときに余分な改行を削除する

Apple 
Banana 
Crayon 

たい

Apple 

Banana 

Crayon 

の私は "listwrite" をグーグルで試みたが、助けにはなりませんでした。

これは非常に簡単なことだと確信していますが、わかりません。コードで

#!/usr/local/bin/python 
################################################### 
# nerv3.py 
# Goal: Named entity recognition script to pull names/place from text 
# called as python nerv3.py text_path_or_file 
# 
# Inputs: 
# path - text file or directory containing text files 
# output - output file name 
# uuid 
# Outputs: 
# Output file written 
# People, Places, Others files 
# 
################################################### 

#gonna need to install AlchemyAPI 
import AlchemyAPI 
import argparse 
import xml.etree.ElementTree as ET 
import collections 
import codecs 
import os 
#from IPython import embed 
#================================================= 
def listwrite(output_file,thelist): 
    for item in thelist: 
     item.encode('utf-8') 
     output_file.write("%s\n\n" % item) 

#================================================= 

def main(): 

    tmpdir = "/tmp/pagekicker" 

    #personal api key saved as api_key.txt 
    parser = argparse.ArgumentParser() 
    parser.add_argument('path', help = "target file or directory for NER") 
    parser.add_argument('output', help = "target file for output") 
    parser.add_argument('uuid', help = "uuid") 
    args = parser.parse_args() 

    in_file = args.path 
    out_file = args.output 
    uuid = args.uuid 
    folder = os.path.join(tmpdir, uuid) 
    print folder  
    cwd = os.getcwd() 
    apikey_location = os.path.join(cwd, "api_key.txt") 

    with open(in_file) as f: 
     text = f.read() 

    alchemyObj = AlchemyAPI.AlchemyAPI() 
    alchemyObj.loadAPIKey(apikey_location) 

    result = alchemyObj.TextGetRankedNamedEntities(text) 

    root = ET.fromstring(result) 

    place_list = ['City', 'Continent', 'Country', 'Facility', 'GeographicFeature',\ 
    'Region', 'StateOrCounty'] 
    People = {} 
    Places = {} 
    Other = {} 

    for entity in root.getiterator('entity'): 
     if entity[0].text == 'Person': 
      People[entity[3].text]=[entity[1].text, entity[2].text] 
     elif entity[0].text in place_list: 
      Places[entity[3].text] = [entity[1].text, entity[2].text] 
     else: 
      Other[entity[3].text] = [entity[1].text, entity[2].text] 

    #print lists ordered by relevance 
    Places_s = sorted(Places, key = Places.get, reverse = True) 
    People_s = sorted(People, key = People.get, reverse = True) 
    Other_s = sorted(Other, key = Other.get, reverse = True) 

# here is where things seem to go awry 
    with codecs.open(out_file, mode = 'w', encoding='utf-8') as o: 
     listwrite(o, People_s) 
     listwrite(o, Places_s) 
     listwrite(o, Other_s) 
    out_file = os.path.join(folder, 'People') 
    with codecs.open(out_file, mode= 'w', encoding='utf-8') as o: 
      listwrite(o, People_s) 
    out_file = os.path.join(folder, 'Places') 
     with codecs.open(out_file, mode= 'w', encoding='utf-8') as o: 
     listwrite(o, Places_s) 
    out_file = os.path.join(folder, 'Other') 
    with codecs.open(out_file, mode= 'w', encoding='utf-8') as o: 
      listwrite(o, Other_s) 
#================================================= 
if __name__ == '__main__': 
    main() 
+0

あなたの 'listwrite'関数で'%s \ n \ n "'を ''%s \ n "'に変更します –

答えて

2
def listwrite(output_file,thelist): 
    for item in thelist: 
     item.encode('utf-8') 
     output_file.write("%s\n\n" % item) 

、listwriteを関数として定義されます。 itemthelistで、itemとそれに続く2つの改行文字を書き込みます。余分な行を削除するには、\nのいずれかを削除するだけです。

def listwrite(output_file,thelist): 
    for item in thelist: 
     item.encode('utf-8') 
     output_file.write("%s\n" % item) 
関連する問題