This notebook describes code to parse the .nbo or .out files generated by Gaussian09-GENNBO6 modules. Lines related to the NLMO dipole components are extracted and saved to a .csv file.
'''Custom styling. File *.css injects the contents of css_file in the
header of the notebook's HTML file. Other css files are in the /css directory'''
# to (de)activate line numbering press Esc while in the cell
# followed by l (lower L)
'''Sanity check since we are changing directories and .css file path
would be incorrect upon cell re-load'''
from IPython.core.display import HTML
import string,sys,os,os.path,re
css_file = './css/yellowish.css'
# Later code changes into a subdirectory, so when this part is re-run the
# relative .css path no longer resolves; step back up one level in that case.
# os.chdir is plain Python, so this works inside and outside IPython.
if not os.path.isfile(css_file):
    os.chdir(os.pardir)
# Read the stylesheet inside a context manager so the handle is closed.
with open(css_file, "r") as css_handle:
    css_source = css_handle.read()
# Kept as the final expression so a notebook still renders the stylesheet.
HTML(css_source)
import pandas as pd
import numpy as np
# Allow up to 50 columns when a DataFrame is displayed. The fully qualified
# key is used because the short form 'max_columns' is rejected as ambiguous
# by pandas versions that also define 'styler.render.max_columns'.
pd.set_option('display.max_columns', 50)
# Enter subdirectory and the input filename
%cd dipoles
filename = 'form.nbo'
# Save the file path, name and extension
fullpath = os.path.abspath(filename)
path,file=os.path.split(fullpath)
basename, extension = os.path.splitext(filename)
# Collect the lines of the dipole analysis table into the list 'capture'.
# Three flags drive the scan:
#   start -> 1 once a line containing 'DIPOLE MOMENT ANALYSIS:' was read
#   begin -> 1 once, after that, a line containing '==============' was read
#   end   -> 0 once, after that, a line containing '-------' was read
#            (1 means the table is still open)
start, begin, end = 0, 0, 1
capture = []
with open(filename, 'r') as f:
    for line in f:
        # A dashed rule after the table has begun closes it
        if begin == 1 and '-------' in line:
            end = 0
        # Inside the open table: keep every line that is not a "deloc" line
        if (start, begin, end) == (1, 1, 1) and "deloc" not in line:
            # Skip lines made of a single whitespace character,
            # e.g. a bare newline
            if re.match(r"\s$", line):
                continue
            capture.append(line.lstrip())
        # First condition to initiate capture: the section title
        if 'DIPOLE MOMENT ANALYSIS:' in line:
            start = 1
        # Second condition to initiate capture: the '=' rule after the title
        if start == 1 and '==============' in line:
            begin = 1
# Extract values
def getdipvalues(list):
    """Pull the orbital number, label and dipole columns out of text lines.

    Every item of the argument is searched with one regular expression that
    expects: a 1-3 digit number followed by a dot, a label starting with two
    capital letters, a gap of at least seven whitespace characters, and then
    four numbers written with one integer digit and two decimals. Items that
    do not match are skipped.

    Parameters:
        list: iterable of strings to parse. The name shadows the builtin; it
            is kept unchanged so existing callers are not affected.

    Returns:
        A tuple of six equally long lists of strings, in this order:
        (orbnum, orbtype, dipX, dipY, dipZ, dipTot). All lists are empty when
        nothing matches.

    Raises:
        TypeError: if the argument is not iterable or an item is not a string
            (raised by iteration / the regular expression search).
    """
    # Compiled once instead of on every item; pattern and flag are unchanged.
    row_pattern = re.compile(
        r"([0-9]{1,3})\.\s([A-Z]{2}.+)\s{7,13}(-?\d\.\d\d)\s?\s(-?\d\.\d\d)\s?\s(-?\d\.\d\d)\s?\s(\d\.\d\d)\s?\s.+",
        re.MULTILINE,
    )
    orbnum = []
    orbtype = []
    dipX = []
    dipY = []
    dipZ = []
    dipTot = []
    # Iterate over the argument itself rather than a module-level variable,
    # so the function works on whatever the caller passes in.
    for item in list:
        found = row_pattern.search(item)
        if not found:
            continue
        orbnum.append(found.group(1).strip())
        # The greedy label group can swallow part of the gap; strip removes it
        orbtype.append(found.group(2).strip())
        dipX.append(found.group(3))
        dipY.append(found.group(4))
        dipZ.append(found.group(5))
        dipTot.append(found.group(6))
    return orbnum, orbtype, dipX, dipY, dipZ, dipTot
# Parse the captured lines into six parallel lists
orbnum, orbtype, dipX, dipY, dipZ, dipTot = getdipvalues(capture)
# Assemble the table; 'columns' pins the column order explicitly
df = pd.DataFrame(
    {
        'NLMO': orbnum,
        'Type': orbtype,
        'X': dipX,
        'Y': dipY,
        'Z': dipZ,
        'Tot_Dip': dipTot,
    },
    columns=['NLMO', 'Type', 'X', 'Y', 'Z', 'Tot_Dip'],
)
# The parsed values are strings: orbital numbers become integers,
# dipole components become floats
df['NLMO'] = df['NLMO'].astype(int)
df[['X', 'Y', 'Z', 'Tot_Dip']] = df[['X', 'Y', 'Z', 'Tot_Dip']].astype(float)
# Write dataframe to .csv file
csv_name = basename + "_dip.csv"
try:
    df.to_csv(csv_name, index=False, encoding='utf-8')
except IOError:
    # Reached when the output file cannot be created or written
    print("Error: can't write the file " + csv_name)
else:
    print("\n" + ('-' * 80) + "\n")
    # abspath resolves the name that was just written against the current
    # directory and uses the platform's own path separator
    print(">> Contents of the dataframe was written to " + os.path.abspath(csv_name) + " file")
The NLMO part of the DIPOLE MOMENT ANALYSIS: section is now shown in Table 1. The corresponding **\*_dip.csv** file was saved at the path shown in the previous cell. ____
# Show the dataframe as an HTML table carrying the 'grid' class
# (presumably styled by the css file loaded earlier -- confirm)
HTML(df.to_html(escape=False, classes='grid'))
iPython Notebook ReadNboDip.ipynb version 1.0 created on Dec 23, 2014