Commit 078375a6 authored by DanFaria's avatar DanFaria
Browse files

Merge branch 'MajorCoreUpdate'

parents 5b5c6d55 da679e6d
This diff is collapsed.
......@@ -12,19 +12,16 @@
* limitations under the License. *
* *
*******************************************************************************
* A matching algorithm that extends the Lexicons of the source and target *
* ontologies. *
* An algorithm that extends the Lexicons of the source and target ontologies. *
* *
* @author Daniel Faria *
******************************************************************************/
package aml.match;
package aml.ext;
public interface LexiconExtender
{
    /**
     * Extends the Lexicons of the source and target Ontologies,
     * subject to a minimum confidence.
     * @param thresh the minimum confidence threshold below
     * which synonyms will not be added to the Lexicons
     */
    void extendLexicons(double thresh);

    /**
     * Extends the Lexicons of the source and target Ontologies.
     */
    void extendLexicons();
}
/******************************************************************************
* Copyright 2013-2016 LASIGE *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); you may *
* not use this file except in compliance with the License. You may obtain a *
* copy of the License at http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*******************************************************************************
* An algorithm that extends the Lexicons of the source and target ontologies *
* by removing sections between parenthesis. *
* *
* @author Daniel Faria *
******************************************************************************/
package aml.ext;
import java.util.Vector;
import aml.AML;
import aml.ontology.Lexicon;
import aml.ontology.Provenance;
import aml.settings.EntityType;
import aml.settings.LexicalType;
import aml.util.StringParser;
public class ParenthesisExtender implements LexiconExtender
{
    /**
     * Extends the Lexicons of the source and target ontologies with
     * synonyms obtained by removing parentheses (or whole parenthesised
     * sections) from the existing names.
     */
    @Override
    public void extendLexicons()
    {
        AML aml = AML.getInstance();
        Lexicon source = aml.getSource().getLexicon();
        extend(source);
        Lexicon target = aml.getTarget().getLexicon();
        extend(target);
    }

    /**
     * Adds parenthesis-free synonyms to the given Lexicon, for every EntityType.
     * @param l the Lexicon to extend
     */
    private void extend(Lexicon l)
    {
        for(EntityType e : EntityType.values())
        {
            //Work on a copy of the names, since the Lexicon is being added to inside the loop
            Vector<String> nm = new Vector<String>(l.getNames(e));
            for(String n : nm)
            {
                //Only names that contain both an opening and a closing parenthesis are of interest
                if(StringParser.isFormula(n) || !n.contains("(") || !n.contains(")"))
                    continue;
                String newName;
                double weight;
                //The whole name is parenthesised, or it is an "(x) or (y)" construct:
                //drop just the parenthesis characters and keep the full weight
                if(n.matches("\\([^()]+\\)") || n.contains(") or ("))
                {
                    newName = n.replaceAll("[()]", "");
                    weight = 1.0;
                }
                //Back-to-back sections are not processed
                else if(n.contains(")("))
                    continue;
                //Otherwise remove the parenthesised sections altogether and
                //penalize the synonym according to how much of the name was lost
                else
                {
                    newName = stripParenthesized(n);
                    weight = Math.sqrt(newName.length() * 1.0 / n.length());
                }
                if(newName.equals(""))
                    continue;
                //Get the entities with the name (copied, as the Lexicon is modified below)
                Vector<Integer> tr = new Vector<Integer>(l.getInternalEntities(e, n));
                for(Integer j : tr)
                    for(Provenance p : l.get(n, j))
                        l.add(j, newName, p.getLanguage(),
                            LexicalType.INTERNAL_SYNONYM, p.getSource(), weight*p.getWeight());
            }
        }
    }

    /**
     * Removes every parenthesised section from a name and trims the result.
     * Nested sections are removed as a whole; a closing parenthesis with no
     * matching opening parenthesis is kept as-is.
     * @param name the name to process
     * @return the trimmed name without its parenthesised sections
     */
    private static String stripParenthesized(String name)
    {
        StringBuilder kept = new StringBuilder(name.length());
        //Number of currently open parentheses; characters are kept only at depth 0
        int depth = 0;
        for(int i = 0; i < name.length(); i++)
        {
            char c = name.charAt(i);
            if(c == '(')
                depth++;
            else if(c == ')' && depth > 0)
                depth--;
            else if(depth == 0)
                kept.append(c);
        }
        return kept.toString().trim();
    }
}
\ No newline at end of file
/******************************************************************************
* Copyright 2013-2016 LASIGE *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); you may *
* not use this file except in compliance with the License. You may obtain a *
* copy of the License at http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*******************************************************************************
* An algorithm that extends the Lexicons of the source and target ontologies *
* by removing stop words. *
* *
* @author Daniel Faria *
******************************************************************************/
package aml.ext;
import java.util.Set;
import java.util.Vector;
import aml.AML;
import aml.ontology.Lexicon;
import aml.ontology.Provenance;
import aml.settings.EntityType;
import aml.settings.LexicalType;
import aml.settings.SizeCategory;
import aml.util.StopList;
import aml.util.StringParser;
public class StopWordExtender implements LexiconExtender
{

//Attributes

    //The size category of the matching problem, read from AML when extending
    private SizeCategory s;
    //The set of stop words, read from the StopList when extending
    private Set<String> stopList;
    //The factor applied to the weight of a name to obtain the weight of its synonym
    private static final double WEIGHT = 0.98;

//Public Methods

    /**
     * Extends the Lexicons of the source and target ontologies with
     * synonyms obtained by removing stop words from the existing names.
     */
    @Override
    public void extendLexicons()
    {
        stopList = StopList.read();
        AML aml = AML.getInstance();
        s = aml.getSizeCategory();
        Lexicon source = aml.getSource().getLexicon();
        extend(source);
        Lexicon target = aml.getTarget().getLexicon();
        extend(target);
    }

//Private Methods

    /**
     * Adds stop-word-free synonyms to the given Lexicon.
     * @param l the Lexicon to extend
     */
    private void extend(Lexicon l)
    {
        //Process Classes (remove only leading and trailing stop words)
        Vector<String> nm = new Vector<String>(l.getNames(EntityType.CLASS));
        for(String n : nm)
        {
            if(StringParser.isFormula(n))
                continue;
            String newName = trimStopWords(n.split(" "));
            //If the name contains no leading or trailing stopWords proceed to next name
            if(newName == null)
                continue;
            addSynonym(l, EntityType.CLASS, n, newName);
        }
        //Process Individuals and Properties (remove all stop words)
        //If the SizeCategory is Large or Huge, process Classes this
        //way as well
        boolean includeClasses = s.equals(SizeCategory.LARGE) || s.equals(SizeCategory.HUGE);
        for(EntityType t : EntityType.values())
        {
            if(t.equals(EntityType.CLASS) && !includeClasses)
                continue;
            nm = new Vector<String>(l.getNames(t));
            for(String n : nm)
            {
                if(StringParser.isFormula(n))
                    continue;
                //Build a synonym by removing all stopWords
                String newName = removeStopWords(n.split(" "));
                //Nothing to add if no word is left, or if no stop word was removed
                if(newName.isEmpty() || newName.equals(n))
                    continue;
                addSynonym(l, t, n, newName);
            }
        }
    }

    /**
     * Builds a synonym by removing all leading and trailing stop words.
     * @param nameWords the words of the name
     * @return the synonym, or null if the name has no leading or trailing
     * stop words or consists only of stop words
     */
    private String trimStopWords(String[] nameWords)
    {
        //First find the first word in the name that is not a stopWord
        int start = 0;
        while(start < nameWords.length && stopList.contains(nameWords[start]))
            start++;
        //Then find the last word in the name that is not a stopWord
        //(the first word is checked as well, so that a name whose only
        //non-stop word is the first one still loses its trailing stop words)
        int end = nameWords.length;
        while(end > start && stopList.contains(nameWords[end-1]))
            end--;
        //Either every word is a stopWord, or there is nothing to remove
        if(start == end || (start == 0 && end == nameWords.length))
            return null;
        StringBuilder newName = new StringBuilder();
        for(int i = start; i < end; i++)
            newName.append(nameWords[i]).append(' ');
        return newName.toString().trim();
    }

    /**
     * Builds a synonym by removing every stop word.
     * @param nameWords the words of the name
     * @return the remaining words joined by single spaces and trimmed
     * (empty if all the words are stop words)
     */
    private String removeStopWords(String[] nameWords)
    {
        StringBuilder newName = new StringBuilder();
        for(String w : nameWords)
            if(!stopList.contains(w))
                newName.append(w).append(' ');
        return newName.toString().trim();
    }

    /**
     * Adds newName as an internal synonym of every entity of the given type
     * that has the given name, once per Provenance of that name, with the
     * weight of the Provenance multiplied by WEIGHT.
     * @param l the Lexicon to extend
     * @param t the EntityType of the entities to extend
     * @param n the existing name
     * @param newName the synonym to add
     */
    private void addSynonym(Lexicon l, EntityType t, String n, String newName)
    {
        //Get the entities with the name (copied, as the Lexicon is modified below)
        Vector<Integer> tr = new Vector<Integer>(l.getInternalEntities(t, n));
        for(Integer i : tr)
        {
            for(Provenance p : l.get(n, i))
            {
                double weight = p.getWeight() * WEIGHT;
                l.add(i, newName, p.getLanguage(),
                    LexicalType.INTERNAL_SYNONYM, p.getSource(), weight);
            }
        }
    }
}
\ No newline at end of file
/******************************************************************************
* Copyright 2013-2016 LASIGE *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); you may *
* not use this file except in compliance with the License. You may obtain a *
* copy of the License at http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*******************************************************************************
* A filtering algorithm based on cardinality. *
* *
* @author Daniel Faria *
******************************************************************************/
package aml.filter;
import aml.AML;
import aml.match.Alignment;
import aml.match.Mapping;
import aml.ontology.RelationshipMap;
import aml.settings.EntityType;
import aml.settings.MappingStatus;
import aml.settings.SelectionType;
import aml.util.InteractionManager;
public class CardinalitySelector implements Filterer, Flagger
{

//Attributes

    private AML aml;
    //The similarity threshold below which mappings are not selected
    private double thresh;
    private SelectionType type;
    //The active alignment (set when filtering or flagging)
    private Alignment a;
    //The optional auxiliary alignment that dictates the selection order
    private Alignment aux;
    private InteractionManager im;
    //The cardinality limit, compared against the number of already selected
    //mappings of the source and of the target of each candidate mapping
    private int card;

//Constructors

    /**
     * Constructs a Selector with the given similarity threshold
     * and automatic SelectionType
     * @param thresh: the similarity threshold
     * @param c: the cardinality limit
     */
    public CardinalitySelector(double thresh, int c)
    {
        aml = AML.getInstance();
        this.thresh = thresh;
        type = SelectionType.getSelectionType();
        aux = null;
        im = aml.getInteractionManager();
        card = c;
    }

    /**
     * Constructs a Selector with the given similarity threshold
     * and SelectionType
     * @param thresh: the similarity threshold
     * @param c: the cardinality limit
     * @param type: the SelectionType
     */
    public CardinalitySelector(double thresh, int c, SelectionType type)
    {
        this(thresh,c);
        this.type = type;
    }

    /**
     * Constructs a Selector with the given similarity threshold
     * and automatic SelectionType, and using the given auxiliary
     * Alignment as the basis for selection
     * @param thresh: the similarity threshold
     * @param c: the cardinality limit
     * @param aux: the auxiliary Alignment
     */
    public CardinalitySelector(double thresh, int c, Alignment aux)
    {
        this(thresh,c);
        this.aux = aux;
    }

    /**
     * Constructs a Selector with the given similarity threshold
     * and SelectionType, and using the given auxiliary Alignment
     * as the basis for selection
     * @param thresh: the similarity threshold
     * @param c: the cardinality limit
     * @param type: the SelectionType
     * @param aux: the auxiliary Alignment
     */
    public CardinalitySelector(double thresh, int c, SelectionType type, Alignment aux)
    {
        this(thresh, c, type);
        this.aux = aux;
    }

//Public Methods

    /**
     * Performs selection on the active alignment of AML. If any mapping
     * was left out, the FLAGGED mappings among the selected ones are reset
     * to UNKNOWN and the selection becomes the active alignment.
     */
    @Override
    public void filter()
    {
        System.out.println("Performing Selection");
        long time = System.currentTimeMillis()/1000;
        Alignment selected;
        a = aml.getAlignment();
        //NOTE(review): the result of parentFilter is always overwritten below
        //before it is read, so this call does not influence the selection.
        //It is kept to preserve the existing behaviour - confirm whether its
        //output was meant to be the input of the selection step.
        if(!type.equals(SelectionType.HYBRID))
            selected = parentFilter(a);
        //In normal selection mode
        if(aux == null)
            selected = filterNormal();
        //In co-selection mode
        else
            selected = filterWithAux();
        if(selected.size() < a.size())
        {
            for(Mapping m : selected)
                if(m.getStatus().equals(MappingStatus.FLAGGED))
                    m.setStatus(MappingStatus.UNKNOWN);
            aml.setAlignment(selected);
        }
        System.out.println("Finished in " + (System.currentTimeMillis()/1000-time) + " seconds");
    }

    /**
     * Selects a given Alignment. The given Alignment is sorted in
     * descending order, and the selected Mappings are added as copies.
     * This method never calls on the InteractionManager.
     * @param a: the Alignment to select
     * @return: the selected Alignment
     */
    public Alignment filter(Alignment a)
    {
        Alignment selected = new Alignment();
        a.sortDescending();
        for(Mapping m : a)
            if(shouldSelect(selected, m, false))
                selected.add(new Mapping(m));
        return selected;
    }

    /**
     * Sets the status of the UNKNOWN mappings of the active alignment
     * that are in conflict to FLAGGED.
     */
    @Override
    public void flag()
    {
        System.out.println("Running Cardinality Flagger");
        long time = System.currentTimeMillis()/1000;
        a = aml.getAlignment();
        for(Mapping m : a)
            if(a.containsConflict(m) && m.getStatus().equals(MappingStatus.UNKNOWN))
                m.setStatus(MappingStatus.FLAGGED);
        System.out.println("Finished in " + (System.currentTimeMillis()/1000-time) + " seconds");
    }

//Private Methods

    /**
     * Selects the active alignment in its own ranking order
     * @return the selected Alignment
     */
    private Alignment filterNormal()
    {
        //The alignment to store selected mappings
        Alignment selected = new Alignment();
        //Sort the active alignment
        a.sortDescending();
        //Then select Mappings in ranking order (by similarity)
        for(Mapping m : a)
            if(shouldSelect(selected, m, true))
                selected.add(m);
        return selected;
    }

    /**
     * Selects the active alignment in the ranking order of the auxiliary
     * alignment; mappings absent from the active alignment are ignored
     * @return the selected Alignment
     */
    private Alignment filterWithAux()
    {
        //The alignment to store selected mappings
        Alignment selected = new Alignment();
        //Sort the auxiliary alignment
        aux.sortDescending();
        //Then perform selection based on it
        for(Mapping n : aux)
        {
            Mapping m = a.get(n.getSourceId(), n.getTargetId());
            if(m == null)
                continue;
            if(shouldSelect(selected, m, true))
                selected.add(m);
        }
        return selected;
    }

    /**
     * Decides whether a mapping is to be added to the selection.
     * CORRECT mappings are always selected; INCORRECT mappings and mappings
     * below the similarity threshold never are; the remaining ones are
     * selected if they fit the cardinality rules or, failing that, if user
     * interaction is allowed and available and the mapping is CORRECT
     * after being classified by the InteractionManager.
     * @param selected: the mappings selected so far
     * @param m: the candidate mapping
     * @param askUser: whether the InteractionManager may be called upon
     * @return whether the mapping should be selected
     */
    private boolean shouldSelect(Alignment selected, Mapping m, boolean askUser)
    {
        if(m.getStatus().equals(MappingStatus.CORRECT))
            return true;
        boolean candidate = m.getSimilarity() >= thresh &&
            !m.getStatus().equals(MappingStatus.INCORRECT);
        if(!candidate)
            return false;
        if(fitsCardinality(selected, m))
            return true;
        if(askUser && im.isInteractive())
        {
            im.classify(m);
            return m.getStatus().equals(MappingStatus.CORRECT);
        }
        return false;
    }

    /**
     * Checks a candidate mapping against the cardinality rules, given
     * the mappings selected so far
     * @param selected: the mappings selected so far
     * @param m: the candidate mapping
     * @return whether the mapping can be selected under the SelectionType
     */
    private boolean fitsCardinality(Alignment selected, Mapping m)
    {
        int sourceCard = selected.getSourceMappings(m.getSourceId()).size();
        int targetCard = selected.getTargetMappings(m.getTargetId()).size();
        //Both entities are below the cardinality limit
        if(sourceCard < card && targetCard < card)
            return true;
        //Non-strict selection accepts mappings for which no better mapping was selected
        if(!type.equals(SelectionType.STRICT) && !selected.containsBetterMapping(m))
            return true;
        //Hybrid selection additionally accepts mappings above 0.75 similarity
        //whose entities do not exceed the cardinality limit
        return type.equals(SelectionType.HYBRID) && m.getSimilarity() > 0.75 &&
            sourceCard <= card && targetCard <= card;
    }

    /**
     * Filters an Alignment using the subclass relationships between the
     * mapped entities. Mappings whose source is not a class are left out,
     * as are mappings for which the source is also mapped to a subclass of
     * the target (or a subclass of the source is mapped to the target)
     * with equal or higher similarity.
     * @param in: the Alignment to filter
     * @return the filtered Alignment
     */
    private Alignment parentFilter(Alignment in)
    {
        RelationshipMap r = aml.getRelationshipMap();
        Alignment out = new Alignment();
        for(Mapping m : in)
        {
            int src = m.getSourceId();
            int tgt = m.getTargetId();
            if(!aml.getURIMap().getType(src).equals(EntityType.CLASS))
                continue;
            boolean add = true;
            //Look for a competing mapping between the source and a subclass of the target
            for(Integer t : in.getSourceMappings(src))
            {
                if(r.isSubclass(t,tgt) &&
                    in.getSimilarity(src, t) >= in.getSimilarity(src, tgt))
                {
                    add = false;
                    break;
                }
            }
            if(!add)
                continue;
            //Look for a competing mapping between a subclass of the source and the target
            for(Integer s : in.getTargetMappings(tgt))
            {
                if(r.isSubclass(s,src) &&
                    in.getSimilarity(s, tgt) >= in.getSimilarity(src, tgt))
                {
                    add = false;
                    break;
                }
            }
            if(add)
                out.add(m);
        }
        return out;
    }
}
\ No newline at end of file
......@@ -40,7 +40,7 @@ public class CustomFilterer
Vector<Problem> steps = aml.getFlagSteps();
if(steps.contains(Problem.OBSOLETION))
{
ObsoleteFilter o = new ObsoleteFilter();
ObsoleteFilterer o = new ObsoleteFilterer();
o.filter();
}
if(steps.contains(Problem.CARDINALITY))
......
......@@ -50,7 +50,7 @@ public class CustomFlagger
}
if(steps.contains(Problem.OBSOLETION))
{
ObsoleteFilter o = new ObsoleteFilter();
ObsoleteFilterer o = new ObsoleteFilterer();
o.flag();
}
if(steps.contains(Problem.QUALITY))
......
/******************************************************************************
* Copyright 2013-2016 LASIGE *
* *
* Licensed under the Apache License, Version 2.0 (the "License"); you may *
* not use this file except in compliance with the License. You may obtain a *
* copy of the License at http://www.apache.org/licenses/LICENSE-2.0 *
* *
* Unless required by applicable law or agreed to in writing, software *
* distributed under the License is distributed on an "AS IS" BASIS, *
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
* See the License for the specific language governing permissions and *
* limitations under the License. *
* *
*******************************************************************************
* An Ontology Data Property Entity. *
* *
* @author Daniel Faria *
******************************************************************************/
package aml.ontology;
import java.util.HashSet;
import java.util.Set;
public class DataProperty extends Property
{
//Attributes