package org.aksw.jena_sparql_api_sparql_path.main;
import java.io.IOException;
import java.sql.SQLException;
import java.util.List;
import org.aksw.jena_sparql_api.concepts.Concept;
import org.aksw.jena_sparql_api.concepts.Path;
import org.aksw.jena_sparql_api.core.QueryExecutionFactory;
import org.aksw.jena_sparql_api.http.QueryExecutionFactoryHttp;
import org.aksw.jena_sparql_api.sparql_path.core.algorithm.ConceptPathFinder;
import org.apache.jena.rdf.model.Resource;
import org.apache.jena.rdf.model.Statement;
import org.apache.jena.util.iterator.Map1;
//class EdgeTransition
// extends DefaultEdge
//{
// public EdgeTransition() {
// }
//}
/**
 * Empty placeholder class; it declares no members yet.
 * NOTE(review): judging by its name and the notes below, it is presumably meant to
 * compute a summary of the properties used in a dataset - confirm with the author.
 *
 * <p>Just some idea:
 * The property matrix query might not run on DBpedia (we have to try it out), BUT:
 *
 * <p>First, we can partition by graph.
 * Then, we can fetch all properties.
 * We could even use a partitioned approach for this step.
 *
 * <p>Afterwards, we could take each individual property, and try to find all
 * successor properties of it, e.g.
 *
 * <pre>
 * Select Distinct ?y {
 *   { Select * { -- See Note [1]
 *       ?a ?x ?b .
 *       Filter(?x = &lt;foobar&gt;)
 *   } Limit 100000 Offset 10000 }
 *
 *   ?b ?y ?c .
 * }
 * </pre>
 *
 * <p>[1] Note: we could do Distinct ?b, but it might not improve performance (much),
 * as ?b will have few duplicates.
 *
 * @author raven
 */
class PropertySummaryCreator {
}
/**
 * Command line entry points that run {@link ConceptPathFinder#findPaths} between a
 * hard-coded source concept and a geo-coordinate target concept.
 *
 * <p>Both entry points ignore their {@code args}; endpoints and concepts are fixed in code.
 */
public class MainCli {

    /**
     * Builds the target concept shared by both entry points: subjects {@code ?s} that
     * carry both a WGS84 {@code long} and a WGS84 {@code lat} property.
     *
     * @return a freshly created concept over the variable {@code s}
     */
    private static Concept createGeoTargetConcept() {
        return Concept.create("?s <http://www.w3.org/2003/01/geo/wgs84_pos#long> ?lon ; <http://www.w3.org/2003/01/geo/wgs84_pos#lat> ?lat", "s");
    }

    /**
     * Alternative entry point: searches paths from FP7 projects to geo-located resources
     * using a SPARQL endpoint on localhost. The paths returned by the finder are discarded.
     *
     * @param args ignored
     * @throws IOException declared by the signature; not thrown directly by this method
     * @throws SQLException declared by the signature; not thrown directly by this method
     */
    public static void main2(String[] args) throws IOException, SQLException {
        // NOTE(review): the second constructor argument is presumably the default graph IRI - confirm.
        QueryExecutionFactory localFactory = new QueryExecutionFactoryHttp(
                "http://localhost:8810/sparql",
                "http://fp7-pp.publicdata.eu/");

        Concept projects = Concept.create("?s a <http://fp7-pp.publicdata.eu/ontology/Project>", "s");
        System.out.println(projects);

        Concept geoTarget = createGeoTargetConcept();

        // NOTE(review): the two trailing 10s are presumably result / path-length limits -
        // verify against ConceptPathFinder.findPaths.
        ConceptPathFinder.findPaths(localFactory, projects, geoTarget, 10, 10);
    }

    /**
     * Main entry point: searches paths from the pattern {@code ?s ?_p_ ?_o_} to geo-located
     * resources on the public FP7 endpoint and prints the source concept and the found paths
     * to standard output.
     *
     * @param args ignored
     * @throws IOException declared by the signature; not thrown directly by this method
     * @throws SQLException declared by the signature; not thrown directly by this method
     */
    public static void main(String[] args) throws IOException, SQLException {
        // For a local endpoint, "http://localhost:8810/sparql" was used here instead.
        final String endpointIri = "http://fp7-pp.publicdata.eu/sparql";
        QueryExecutionFactory remoteFactory = new QueryExecutionFactoryHttp(endpointIri);

        Concept anySubject = Concept.create("?s ?_p_ ?_o_", "s");
        System.out.println(anySubject);

        Concept geoTarget = createGeoTargetConcept();

        // NOTE(review): the two trailing 10s are presumably result / path-length limits -
        // verify against ConceptPathFinder.findPaths.
        List<Path> foundPaths = ConceptPathFinder.findPaths(remoteFactory, anySubject, geoTarget, 10, 10);
        System.out.println(foundPaths);
    }
}
/**
 * Mapping function from an RDF {@link Statement} to its subject.
 */
class Map1StatementToSubject implements Map1<Statement, Resource> {

    /**
     * Extracts the subject of the given statement.
     *
     * @param statement the statement whose subject is requested
     * @return the result of calling {@code asResource()} on the statement's subject
     */
    @Override
    public Resource apply(Statement statement) {
        final Resource subject = statement.getSubject();
        return subject.asResource();
    }
}
/*
* Note: Dijkstra would only keep the shortest path to a node - but here we want all paths...
*
*
* Schema:
* context: The id of the path segment
* is_inverse: whether this node was reached by forward or backward traversal
* path_length: accumulated path length
*
* cost: some metadata..., for example, how many forward / backward tracks were used
*
* Path "Fact" table:
* TODO I guess the segment_id is globally unique...
*
* process_id | ant_id | segment_id | context_id | to_node_id | is_inverse | path_length | backward_step_count |
* X | 1 | 1 | null | foo | false | 0 | 0 |
* 2 | 1 | bar | baz | 1 | 0 |
*
* Solution Cache:
* We can track which paths were found between two nodes, but we also need a completeness level,
* i.e. whether all paths of e.g. length 1, 2, 3, ... have been found.
* If yes, then we can make full use of the cache, otherwise, we need to scan whether there
* are paths which have not been checked.
* This, on the other hand, means that we would have to keep track of
* - which paths have not been seen yet (because the iteration was not far enough yet)
* - which paths have been skipped, e.g. because their cost estimate was too high.
*
*
*
* Maintaining this minimum path matrix would require N^2 space...
* With ~50000 properties on DBpedia, there is no point to pre-compute this, but only cache results
* of on-demand computations.
*
*
* A shortest path cache:
* This could help estimating, whether 2 nodes are connected at all, and what would be their shortest route.
*
* Somehow this begs the question of what pg_routing actually does...
* Anyway, a pure Java solution is preferred...
*
* Path "Cache" table:
* start_node_id | end_node_id |
*
*
* Questions:
*
* We could start caching paths between nodes after a certain length!
*
*
* So, when we search for paths between nodes, we ask an "Edge-Provider" for all outgoing edges.
* We could then keep track of whether the edge provider was done or not with the provisioning.
* -> state + done flag needed
*
*
*
*
* How to figure out that two paths meet:
* If one ant either reaches a goal node (to_node_id is a goal) or
* to_node_id was reached by another ant
*
* Problem with this approach:
* We do not keep track of sub-solutions -> bad!
*
*
* Property adjacency retrieval:
* This is in the general case some kind of partitioned query execution....
*
*/