% NAACL 2010 Mechanical Turk workshop paper by Gillick and Liu.
% The booktitle uses a plain ASCII apostrophe so classic 8-bit BibTeX can process it.
@inproceedings{dgillick10naacl,
  author    = {D. Gillick and Y. Liu},
  title     = {{Non-Expert Evaluation of Summarization Systems is Risky}},
  booktitle = {Proceedings of NAACL, Workshop on Creating Speech and Language Data With Amazon's Mechanical Turk},
  year      = {2010},
}
[abstract] [bib] [paper]
Who’s Calling? Demographics of Mobile Phone Use in Rwanda
Joshua Blumenstock, Dan Gillick, Nathan Eagle
AAAI Artificial Intelligence for Development Symposium, 2010
We describe how new sources of data can be used to better understand the demographic structure of the population of Rwandan mobile phone users. After combining anonymous call data records with follow-up phone interviews, we detect significant differences in phone usage among different social and economic subgroups of the population. However, initial experiments suggest that predicting demographics from call usage, and vice-versa, is quite difficult.
% AAAI AI for Development Symposium 2010 paper on mobile phone use in Rwanda.
@inproceedings{dgillick10aid,
  author    = {J. Blumenstock and D. Gillick and N. Eagle},
  title     = {{Who's Calling? Demographics of Mobile Phone Use in Rwanda}},
  booktitle = {Proceedings of the AAAI Artificial Intelligence for Development Symposium},
  year      = {2010},
}
[abstract] [bib] [paper]
2009
The ICSI/UTD Summarization System at TAC 2009
Dan Gillick, Benoit Favre, Dilek Hakkani-Tur, Berndt Bohnet, Yang Liu, Shasha Xie
TAC, 2009
We describe improvements to our 2008 system that result in a top-performing summarization system. The motivating ideas are (1) improve sentence boundary detection to avoid damaging errors in preprocessing; (2) prune sentences that are unlikely to work well in a summary; (3) leverage sentence position to improve update summarization; (4) focus on high-precision sentence compression to improve readability rather than content.
% System description paper for the ICSI/UTD summarization entry at TAC 2009.
% Every field is comma-terminated; a missing separator after a field is a BibTeX parse error.
@inproceedings{dgillick09tac,
  author    = {D. Gillick and B. Favre and D. Hakkani-Tur and B. Bohnet and Y. Liu and S. Xie},
  title     = {{The ICSI/UTD Summarization System at TAC 2009}},
  booktitle = {TAC},
  year      = {2009},
}
[abstract] [bib] [paper] [slides]
A Scalable Global Model for Summarization
Dan Gillick, Benoit Favre
NAACL, 2009
We present an Integer Linear Program for
exact inference under a maximum coverage
model for automatic summarization. We compare
our model, which operates at the sub-sentence
or "concept"-level, to a sentence level
model, previously solved with an ILP.
Our model scales more efficiently to larger
problems because it does not require a
quadratic number of variables to address redundancy
in pairs of selected sentences. We
also show how to include sentence compression
in the ILP formulation, which has the
desirable property of performing compression
and sentence selection simultaneously. The
resulting system performs at least as well as
the best systems participating in the recent
Text Analysis Conference, as judged by a variety
of automatic and manual content-based
metrics.
% NAACL 2009 ILP-for-NLP workshop paper on a scalable global summarization model.
@inproceedings{dgillick09sgm,
  author    = {D. Gillick and B. Favre},
  title     = {{A Scalable Global Model for Summarization}},
  booktitle = {Proceedings of NAACL Workshop on Integer Linear Programming for Natural Language Processing},
  year      = {2009},
}
[abstract] [bib] [paper]
Sentence Boundary Detection and the Problem with the U.S.
Dan Gillick
NAACL, 2009
Sentence Boundary Detection is widely used
but often with outdated tools. We discuss what
makes it difficult, which features are relevant,
and present a fully statistical system, now publicly
available, that gives the best known error
rate on a standard news corpus: Of some
27,000 examples, our system makes 67 errors,
23 involving the word "U.S."
% NAACL 2009 short paper on statistical sentence boundary detection.
@inproceedings{dgillick09sbd,
  author    = {D. Gillick},
  title     = {{Sentence Boundary Detection and the Problem with the U.S.}},
  booktitle = {Proceedings of NAACL: Short Papers},
  year      = {2009},
}
[abstract] [bib] [paper] [code]
A Global Optimization Framework for Meeting Summarization
Dan Gillick, Korbinian Riedhammer, Benoit Favre, Dilek Hakkani-Tur
ICASSP, 2009
We introduce a model for extractive meeting summarization based on the
hypothesis that utterances convey bits of information, or
concepts. Using keyphrases as concepts weighted by frequency, and an
integer linear program to determine the best set of utterances, that
is, covering as many concepts as possible while satisfying a length
constraint, we achieve ROUGE scores at least as good as a ROUGE-based
oracle derived from human summaries. This brings us to a critical
discussion of ROUGE and the future of extractive meeting
summarization.
% ICASSP 2009 paper on ILP-based extractive meeting summarization.
@inproceedings{dgillick09meetingilp,
  author    = {D. Gillick and K. Riedhammer and B. Favre and D. Hakkani-Tur},
  title     = {{A Global Optimization Framework for Meeting Summarization}},
  booktitle = {ICASSP},
  year      = {2009},
}
[abstract] [bib] [paper]
2008
The ICSI Summarization System at TAC 2008
Dan Gillick, Benoit Favre, Dilek Hakkani-Tur
TAC, 2008
The ICSI multi-document summarization system
relies on a general framework that casts
summarization as a global optimization problem
with an integer linear programming solution.
Our primary submission, a simple
sentence extractor with an n-gram frequency
heuristic, gives results at least as good as any
reported on the non-update part of the main
task. Our secondary submission adds compressed
sentence alternatives, achieving high
ROUGE scores but lower manual scores. We
also observe that an oracle version of our sentence
extractor is nearly a direct optimization
of ROUGE. We show oracle results for the
TAC data set and discuss their significance.
Finally, we provide a detailed analysis of the
linguistic quality of our two systems, suggesting
specifically where improvements might be
most useful.
% System description paper for the ICSI summarization entry at TAC 2008.
@inproceedings{dgillick08tac,
  author    = {D. Gillick and B. Favre and D. Hakkani-Tur},
  title     = {{The ICSI Summarization System at TAC 2008}},
  booktitle = {TAC},
  year      = {2008},
}
[abstract] [bib] [paper]
Unsupervised Learning of Edit Parameters for Matching Name Variants
Dan Gillick, Dilek Hakkani-Tur, Michael Levit
Interspeech, 2008
Since named entities are often written in different ways, question answering (QA) and other language processing tasks stand to benefit from entity matching. We address the problem of finding equivalent person names in unstructured text. Our approach is a generalization of spelling correction: We compare to candidate matches by applying a set of edits to an input name. We introduce a novel unsupervised method for learning spelling edit probabilities which improves overall F-Measure on our own name-matching task by 12%. Relevance is demonstrated by application to the GALE Distillation task.
% Interspeech 2008 paper on unsupervised learning of edit probabilities for name matching.
% The title matches the paper title given in the listing above this entry.
@inproceedings{dgillick08names,
  author    = {D. Gillick and D. Hakkani-Tur and M. Levit},
  title     = {{Unsupervised Learning of Edit Parameters for Matching Name Variants}},
  booktitle = {Interspeech},
  year      = {2008},
}
[abstract] [bib] [paper] [names list] [names labels]
Packing the Meeting Summarization Knapsack
Korbinian Riedhammer, Dan Gillick, Benoit Favre, Dilek Hakkani-Tur
Interspeech, 2008
Despite considerable work in automatic meeting summarization over the last few years, comparing results remains difficult due to varied task conditions and evaluations. To address this issue, we present a method for determining the best possible extractive summary given an evaluation metric like ROUGE. Our oracle system is based on a knapsack-packing framework, and though NP-Hard, can be solved nearly optimally by a genetic algorithm. To frame new research results in a meaningful context, we suggest presenting our oracle results alongside two simple baselines. We show oracle and baseline results for a variety of evaluation scenarios that have recently appeared in this field.
% Interspeech 2008 paper on a knapsack-based oracle for meeting summarization.
@inproceedings{koried08knapsack,
  author    = {K. Riedhammer and D. Gillick and B. Favre and D. Hakkani-Tur},
  title     = {{Packing the Meeting Summarization Knapsack}},
  booktitle = {Interspeech},
  year      = {2008},
}
[abstract] [bib] [paper]
Before
Integrating Several Annotation Layers for Statistical Information Distillation
Michael Levit, Dilek Hakkani-Tur, Gokhan Tur, Dan Gillick
Automatic Speech Recognition and Understanding Workshop (ASRU), 2007
We present a sentence extraction algorithm for Information
Distillation, a task where for a given templated query, relevant passages must be extracted from massive audio and textual document sources. For each sentence of the relevant documents (that are assumed to be known from the upstream
stages) we employ statistical classification methods to estimate the extent of its relevance to the query, whereby two aspects of relevance are taken into account: the template (type)
of the query and its slots (free-text descriptions of names, organizations, topic, events and so on, around which templates
are centered). The idiosyncrasy of the presented method is in
the choice of features used for classification. We extract our
features from charts, compilations of elements from various
annotation levels, such as word transcriptions, syntactic and
semantic parses, and Information Extraction annotations. In
our experiments we show that this integrated approach outperforms a purely lexical baseline by as much as 30% relative
in terms of F-measure. We also investigate the algorithm's behavior under noisy conditions, by comparing its performance
on ASR output and on corresponding manual transcriptions.
% ASRU 2007 paper on sentence extraction for information distillation.
@inproceedings{mlevit07distill,
  author    = {M. Levit and D. Hakkani-Tur and G. Tur and D. Gillick},
  title     = {{Integrating Several Annotation Layers for Statistical Information Distillation}},
  booktitle = {ASRU},
  year      = {2007},
}
[abstract] [bib] [paper]
Why Generative Phrase Models Underperform Surface Heuristics
John Denero, Dan Gillick, James Zhang, Dan Klein
North American chapter of the Association for Computational Linguistics (NAACL), 2006
We investigate why weights from generative models underperform heuristic estimates in phrase-based machine translation. We first propose a simple generative, phrase-based model and verify that
its estimates are inferior to those given by surface
statistics. The performance gap stems primarily
from the addition of a hidden segmentation variable, which increases the capacity for overfitting
during maximum likelihood training with EM. In
particular, while word level models benefit greatly
from re-estimation, phrase-level models do not: the
crucial difference is that distinct word alignments
cannot all be correct, while distinct segmentations
can. Alternate segmentations rather than alternate
alignments compete, resulting in increased determinization of the phrase table, decreased generalization, and decreased final BLEU score. We also
show that interpolation of the two methods can result in a modest increase in BLEU score.
% NAACL 2006 paper comparing generative phrase models with surface heuristics in MT.
@inproceedings{jdenero06generative,
  author    = {J. Denero and D. Gillick and J. Zhang and D. Klein},
  title     = {{Why Generative Phrase Models Underperform Surface Heuristics}},
  booktitle = {NAACL},
  year      = {2006},
}
[abstract] [bib] [paper]
Speaker Detection Without Models
Dan Gillick, Stephen Stafford, Barbara Peskin
International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2005
In order to capture sequential information and to take advantage
of extended training data conditions, we developed an algorithm
for speaker detection that scores a test segment by comparing it directly to similar instances of that speech in the training data. This
non-parametric technique, though at an early stage in its development, achieves error rates close to 1% on the NIST 2001 Extended
Data task and performs extremely well in combination with a standard Gaussian Mixture Model system. We also present a new scoring method that significantly improves performance by capturing
only positive evidence.
% ICASSP 2005 paper on non-parametric speaker detection.
@inproceedings{dgillick05sid,
  author    = {D. Gillick and S. Stafford and B. Peskin},
  title     = {{Speaker Detection Without Models}},
  booktitle = {ICASSP},
  year      = {2005},
}
[abstract] [bib] [paper]